agentboss 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/aboss.js +288 -288
- package/client/dist/assets/index-DxoLOxZ8.js +141 -0
- package/client/dist/index.html +1 -1
- package/package.json +1 -1
- package/server/analysis/dimensions/judgement.js +111 -107
- package/server/analysis/dimensions/llm-merge.js +59 -57
- package/server/analysis/dimensions/output-quality.js +167 -167
- package/server/analysis/dimensions/problem-definition.js +109 -104
- package/server/analysis/job.js +91 -14
- package/server/analysis/report-builder.js +574 -581
- package/server/analysis/scoring-v2.js +126 -72
- package/server/analysis/thresholds-v2.js +364 -358
- package/server/api/execution.js +94 -0
- package/server/db/schema.js +5 -2
- package/server/etl/opencode.js +5 -1
- package/server/execution/job.js +141 -2
- package/server/llm/advice-prompt.js +74 -11
- package/server/llm/advice.js +50 -1
- package/server/llm/analysis-prompt.js +173 -162
- package/server/llm/cli-runner.js +18 -2
- package/server/llm/judge.js +6 -1
- package/server/llm/mcp-classify.js +147 -0
- package/server/llm/project-advice-prompt.js +106 -6
- package/server/llm/project-advice.js +55 -2
- package/server/llm/session-analyzer.js +10 -1
- package/client/dist/assets/index-DBj1Ujlx.js +0 -137
|
@@ -1,167 +1,167 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* O1 — AI Output Quality.
|
|
3
|
-
*
|
|
4
|
-
* Sub-indicators:
|
|
5
|
-
* • first_take — fraction of assistant turns NOT followed by an
|
|
6
|
-
* immediate user correction
|
|
7
|
-
* • code_style — LLM-judged (rule fallback returns 0.7 neutral)
|
|
8
|
-
* • completeness — assistant text mentions edge/error/test concepts
|
|
9
|
-
* (LLM judge is more accurate; rules give a proxy)
|
|
10
|
-
*
|
|
11
|
-
* See spec §4.6.
|
|
12
|
-
*
|
|
13
|
-
* @author Felix
|
|
14
|
-
*/
|
|
15
|
-
|
|
16
|
-
'use strict';
|
|
17
|
-
|
|
18
|
-
const {
|
|
19
|
-
fetchMessages,
|
|
20
|
-
matchesAny,
|
|
21
|
-
} = require('../text-signals');
|
|
22
|
-
const { explainIndicator, rollupDimension, scoreToLevel, O1 } = require('../thresholds-v2');
|
|
23
|
-
const { mergeIndicator, dimensionSource } = require('./llm-merge');
|
|
24
|
-
const { makeEvidence } = require('../evidence-builder');
|
|
25
|
-
|
|
26
|
-
const CORRECTION_PATTERNS = [
|
|
27
|
-
/改一下/, /不对/, /(再|重新)(写|来|改|做)/, /(错|有问题|有bug)/, /漏了/, /换/,
|
|
28
|
-
/fix that/i, /that'?s wrong/i, /redo/i, /try again/i, /not quite/i,
|
|
29
|
-
];
|
|
30
|
-
|
|
31
|
-
const COMPLETENESS_PATTERNS = [
|
|
32
|
-
/边界/, /异常/, /错误处理/, /测试/, /单元测试/, /回退/, /兜底/,
|
|
33
|
-
/edge case/i, /error handling/i, /test/i, /fallback/i, /exception/i,
|
|
34
|
-
];
|
|
35
|
-
|
|
36
|
-
function analyzeRules(db, session, difficulty = 2) {
|
|
37
|
-
const messages = fetchMessages(db, session.id);
|
|
38
|
-
const haveText = messages.some((m) => m.text && m.text.length > 0);
|
|
39
|
-
|
|
40
|
-
let first_take = null;
|
|
41
|
-
let assistantTurns = 0;
|
|
42
|
-
let correctionsAfter = 0;
|
|
43
|
-
|
|
44
|
-
for (let i = 0; i < messages.length; i++) {
|
|
45
|
-
const m = messages[i];
|
|
46
|
-
if (m.role !== 'assistant') continue;
|
|
47
|
-
assistantTurns++;
|
|
48
|
-
const next = i + 1 < messages.length ? messages[i + 1] : null;
|
|
49
|
-
if (!next || next.role !== 'user' || !next.text) continue;
|
|
50
|
-
if (matchesAny(next.text, CORRECTION_PATTERNS)) correctionsAfter++;
|
|
51
|
-
}
|
|
52
|
-
if (assistantTurns > 0) {
|
|
53
|
-
first_take = 1 - correctionsAfter / assistantTurns;
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
const code_style = haveText ? 0.7 : null;
|
|
57
|
-
|
|
58
|
-
let completeness = null;
|
|
59
|
-
let completenessHits = 0;
|
|
60
|
-
let assistantMsgsWithText = 0;
|
|
61
|
-
if (haveText) {
|
|
62
|
-
const assistantMsgs = messages.filter((m) => m.role === 'assistant' && m.text);
|
|
63
|
-
assistantMsgsWithText = assistantMsgs.length;
|
|
64
|
-
if (assistantMsgs.length > 0) {
|
|
65
|
-
completenessHits = assistantMsgs.filter((m) => matchesAny(m.text, COMPLETENESS_PATTERNS)).length;
|
|
66
|
-
completeness = completenessHits / assistantMsgs.length;
|
|
67
|
-
}
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
const ftE = explainIndicator(O1.first_take, first_take, difficulty);
|
|
71
|
-
const csE = explainIndicator(O1.code_style, code_style, difficulty);
|
|
72
|
-
const cpE = explainIndicator(O1.completeness, completeness, difficulty);
|
|
73
|
-
|
|
74
|
-
const subScores = {
|
|
75
|
-
first_take: ftE.score,
|
|
76
|
-
code_style: csE.score,
|
|
77
|
-
completeness: cpE.score,
|
|
78
|
-
};
|
|
79
|
-
const subLevels = {
|
|
80
|
-
first_take: ftE.level,
|
|
81
|
-
code_style: csE.level,
|
|
82
|
-
completeness: cpE.level,
|
|
83
|
-
};
|
|
84
|
-
|
|
85
|
-
const subEvidence = buildSubEvidence(
|
|
86
|
-
{ ft: ftE, cs: csE, cp: cpE },
|
|
87
|
-
{
|
|
88
|
-
mode: 'rules', haveText, assistantTurns, correctionsAfter,
|
|
89
|
-
assistantMsgsWithText, completenessHits,
|
|
90
|
-
},
|
|
91
|
-
difficulty
|
|
92
|
-
);
|
|
93
|
-
|
|
94
|
-
const score = rollupDimension('O1', subScores);
|
|
95
|
-
const level = scoreToLevel(score);
|
|
96
|
-
|
|
97
|
-
return {
|
|
98
|
-
subScores,
|
|
99
|
-
subLevels,
|
|
100
|
-
subEvidence,
|
|
101
|
-
raw: { first_take, code_style, completeness, assistantTurns, correctionsAfter, completenessHits, assistantMsgsWithText },
|
|
102
|
-
score,
|
|
103
|
-
level,
|
|
104
|
-
judgeSource: 'rules',
|
|
105
|
-
llmJudge: null,
|
|
106
|
-
};
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
/** Public entry — synchronous. `llmCell` is llmJudge.O1 (or null). */
|
|
110
|
-
function analyze(db, session, difficulty = 2, llmCell = null) {
|
|
111
|
-
const ruleResult = analyzeRules(db, session, difficulty);
|
|
112
|
-
if (!llmCell) return ruleResult;
|
|
113
|
-
|
|
114
|
-
const cell = llmCell;
|
|
115
|
-
const m = {
|
|
116
|
-
first_take: mergeIndicator(cell.first_take, ruleResult.subScores.first_take, ruleResult.subLevels.first_take),
|
|
117
|
-
code_style: mergeIndicator(cell.code_style, ruleResult.subScores.code_style, ruleResult.subLevels.code_style),
|
|
118
|
-
completeness: mergeIndicator(cell.completeness, ruleResult.subScores.completeness, ruleResult.subLevels.completeness),
|
|
119
|
-
};
|
|
120
|
-
|
|
121
|
-
const subScores = { first_take: m.first_take.score, code_style: m.code_style.score, completeness: m.completeness.score };
|
|
122
|
-
const subLevels = { first_take: m.first_take.level, code_style: m.code_style.level, completeness: m.completeness.level };
|
|
123
|
-
|
|
124
|
-
const subEvidence = {
|
|
125
|
-
first_take: { ...ruleResult.subEvidence.first_take, what: m.first_take.evidence || ruleResult.subEvidence.first_take.what, level: subLevels.first_take, score: subScores.first_take },
|
|
126
|
-
code_style: { ...ruleResult.subEvidence.code_style, what: m.code_style.evidence || ruleResult.subEvidence.code_style.what, level: subLevels.code_style, score: subScores.code_style },
|
|
127
|
-
completeness: { ...ruleResult.subEvidence.completeness, what: m.completeness.evidence || ruleResult.subEvidence.completeness.what, level: subLevels.completeness, score: subScores.completeness },
|
|
128
|
-
};
|
|
129
|
-
|
|
130
|
-
const score = rollupDimension('O1', subScores);
|
|
131
|
-
const level = scoreToLevel(score);
|
|
132
|
-
const judgeSource = dimensionSource([m.first_take.source, m.code_style.source, m.completeness.source]);
|
|
133
|
-
|
|
134
|
-
return { subScores, subLevels, subEvidence, raw: { llmCell: cell, ruleRaw: ruleResult.raw }, score, level, judgeSource, llmJudge: null };
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
function buildSubEvidence(expls, ctx, difficulty) {
|
|
138
|
-
const { ft, cs, cp } = expls;
|
|
139
|
-
|
|
140
|
-
// rules
|
|
141
|
-
const { haveText, assistantTurns, correctionsAfter, assistantMsgsWithText, completenessHits } = ctx;
|
|
142
|
-
return {
|
|
143
|
-
first_take: makeEvidence({
|
|
144
|
-
key: 'first_take', label: '一次采纳率',
|
|
145
|
-
what: assistantTurns > 0
|
|
146
|
-
? `规则版:1 - (AI 回合后用户立即纠错的比例) = 1 - ${correctionsAfter} / ${assistantTurns} 个 AI 回合。`
|
|
147
|
-
: '规则版:无 AI 回合可评估。',
|
|
148
|
-
expl: ft, unit: '%', difficulty,
|
|
149
|
-
}),
|
|
150
|
-
code_style: makeEvidence({
|
|
151
|
-
key: 'code_style', label: '代码规范性',
|
|
152
|
-
what: haveText
|
|
153
|
-
? '规则版无法评估代码规范,默认给中性 0.7 分。开启 LLM judge 可获得真实评估。'
|
|
154
|
-
: '消息文本缺失,无法评估。',
|
|
155
|
-
expl: cs, unit: '%', difficulty,
|
|
156
|
-
}),
|
|
157
|
-
completeness: makeEvidence({
|
|
158
|
-
key: 'completeness', label: '方案完备性',
|
|
159
|
-
what: assistantMsgsWithText > 0
|
|
160
|
-
? `规则版:AI 消息中含完备性关键词("边界"、"异常"、"测试"等)的比例:${completenessHits} / ${assistantMsgsWithText}。`
|
|
161
|
-
: '规则版:无 AI 文本,无法评估。',
|
|
162
|
-
expl: cp, unit: '%', difficulty,
|
|
163
|
-
}),
|
|
164
|
-
};
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
module.exports = { analyze, analyzeRules };
|
|
1
|
+
/**
|
|
2
|
+
* O1 — AI Output Quality.
|
|
3
|
+
*
|
|
4
|
+
* Sub-indicators:
|
|
5
|
+
* • first_take — fraction of assistant turns NOT followed by an
|
|
6
|
+
* immediate user correction
|
|
7
|
+
* • code_style — LLM-judged (rule fallback returns 0.7 neutral)
|
|
8
|
+
* • completeness — assistant text mentions edge/error/test concepts
|
|
9
|
+
* (LLM judge is more accurate; rules give a proxy)
|
|
10
|
+
*
|
|
11
|
+
* See spec §4.6.
|
|
12
|
+
*
|
|
13
|
+
* @author Felix
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
'use strict';
|
|
17
|
+
|
|
18
|
+
const {
|
|
19
|
+
fetchMessages,
|
|
20
|
+
matchesAny,
|
|
21
|
+
} = require('../text-signals');
|
|
22
|
+
const { explainIndicator, rollupDimension, scoreToLevel, O1 } = require('../thresholds-v2');
|
|
23
|
+
const { mergeIndicator, dimensionSource } = require('./llm-merge');
|
|
24
|
+
const { makeEvidence } = require('../evidence-builder');
|
|
25
|
+
|
|
26
|
+
const CORRECTION_PATTERNS = [
|
|
27
|
+
/改一下/, /不对/, /(再|重新)(写|来|改|做)/, /(错|有问题|有bug)/, /漏了/, /换/,
|
|
28
|
+
/fix that/i, /that'?s wrong/i, /redo/i, /try again/i, /not quite/i,
|
|
29
|
+
];
|
|
30
|
+
|
|
31
|
+
const COMPLETENESS_PATTERNS = [
|
|
32
|
+
/边界/, /异常/, /错误处理/, /测试/, /单元测试/, /回退/, /兜底/,
|
|
33
|
+
/edge case/i, /error handling/i, /test/i, /fallback/i, /exception/i,
|
|
34
|
+
];
|
|
35
|
+
|
|
36
|
+
function analyzeRules(db, session, difficulty = 2) {
|
|
37
|
+
const messages = fetchMessages(db, session.id);
|
|
38
|
+
const haveText = messages.some((m) => m.text && m.text.length > 0);
|
|
39
|
+
|
|
40
|
+
let first_take = null;
|
|
41
|
+
let assistantTurns = 0;
|
|
42
|
+
let correctionsAfter = 0;
|
|
43
|
+
|
|
44
|
+
for (let i = 0; i < messages.length; i++) {
|
|
45
|
+
const m = messages[i];
|
|
46
|
+
if (m.role !== 'assistant') continue;
|
|
47
|
+
assistantTurns++;
|
|
48
|
+
const next = i + 1 < messages.length ? messages[i + 1] : null;
|
|
49
|
+
if (!next || next.role !== 'user' || !next.text) continue;
|
|
50
|
+
if (matchesAny(next.text, CORRECTION_PATTERNS)) correctionsAfter++;
|
|
51
|
+
}
|
|
52
|
+
if (assistantTurns > 0) {
|
|
53
|
+
first_take = 1 - correctionsAfter / assistantTurns;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
const code_style = haveText ? 0.7 : null;
|
|
57
|
+
|
|
58
|
+
let completeness = null;
|
|
59
|
+
let completenessHits = 0;
|
|
60
|
+
let assistantMsgsWithText = 0;
|
|
61
|
+
if (haveText) {
|
|
62
|
+
const assistantMsgs = messages.filter((m) => m.role === 'assistant' && m.text);
|
|
63
|
+
assistantMsgsWithText = assistantMsgs.length;
|
|
64
|
+
if (assistantMsgs.length > 0) {
|
|
65
|
+
completenessHits = assistantMsgs.filter((m) => matchesAny(m.text, COMPLETENESS_PATTERNS)).length;
|
|
66
|
+
completeness = completenessHits / assistantMsgs.length;
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
const ftE = explainIndicator(O1.first_take, first_take, difficulty);
|
|
71
|
+
const csE = explainIndicator(O1.code_style, code_style, difficulty);
|
|
72
|
+
const cpE = explainIndicator(O1.completeness, completeness, difficulty);
|
|
73
|
+
|
|
74
|
+
const subScores = {
|
|
75
|
+
first_take: ftE.score,
|
|
76
|
+
code_style: csE.score,
|
|
77
|
+
completeness: cpE.score,
|
|
78
|
+
};
|
|
79
|
+
const subLevels = {
|
|
80
|
+
first_take: ftE.level,
|
|
81
|
+
code_style: csE.level,
|
|
82
|
+
completeness: cpE.level,
|
|
83
|
+
};
|
|
84
|
+
|
|
85
|
+
const subEvidence = buildSubEvidence(
|
|
86
|
+
{ ft: ftE, cs: csE, cp: cpE },
|
|
87
|
+
{
|
|
88
|
+
mode: 'rules', haveText, assistantTurns, correctionsAfter,
|
|
89
|
+
assistantMsgsWithText, completenessHits,
|
|
90
|
+
},
|
|
91
|
+
difficulty
|
|
92
|
+
);
|
|
93
|
+
|
|
94
|
+
const score = rollupDimension('O1', subScores);
|
|
95
|
+
const level = scoreToLevel(score);
|
|
96
|
+
|
|
97
|
+
return {
|
|
98
|
+
subScores,
|
|
99
|
+
subLevels,
|
|
100
|
+
subEvidence,
|
|
101
|
+
raw: { first_take, code_style, completeness, assistantTurns, correctionsAfter, completenessHits, assistantMsgsWithText },
|
|
102
|
+
score,
|
|
103
|
+
level,
|
|
104
|
+
judgeSource: 'rules',
|
|
105
|
+
llmJudge: null,
|
|
106
|
+
};
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
/** Public entry — synchronous. `llmCell` is llmJudge.O1 (or null). */
|
|
110
|
+
function analyze(db, session, difficulty = 2, llmCell = null) {
|
|
111
|
+
const ruleResult = analyzeRules(db, session, difficulty);
|
|
112
|
+
if (!llmCell) return ruleResult;
|
|
113
|
+
|
|
114
|
+
const cell = llmCell;
|
|
115
|
+
const m = {
|
|
116
|
+
first_take: mergeIndicator(cell.first_take, ruleResult.subScores.first_take, ruleResult.subLevels.first_take),
|
|
117
|
+
code_style: mergeIndicator(cell.code_style, ruleResult.subScores.code_style, ruleResult.subLevels.code_style),
|
|
118
|
+
completeness: mergeIndicator(cell.completeness, ruleResult.subScores.completeness, ruleResult.subLevels.completeness),
|
|
119
|
+
};
|
|
120
|
+
|
|
121
|
+
const subScores = { first_take: m.first_take.score, code_style: m.code_style.score, completeness: m.completeness.score };
|
|
122
|
+
const subLevels = { first_take: m.first_take.level, code_style: m.code_style.level, completeness: m.completeness.level };
|
|
123
|
+
|
|
124
|
+
const subEvidence = {
|
|
125
|
+
first_take: { ...ruleResult.subEvidence.first_take, what: m.first_take.evidence || ruleResult.subEvidence.first_take.what, level: subLevels.first_take, score: subScores.first_take, source: m.first_take.source },
|
|
126
|
+
code_style: { ...ruleResult.subEvidence.code_style, what: m.code_style.evidence || ruleResult.subEvidence.code_style.what, level: subLevels.code_style, score: subScores.code_style, source: m.code_style.source },
|
|
127
|
+
completeness: { ...ruleResult.subEvidence.completeness, what: m.completeness.evidence || ruleResult.subEvidence.completeness.what, level: subLevels.completeness, score: subScores.completeness, source: m.completeness.source },
|
|
128
|
+
};
|
|
129
|
+
|
|
130
|
+
const score = rollupDimension('O1', subScores);
|
|
131
|
+
const level = scoreToLevel(score);
|
|
132
|
+
const judgeSource = dimensionSource([m.first_take.source, m.code_style.source, m.completeness.source]);
|
|
133
|
+
|
|
134
|
+
return { subScores, subLevels, subEvidence, raw: { llmCell: cell, ruleRaw: ruleResult.raw }, score, level, judgeSource, llmJudge: null };
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
function buildSubEvidence(expls, ctx, difficulty) {
|
|
138
|
+
const { ft, cs, cp } = expls;
|
|
139
|
+
|
|
140
|
+
// rules
|
|
141
|
+
const { haveText, assistantTurns, correctionsAfter, assistantMsgsWithText, completenessHits } = ctx;
|
|
142
|
+
return {
|
|
143
|
+
first_take: makeEvidence({
|
|
144
|
+
key: 'first_take', label: '一次采纳率',
|
|
145
|
+
what: assistantTurns > 0
|
|
146
|
+
? `规则版:1 - (AI 回合后用户立即纠错的比例) = 1 - ${correctionsAfter} / ${assistantTurns} 个 AI 回合。`
|
|
147
|
+
: '规则版:无 AI 回合可评估。',
|
|
148
|
+
expl: ft, unit: '%', difficulty,
|
|
149
|
+
}),
|
|
150
|
+
code_style: makeEvidence({
|
|
151
|
+
key: 'code_style', label: '代码规范性',
|
|
152
|
+
what: haveText
|
|
153
|
+
? '规则版无法评估代码规范,默认给中性 0.7 分。开启 LLM judge 可获得真实评估。'
|
|
154
|
+
: '消息文本缺失,无法评估。',
|
|
155
|
+
expl: cs, unit: '%', difficulty,
|
|
156
|
+
}),
|
|
157
|
+
completeness: makeEvidence({
|
|
158
|
+
key: 'completeness', label: '方案完备性',
|
|
159
|
+
what: assistantMsgsWithText > 0
|
|
160
|
+
? `规则版:AI 消息中含完备性关键词("边界"、"异常"、"测试"等)的比例:${completenessHits} / ${assistantMsgsWithText}。`
|
|
161
|
+
: '规则版:无 AI 文本,无法评估。',
|
|
162
|
+
expl: cp, unit: '%', difficulty,
|
|
163
|
+
}),
|
|
164
|
+
};
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
module.exports = { analyze, analyzeRules };
|
|
@@ -1,104 +1,109 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* H1 — Problem Definition.
|
|
3
|
-
*
|
|
4
|
-
* Captures the human ability to turn a vague need into a precise,
|
|
5
|
-
* AI-executable problem. Three sub-indicators:
|
|
6
|
-
* • clarity — AI proactive-question count in the first 30%
|
|
7
|
-
* • converge — number of user-message rounds to convergence
|
|
8
|
-
* • drift — direction-change events
|
|
9
|
-
*
|
|
10
|
-
* See docs/superpowers/specs/2026-06-13-capability-model-v2.md §4.1.
|
|
11
|
-
*
|
|
12
|
-
* @author Felix
|
|
13
|
-
*/
|
|
14
|
-
|
|
15
|
-
'use strict';
|
|
16
|
-
|
|
17
|
-
const { queryAll } = require('../../db/queries');
|
|
18
|
-
const {
|
|
19
|
-
fetchMessages,
|
|
20
|
-
userMessages,
|
|
21
|
-
matchesAny,
|
|
22
|
-
DRIFT_PATTERNS,
|
|
23
|
-
} = require('../text-signals');
|
|
24
|
-
const { explainIndicator, rollupDimension, scoreToLevel, H1 } = require('../thresholds-v2');
|
|
25
|
-
const { makeEvidence } = require('../evidence-builder');
|
|
26
|
-
const { mergeIndicator, dimensionSource } = require('./llm-merge');
|
|
27
|
-
|
|
28
|
-
function analyze(db, session, difficulty = 2, llmCell = null) {
|
|
29
|
-
const messages = fetchMessages(db, session.id);
|
|
30
|
-
const users = userMessages(messages);
|
|
31
|
-
|
|
32
|
-
// ---- clarity: count of `question` tool calls in first 30% ----------
|
|
33
|
-
const toolCalls = queryAll(
|
|
34
|
-
db,
|
|
35
|
-
`SELECT tool_name, timestamp
|
|
36
|
-
FROM unified_tool_call
|
|
37
|
-
WHERE session_id = ?
|
|
38
|
-
ORDER BY timestamp ASC`,
|
|
39
|
-
[session.id]
|
|
40
|
-
);
|
|
41
|
-
let clarity = 0;
|
|
42
|
-
let clarityCutoff = 0;
|
|
43
|
-
if (toolCalls.length > 0) {
|
|
44
|
-
clarityCutoff = Math.max(1, Math.floor(toolCalls.length * 0.3));
|
|
45
|
-
clarity = toolCalls.slice(0, clarityCutoff).filter((t) => t.tool_name === 'question').length;
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
// ---- converge: number of user messages -----------------------------
|
|
49
|
-
const converge = users.length;
|
|
50
|
-
|
|
51
|
-
// ---- drift: keyword spotting on user text -------------------------
|
|
52
|
-
const haveText = users.some((m) => m.text && m.text.length > 0);
|
|
53
|
-
|
|
54
|
-
let drift = null;
|
|
55
|
-
if (haveText) {
|
|
56
|
-
drift = users.filter((m) => matchesAny(m.text, DRIFT_PATTERNS)).length;
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
// ---- eval each ----------------------------------------------------
|
|
60
|
-
const clarityE = explainIndicator(H1.clarity, clarity, difficulty);
|
|
61
|
-
const convergeE = explainIndicator(H1.converge, converge, difficulty);
|
|
62
|
-
const driftE = explainIndicator(H1.drift, drift, difficulty);
|
|
63
|
-
|
|
64
|
-
// Rule baseline per indicator (level + centred score).
|
|
65
|
-
const rule = {
|
|
66
|
-
clarity: { score: clarityE.score, level: clarityE.level },
|
|
67
|
-
converge: { score: convergeE.score, level: convergeE.level },
|
|
68
|
-
drift: { score: driftE.score, level: driftE.level },
|
|
69
|
-
};
|
|
70
|
-
|
|
71
|
-
// Merge LLM cell (if any) over the rule baseline.
|
|
72
|
-
const cell = llmCell || {};
|
|
73
|
-
const m = {
|
|
74
|
-
clarity: mergeIndicator(cell.clarity, rule.clarity.score, rule.clarity.level),
|
|
75
|
-
converge: mergeIndicator(cell.converge, rule.converge.score, rule.converge.level),
|
|
76
|
-
drift: mergeIndicator(cell.drift, rule.drift.score, rule.drift.level),
|
|
77
|
-
};
|
|
78
|
-
|
|
79
|
-
const subScores = { clarity: m.clarity.score, converge: m.converge.score, drift: m.drift.score };
|
|
80
|
-
const subLevels = { clarity: m.clarity.level, converge: m.converge.level, drift: m.drift.level };
|
|
81
|
-
|
|
82
|
-
// Evidence: prefer the LLM's cited rationale when that indicator used the LLM.
|
|
83
|
-
const subEvidence = {
|
|
84
|
-
clarity: makeEvidence({ key: 'clarity', label: '初始指令清晰度', what: m.clarity.evidence || `规则版:前 30% 工具调用中 question 次数 ${clarity}(共 ${toolCalls.length} 次调用)。`, expl: clarityE, unit: '次', difficulty }),
|
|
85
|
-
converge: makeEvidence({ key: 'converge', label: '任务收敛轮次', what: m.converge.evidence || `规则版:用户消息 ${converge} 条。`, expl: convergeE, unit: '轮', difficulty }),
|
|
86
|
-
drift: makeEvidence({ key: 'drift', label: '方向变更次数', what: m.drift.evidence || (haveText ? `规则版:方向变更关键词命中 ${drift} 条。` : '用户消息无文本,无法识别。'), expl: driftE, unit: '次', difficulty }),
|
|
87
|
-
};
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
|
|
1
|
+
/**
|
|
2
|
+
* H1 — Problem Definition.
|
|
3
|
+
*
|
|
4
|
+
* Captures the human ability to turn a vague need into a precise,
|
|
5
|
+
* AI-executable problem. Three sub-indicators:
|
|
6
|
+
* • clarity — AI proactive-question count in the first 30%
|
|
7
|
+
* • converge — number of user-message rounds to convergence
|
|
8
|
+
* • drift — direction-change events
|
|
9
|
+
*
|
|
10
|
+
* See docs/superpowers/specs/2026-06-13-capability-model-v2.md §4.1.
|
|
11
|
+
*
|
|
12
|
+
* @author Felix
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
'use strict';
|
|
16
|
+
|
|
17
|
+
const { queryAll } = require('../../db/queries');
|
|
18
|
+
const {
|
|
19
|
+
fetchMessages,
|
|
20
|
+
userMessages,
|
|
21
|
+
matchesAny,
|
|
22
|
+
DRIFT_PATTERNS,
|
|
23
|
+
} = require('../text-signals');
|
|
24
|
+
const { explainIndicator, rollupDimension, scoreToLevel, H1 } = require('../thresholds-v2');
|
|
25
|
+
const { makeEvidence } = require('../evidence-builder');
|
|
26
|
+
const { mergeIndicator, dimensionSource } = require('./llm-merge');
|
|
27
|
+
|
|
28
|
+
function analyze(db, session, difficulty = 2, llmCell = null) {
|
|
29
|
+
const messages = fetchMessages(db, session.id);
|
|
30
|
+
const users = userMessages(messages);
|
|
31
|
+
|
|
32
|
+
// ---- clarity: count of `question` tool calls in first 30% ----------
|
|
33
|
+
const toolCalls = queryAll(
|
|
34
|
+
db,
|
|
35
|
+
`SELECT tool_name, timestamp
|
|
36
|
+
FROM unified_tool_call
|
|
37
|
+
WHERE session_id = ?
|
|
38
|
+
ORDER BY timestamp ASC`,
|
|
39
|
+
[session.id]
|
|
40
|
+
);
|
|
41
|
+
let clarity = 0;
|
|
42
|
+
let clarityCutoff = 0;
|
|
43
|
+
if (toolCalls.length > 0) {
|
|
44
|
+
clarityCutoff = Math.max(1, Math.floor(toolCalls.length * 0.3));
|
|
45
|
+
clarity = toolCalls.slice(0, clarityCutoff).filter((t) => t.tool_name === 'question').length;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
// ---- converge: number of user messages -----------------------------
|
|
49
|
+
const converge = users.length;
|
|
50
|
+
|
|
51
|
+
// ---- drift: keyword spotting on user text -------------------------
|
|
52
|
+
const haveText = users.some((m) => m.text && m.text.length > 0);
|
|
53
|
+
|
|
54
|
+
let drift = null;
|
|
55
|
+
if (haveText) {
|
|
56
|
+
drift = users.filter((m) => matchesAny(m.text, DRIFT_PATTERNS)).length;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
// ---- eval each ----------------------------------------------------
|
|
60
|
+
const clarityE = explainIndicator(H1.clarity, clarity, difficulty);
|
|
61
|
+
const convergeE = explainIndicator(H1.converge, converge, difficulty);
|
|
62
|
+
const driftE = explainIndicator(H1.drift, drift, difficulty);
|
|
63
|
+
|
|
64
|
+
// Rule baseline per indicator (level + centred score).
|
|
65
|
+
const rule = {
|
|
66
|
+
clarity: { score: clarityE.score, level: clarityE.level },
|
|
67
|
+
converge: { score: convergeE.score, level: convergeE.level },
|
|
68
|
+
drift: { score: driftE.score, level: driftE.level },
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
// Merge LLM cell (if any) over the rule baseline.
|
|
72
|
+
const cell = llmCell || {};
|
|
73
|
+
const m = {
|
|
74
|
+
clarity: mergeIndicator(cell.clarity, rule.clarity.score, rule.clarity.level),
|
|
75
|
+
converge: mergeIndicator(cell.converge, rule.converge.score, rule.converge.level),
|
|
76
|
+
drift: mergeIndicator(cell.drift, rule.drift.score, rule.drift.level),
|
|
77
|
+
};
|
|
78
|
+
|
|
79
|
+
const subScores = { clarity: m.clarity.score, converge: m.converge.score, drift: m.drift.score };
|
|
80
|
+
const subLevels = { clarity: m.clarity.level, converge: m.converge.level, drift: m.drift.level };
|
|
81
|
+
|
|
82
|
+
// Evidence: prefer the LLM's cited rationale when that indicator used the LLM.
|
|
83
|
+
const subEvidence = {
|
|
84
|
+
clarity: makeEvidence({ key: 'clarity', label: '初始指令清晰度', what: m.clarity.evidence || `规则版:前 30% 工具调用中 question 次数 ${clarity}(共 ${toolCalls.length} 次调用)。`, expl: clarityE, unit: '次', difficulty }),
|
|
85
|
+
converge: makeEvidence({ key: 'converge', label: '任务收敛轮次', what: m.converge.evidence || `规则版:用户消息 ${converge} 条。`, expl: convergeE, unit: '轮', difficulty }),
|
|
86
|
+
drift: makeEvidence({ key: 'drift', label: '方向变更次数', what: m.drift.evidence || (haveText ? `规则版:方向变更关键词命中 ${drift} 条。` : '用户消息无文本,无法识别。'), expl: driftE, unit: '次', difficulty }),
|
|
87
|
+
};
|
|
88
|
+
|
|
89
|
+
// Tag each sub-indicator with its source (llm / rules) for the UI badge.
|
|
90
|
+
subEvidence.clarity.source = m.clarity.source;
|
|
91
|
+
subEvidence.converge.source = m.converge.source;
|
|
92
|
+
subEvidence.drift.source = m.drift.source;
|
|
93
|
+
|
|
94
|
+
const score = rollupDimension('H1', subScores);
|
|
95
|
+
const level = scoreToLevel(score);
|
|
96
|
+
const judgeSource = dimensionSource([m.clarity.source, m.converge.source, m.drift.source]);
|
|
97
|
+
|
|
98
|
+
return {
|
|
99
|
+
subScores,
|
|
100
|
+
subLevels,
|
|
101
|
+
subEvidence,
|
|
102
|
+
raw: { clarity, converge, drift, difficulty, haveText, toolCallCount: toolCalls.length, clarityCutoff },
|
|
103
|
+
score,
|
|
104
|
+
level,
|
|
105
|
+
judgeSource,
|
|
106
|
+
};
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
module.exports = { analyze };
|