agentboss 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/client/dist/assets/{index-CsVml4AS.js → index-DxoLOxZ8.js} +53 -49
- package/client/dist/index.html +1 -1
- package/package.json +1 -1
- package/server/analysis/dimensions/judgement.js +111 -107
- package/server/analysis/dimensions/llm-merge.js +59 -57
- package/server/analysis/dimensions/output-quality.js +167 -167
- package/server/analysis/dimensions/problem-definition.js +109 -104
- package/server/analysis/job.js +37 -6
- package/server/analysis/scoring-v2.js +12 -8
- package/server/api/execution.js +94 -0
- package/server/db/schema.js +5 -2
- package/server/etl/opencode.js +5 -1
- package/server/execution/job.js +141 -2
- package/server/llm/advice-prompt.js +74 -11
- package/server/llm/advice.js +50 -1
- package/server/llm/mcp-classify.js +147 -0
- package/server/llm/project-advice-prompt.js +106 -6
- package/server/llm/project-advice.js +55 -2
|
@@ -1,167 +1,167 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* O1 — AI Output Quality.
|
|
3
|
-
*
|
|
4
|
-
* Sub-indicators:
|
|
5
|
-
* • first_take — fraction of assistant turns NOT followed by an
|
|
6
|
-
* immediate user correction
|
|
7
|
-
* • code_style — LLM-judged (rule fallback returns 0.7 neutral)
|
|
8
|
-
* • completeness — assistant text mentions edge/error/test concepts
|
|
9
|
-
* (LLM judge is more accurate; rules give a proxy)
|
|
10
|
-
*
|
|
11
|
-
* See spec §4.6.
|
|
12
|
-
*
|
|
13
|
-
* @author Felix
|
|
14
|
-
*/
|
|
15
|
-
|
|
16
|
-
'use strict';
|
|
17
|
-
|
|
18
|
-
const {
|
|
19
|
-
fetchMessages,
|
|
20
|
-
matchesAny,
|
|
21
|
-
} = require('../text-signals');
|
|
22
|
-
const { explainIndicator, rollupDimension, scoreToLevel, O1 } = require('../thresholds-v2');
|
|
23
|
-
const { mergeIndicator, dimensionSource } = require('./llm-merge');
|
|
24
|
-
const { makeEvidence } = require('../evidence-builder');
|
|
25
|
-
|
|
26
|
-
const CORRECTION_PATTERNS = [
|
|
27
|
-
/改一下/, /不对/, /(再|重新)(写|来|改|做)/, /(错|有问题|有bug)/, /漏了/, /换/,
|
|
28
|
-
/fix that/i, /that'?s wrong/i, /redo/i, /try again/i, /not quite/i,
|
|
29
|
-
];
|
|
30
|
-
|
|
31
|
-
const COMPLETENESS_PATTERNS = [
|
|
32
|
-
/边界/, /异常/, /错误处理/, /测试/, /单元测试/, /回退/, /兜底/,
|
|
33
|
-
/edge case/i, /error handling/i, /test/i, /fallback/i, /exception/i,
|
|
34
|
-
];
|
|
35
|
-
|
|
36
|
-
function analyzeRules(db, session, difficulty = 2) {
|
|
37
|
-
const messages = fetchMessages(db, session.id);
|
|
38
|
-
const haveText = messages.some((m) => m.text && m.text.length > 0);
|
|
39
|
-
|
|
40
|
-
let first_take = null;
|
|
41
|
-
let assistantTurns = 0;
|
|
42
|
-
let correctionsAfter = 0;
|
|
43
|
-
|
|
44
|
-
for (let i = 0; i < messages.length; i++) {
|
|
45
|
-
const m = messages[i];
|
|
46
|
-
if (m.role !== 'assistant') continue;
|
|
47
|
-
assistantTurns++;
|
|
48
|
-
const next = i + 1 < messages.length ? messages[i + 1] : null;
|
|
49
|
-
if (!next || next.role !== 'user' || !next.text) continue;
|
|
50
|
-
if (matchesAny(next.text, CORRECTION_PATTERNS)) correctionsAfter++;
|
|
51
|
-
}
|
|
52
|
-
if (assistantTurns > 0) {
|
|
53
|
-
first_take = 1 - correctionsAfter / assistantTurns;
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
const code_style = haveText ? 0.7 : null;
|
|
57
|
-
|
|
58
|
-
let completeness = null;
|
|
59
|
-
let completenessHits = 0;
|
|
60
|
-
let assistantMsgsWithText = 0;
|
|
61
|
-
if (haveText) {
|
|
62
|
-
const assistantMsgs = messages.filter((m) => m.role === 'assistant' && m.text);
|
|
63
|
-
assistantMsgsWithText = assistantMsgs.length;
|
|
64
|
-
if (assistantMsgs.length > 0) {
|
|
65
|
-
completenessHits = assistantMsgs.filter((m) => matchesAny(m.text, COMPLETENESS_PATTERNS)).length;
|
|
66
|
-
completeness = completenessHits / assistantMsgs.length;
|
|
67
|
-
}
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
const ftE = explainIndicator(O1.first_take, first_take, difficulty);
|
|
71
|
-
const csE = explainIndicator(O1.code_style, code_style, difficulty);
|
|
72
|
-
const cpE = explainIndicator(O1.completeness, completeness, difficulty);
|
|
73
|
-
|
|
74
|
-
const subScores = {
|
|
75
|
-
first_take: ftE.score,
|
|
76
|
-
code_style: csE.score,
|
|
77
|
-
completeness: cpE.score,
|
|
78
|
-
};
|
|
79
|
-
const subLevels = {
|
|
80
|
-
first_take: ftE.level,
|
|
81
|
-
code_style: csE.level,
|
|
82
|
-
completeness: cpE.level,
|
|
83
|
-
};
|
|
84
|
-
|
|
85
|
-
const subEvidence = buildSubEvidence(
|
|
86
|
-
{ ft: ftE, cs: csE, cp: cpE },
|
|
87
|
-
{
|
|
88
|
-
mode: 'rules', haveText, assistantTurns, correctionsAfter,
|
|
89
|
-
assistantMsgsWithText, completenessHits,
|
|
90
|
-
},
|
|
91
|
-
difficulty
|
|
92
|
-
);
|
|
93
|
-
|
|
94
|
-
const score = rollupDimension('O1', subScores);
|
|
95
|
-
const level = scoreToLevel(score);
|
|
96
|
-
|
|
97
|
-
return {
|
|
98
|
-
subScores,
|
|
99
|
-
subLevels,
|
|
100
|
-
subEvidence,
|
|
101
|
-
raw: { first_take, code_style, completeness, assistantTurns, correctionsAfter, completenessHits, assistantMsgsWithText },
|
|
102
|
-
score,
|
|
103
|
-
level,
|
|
104
|
-
judgeSource: 'rules',
|
|
105
|
-
llmJudge: null,
|
|
106
|
-
};
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
/** Public entry — synchronous. `llmCell` is llmJudge.O1 (or null). */
|
|
110
|
-
function analyze(db, session, difficulty = 2, llmCell = null) {
|
|
111
|
-
const ruleResult = analyzeRules(db, session, difficulty);
|
|
112
|
-
if (!llmCell) return ruleResult;
|
|
113
|
-
|
|
114
|
-
const cell = llmCell;
|
|
115
|
-
const m = {
|
|
116
|
-
first_take: mergeIndicator(cell.first_take, ruleResult.subScores.first_take, ruleResult.subLevels.first_take),
|
|
117
|
-
code_style: mergeIndicator(cell.code_style, ruleResult.subScores.code_style, ruleResult.subLevels.code_style),
|
|
118
|
-
completeness: mergeIndicator(cell.completeness, ruleResult.subScores.completeness, ruleResult.subLevels.completeness),
|
|
119
|
-
};
|
|
120
|
-
|
|
121
|
-
const subScores = { first_take: m.first_take.score, code_style: m.code_style.score, completeness: m.completeness.score };
|
|
122
|
-
const subLevels = { first_take: m.first_take.level, code_style: m.code_style.level, completeness: m.completeness.level };
|
|
123
|
-
|
|
124
|
-
const subEvidence = {
|
|
125
|
-
first_take: { ...ruleResult.subEvidence.first_take, what: m.first_take.evidence || ruleResult.subEvidence.first_take.what, level: subLevels.first_take, score: subScores.first_take },
|
|
126
|
-
code_style: { ...ruleResult.subEvidence.code_style, what: m.code_style.evidence || ruleResult.subEvidence.code_style.what, level: subLevels.code_style, score: subScores.code_style },
|
|
127
|
-
completeness: { ...ruleResult.subEvidence.completeness, what: m.completeness.evidence || ruleResult.subEvidence.completeness.what, level: subLevels.completeness, score: subScores.completeness },
|
|
128
|
-
};
|
|
129
|
-
|
|
130
|
-
const score = rollupDimension('O1', subScores);
|
|
131
|
-
const level = scoreToLevel(score);
|
|
132
|
-
const judgeSource = dimensionSource([m.first_take.source, m.code_style.source, m.completeness.source]);
|
|
133
|
-
|
|
134
|
-
return { subScores, subLevels, subEvidence, raw: { llmCell: cell, ruleRaw: ruleResult.raw }, score, level, judgeSource, llmJudge: null };
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
function buildSubEvidence(expls, ctx, difficulty) {
|
|
138
|
-
const { ft, cs, cp } = expls;
|
|
139
|
-
|
|
140
|
-
// rules
|
|
141
|
-
const { haveText, assistantTurns, correctionsAfter, assistantMsgsWithText, completenessHits } = ctx;
|
|
142
|
-
return {
|
|
143
|
-
first_take: makeEvidence({
|
|
144
|
-
key: 'first_take', label: '一次采纳率',
|
|
145
|
-
what: assistantTurns > 0
|
|
146
|
-
? `规则版:1 - (AI 回合后用户立即纠错的比例) = 1 - ${correctionsAfter} / ${assistantTurns} 个 AI 回合。`
|
|
147
|
-
: '规则版:无 AI 回合可评估。',
|
|
148
|
-
expl: ft, unit: '%', difficulty,
|
|
149
|
-
}),
|
|
150
|
-
code_style: makeEvidence({
|
|
151
|
-
key: 'code_style', label: '代码规范性',
|
|
152
|
-
what: haveText
|
|
153
|
-
? '规则版无法评估代码规范,默认给中性 0.7 分。开启 LLM judge 可获得真实评估。'
|
|
154
|
-
: '消息文本缺失,无法评估。',
|
|
155
|
-
expl: cs, unit: '%', difficulty,
|
|
156
|
-
}),
|
|
157
|
-
completeness: makeEvidence({
|
|
158
|
-
key: 'completeness', label: '方案完备性',
|
|
159
|
-
what: assistantMsgsWithText > 0
|
|
160
|
-
? `规则版:AI 消息中含完备性关键词("边界"、"异常"、"测试"等)的比例:${completenessHits} / ${assistantMsgsWithText}。`
|
|
161
|
-
: '规则版:无 AI 文本,无法评估。',
|
|
162
|
-
expl: cp, unit: '%', difficulty,
|
|
163
|
-
}),
|
|
164
|
-
};
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
module.exports = { analyze, analyzeRules };
|
|
1
|
+
/**
|
|
2
|
+
* O1 — AI Output Quality.
|
|
3
|
+
*
|
|
4
|
+
* Sub-indicators:
|
|
5
|
+
* • first_take — fraction of assistant turns NOT followed by an
|
|
6
|
+
* immediate user correction
|
|
7
|
+
* • code_style — LLM-judged (rule fallback returns 0.7 neutral)
|
|
8
|
+
* • completeness — assistant text mentions edge/error/test concepts
|
|
9
|
+
* (LLM judge is more accurate; rules give a proxy)
|
|
10
|
+
*
|
|
11
|
+
* See spec §4.6.
|
|
12
|
+
*
|
|
13
|
+
* @author Felix
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
'use strict';
|
|
17
|
+
|
|
18
|
+
const {
|
|
19
|
+
fetchMessages,
|
|
20
|
+
matchesAny,
|
|
21
|
+
} = require('../text-signals');
|
|
22
|
+
const { explainIndicator, rollupDimension, scoreToLevel, O1 } = require('../thresholds-v2');
|
|
23
|
+
const { mergeIndicator, dimensionSource } = require('./llm-merge');
|
|
24
|
+
const { makeEvidence } = require('../evidence-builder');
|
|
25
|
+
|
|
26
|
+
const CORRECTION_PATTERNS = [
|
|
27
|
+
/改一下/, /不对/, /(再|重新)(写|来|改|做)/, /(错|有问题|有bug)/, /漏了/, /换/,
|
|
28
|
+
/fix that/i, /that'?s wrong/i, /redo/i, /try again/i, /not quite/i,
|
|
29
|
+
];
|
|
30
|
+
|
|
31
|
+
const COMPLETENESS_PATTERNS = [
|
|
32
|
+
/边界/, /异常/, /错误处理/, /测试/, /单元测试/, /回退/, /兜底/,
|
|
33
|
+
/edge case/i, /error handling/i, /test/i, /fallback/i, /exception/i,
|
|
34
|
+
];
|
|
35
|
+
|
|
36
|
+
function analyzeRules(db, session, difficulty = 2) {
|
|
37
|
+
const messages = fetchMessages(db, session.id);
|
|
38
|
+
const haveText = messages.some((m) => m.text && m.text.length > 0);
|
|
39
|
+
|
|
40
|
+
let first_take = null;
|
|
41
|
+
let assistantTurns = 0;
|
|
42
|
+
let correctionsAfter = 0;
|
|
43
|
+
|
|
44
|
+
for (let i = 0; i < messages.length; i++) {
|
|
45
|
+
const m = messages[i];
|
|
46
|
+
if (m.role !== 'assistant') continue;
|
|
47
|
+
assistantTurns++;
|
|
48
|
+
const next = i + 1 < messages.length ? messages[i + 1] : null;
|
|
49
|
+
if (!next || next.role !== 'user' || !next.text) continue;
|
|
50
|
+
if (matchesAny(next.text, CORRECTION_PATTERNS)) correctionsAfter++;
|
|
51
|
+
}
|
|
52
|
+
if (assistantTurns > 0) {
|
|
53
|
+
first_take = 1 - correctionsAfter / assistantTurns;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
const code_style = haveText ? 0.7 : null;
|
|
57
|
+
|
|
58
|
+
let completeness = null;
|
|
59
|
+
let completenessHits = 0;
|
|
60
|
+
let assistantMsgsWithText = 0;
|
|
61
|
+
if (haveText) {
|
|
62
|
+
const assistantMsgs = messages.filter((m) => m.role === 'assistant' && m.text);
|
|
63
|
+
assistantMsgsWithText = assistantMsgs.length;
|
|
64
|
+
if (assistantMsgs.length > 0) {
|
|
65
|
+
completenessHits = assistantMsgs.filter((m) => matchesAny(m.text, COMPLETENESS_PATTERNS)).length;
|
|
66
|
+
completeness = completenessHits / assistantMsgs.length;
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
const ftE = explainIndicator(O1.first_take, first_take, difficulty);
|
|
71
|
+
const csE = explainIndicator(O1.code_style, code_style, difficulty);
|
|
72
|
+
const cpE = explainIndicator(O1.completeness, completeness, difficulty);
|
|
73
|
+
|
|
74
|
+
const subScores = {
|
|
75
|
+
first_take: ftE.score,
|
|
76
|
+
code_style: csE.score,
|
|
77
|
+
completeness: cpE.score,
|
|
78
|
+
};
|
|
79
|
+
const subLevels = {
|
|
80
|
+
first_take: ftE.level,
|
|
81
|
+
code_style: csE.level,
|
|
82
|
+
completeness: cpE.level,
|
|
83
|
+
};
|
|
84
|
+
|
|
85
|
+
const subEvidence = buildSubEvidence(
|
|
86
|
+
{ ft: ftE, cs: csE, cp: cpE },
|
|
87
|
+
{
|
|
88
|
+
mode: 'rules', haveText, assistantTurns, correctionsAfter,
|
|
89
|
+
assistantMsgsWithText, completenessHits,
|
|
90
|
+
},
|
|
91
|
+
difficulty
|
|
92
|
+
);
|
|
93
|
+
|
|
94
|
+
const score = rollupDimension('O1', subScores);
|
|
95
|
+
const level = scoreToLevel(score);
|
|
96
|
+
|
|
97
|
+
return {
|
|
98
|
+
subScores,
|
|
99
|
+
subLevels,
|
|
100
|
+
subEvidence,
|
|
101
|
+
raw: { first_take, code_style, completeness, assistantTurns, correctionsAfter, completenessHits, assistantMsgsWithText },
|
|
102
|
+
score,
|
|
103
|
+
level,
|
|
104
|
+
judgeSource: 'rules',
|
|
105
|
+
llmJudge: null,
|
|
106
|
+
};
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
/** Public entry — synchronous. `llmCell` is llmJudge.O1 (or null). */
|
|
110
|
+
function analyze(db, session, difficulty = 2, llmCell = null) {
|
|
111
|
+
const ruleResult = analyzeRules(db, session, difficulty);
|
|
112
|
+
if (!llmCell) return ruleResult;
|
|
113
|
+
|
|
114
|
+
const cell = llmCell;
|
|
115
|
+
const m = {
|
|
116
|
+
first_take: mergeIndicator(cell.first_take, ruleResult.subScores.first_take, ruleResult.subLevels.first_take),
|
|
117
|
+
code_style: mergeIndicator(cell.code_style, ruleResult.subScores.code_style, ruleResult.subLevels.code_style),
|
|
118
|
+
completeness: mergeIndicator(cell.completeness, ruleResult.subScores.completeness, ruleResult.subLevels.completeness),
|
|
119
|
+
};
|
|
120
|
+
|
|
121
|
+
const subScores = { first_take: m.first_take.score, code_style: m.code_style.score, completeness: m.completeness.score };
|
|
122
|
+
const subLevels = { first_take: m.first_take.level, code_style: m.code_style.level, completeness: m.completeness.level };
|
|
123
|
+
|
|
124
|
+
const subEvidence = {
|
|
125
|
+
first_take: { ...ruleResult.subEvidence.first_take, what: m.first_take.evidence || ruleResult.subEvidence.first_take.what, level: subLevels.first_take, score: subScores.first_take, source: m.first_take.source },
|
|
126
|
+
code_style: { ...ruleResult.subEvidence.code_style, what: m.code_style.evidence || ruleResult.subEvidence.code_style.what, level: subLevels.code_style, score: subScores.code_style, source: m.code_style.source },
|
|
127
|
+
completeness: { ...ruleResult.subEvidence.completeness, what: m.completeness.evidence || ruleResult.subEvidence.completeness.what, level: subLevels.completeness, score: subScores.completeness, source: m.completeness.source },
|
|
128
|
+
};
|
|
129
|
+
|
|
130
|
+
const score = rollupDimension('O1', subScores);
|
|
131
|
+
const level = scoreToLevel(score);
|
|
132
|
+
const judgeSource = dimensionSource([m.first_take.source, m.code_style.source, m.completeness.source]);
|
|
133
|
+
|
|
134
|
+
return { subScores, subLevels, subEvidence, raw: { llmCell: cell, ruleRaw: ruleResult.raw }, score, level, judgeSource, llmJudge: null };
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
function buildSubEvidence(expls, ctx, difficulty) {
|
|
138
|
+
const { ft, cs, cp } = expls;
|
|
139
|
+
|
|
140
|
+
// rules
|
|
141
|
+
const { haveText, assistantTurns, correctionsAfter, assistantMsgsWithText, completenessHits } = ctx;
|
|
142
|
+
return {
|
|
143
|
+
first_take: makeEvidence({
|
|
144
|
+
key: 'first_take', label: '一次采纳率',
|
|
145
|
+
what: assistantTurns > 0
|
|
146
|
+
? `规则版:1 - (AI 回合后用户立即纠错的比例) = 1 - ${correctionsAfter} / ${assistantTurns} 个 AI 回合。`
|
|
147
|
+
: '规则版:无 AI 回合可评估。',
|
|
148
|
+
expl: ft, unit: '%', difficulty,
|
|
149
|
+
}),
|
|
150
|
+
code_style: makeEvidence({
|
|
151
|
+
key: 'code_style', label: '代码规范性',
|
|
152
|
+
what: haveText
|
|
153
|
+
? '规则版无法评估代码规范,默认给中性 0.7 分。开启 LLM judge 可获得真实评估。'
|
|
154
|
+
: '消息文本缺失,无法评估。',
|
|
155
|
+
expl: cs, unit: '%', difficulty,
|
|
156
|
+
}),
|
|
157
|
+
completeness: makeEvidence({
|
|
158
|
+
key: 'completeness', label: '方案完备性',
|
|
159
|
+
what: assistantMsgsWithText > 0
|
|
160
|
+
? `规则版:AI 消息中含完备性关键词("边界"、"异常"、"测试"等)的比例:${completenessHits} / ${assistantMsgsWithText}。`
|
|
161
|
+
: '规则版:无 AI 文本,无法评估。',
|
|
162
|
+
expl: cp, unit: '%', difficulty,
|
|
163
|
+
}),
|
|
164
|
+
};
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
module.exports = { analyze, analyzeRules };
|
|
@@ -1,104 +1,109 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* H1 — Problem Definition.
|
|
3
|
-
*
|
|
4
|
-
* Captures the human ability to turn a vague need into a precise,
|
|
5
|
-
* AI-executable problem. Three sub-indicators:
|
|
6
|
-
* • clarity — AI proactive-question count in the first 30%
|
|
7
|
-
* • converge — number of user-message rounds to convergence
|
|
8
|
-
* • drift — direction-change events
|
|
9
|
-
*
|
|
10
|
-
* See docs/superpowers/specs/2026-06-13-capability-model-v2.md §4.1.
|
|
11
|
-
*
|
|
12
|
-
* @author Felix
|
|
13
|
-
*/
|
|
14
|
-
|
|
15
|
-
'use strict';
|
|
16
|
-
|
|
17
|
-
const { queryAll } = require('../../db/queries');
|
|
18
|
-
const {
|
|
19
|
-
fetchMessages,
|
|
20
|
-
userMessages,
|
|
21
|
-
matchesAny,
|
|
22
|
-
DRIFT_PATTERNS,
|
|
23
|
-
} = require('../text-signals');
|
|
24
|
-
const { explainIndicator, rollupDimension, scoreToLevel, H1 } = require('../thresholds-v2');
|
|
25
|
-
const { makeEvidence } = require('../evidence-builder');
|
|
26
|
-
const { mergeIndicator, dimensionSource } = require('./llm-merge');
|
|
27
|
-
|
|
28
|
-
function analyze(db, session, difficulty = 2, llmCell = null) {
|
|
29
|
-
const messages = fetchMessages(db, session.id);
|
|
30
|
-
const users = userMessages(messages);
|
|
31
|
-
|
|
32
|
-
// ---- clarity: count of `question` tool calls in first 30% ----------
|
|
33
|
-
const toolCalls = queryAll(
|
|
34
|
-
db,
|
|
35
|
-
`SELECT tool_name, timestamp
|
|
36
|
-
FROM unified_tool_call
|
|
37
|
-
WHERE session_id = ?
|
|
38
|
-
ORDER BY timestamp ASC`,
|
|
39
|
-
[session.id]
|
|
40
|
-
);
|
|
41
|
-
let clarity = 0;
|
|
42
|
-
let clarityCutoff = 0;
|
|
43
|
-
if (toolCalls.length > 0) {
|
|
44
|
-
clarityCutoff = Math.max(1, Math.floor(toolCalls.length * 0.3));
|
|
45
|
-
clarity = toolCalls.slice(0, clarityCutoff).filter((t) => t.tool_name === 'question').length;
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
// ---- converge: number of user messages -----------------------------
|
|
49
|
-
const converge = users.length;
|
|
50
|
-
|
|
51
|
-
// ---- drift: keyword spotting on user text -------------------------
|
|
52
|
-
const haveText = users.some((m) => m.text && m.text.length > 0);
|
|
53
|
-
|
|
54
|
-
let drift = null;
|
|
55
|
-
if (haveText) {
|
|
56
|
-
drift = users.filter((m) => matchesAny(m.text, DRIFT_PATTERNS)).length;
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
// ---- eval each ----------------------------------------------------
|
|
60
|
-
const clarityE = explainIndicator(H1.clarity, clarity, difficulty);
|
|
61
|
-
const convergeE = explainIndicator(H1.converge, converge, difficulty);
|
|
62
|
-
const driftE = explainIndicator(H1.drift, drift, difficulty);
|
|
63
|
-
|
|
64
|
-
// Rule baseline per indicator (level + centred score).
|
|
65
|
-
const rule = {
|
|
66
|
-
clarity: { score: clarityE.score, level: clarityE.level },
|
|
67
|
-
converge: { score: convergeE.score, level: convergeE.level },
|
|
68
|
-
drift: { score: driftE.score, level: driftE.level },
|
|
69
|
-
};
|
|
70
|
-
|
|
71
|
-
// Merge LLM cell (if any) over the rule baseline.
|
|
72
|
-
const cell = llmCell || {};
|
|
73
|
-
const m = {
|
|
74
|
-
clarity: mergeIndicator(cell.clarity, rule.clarity.score, rule.clarity.level),
|
|
75
|
-
converge: mergeIndicator(cell.converge, rule.converge.score, rule.converge.level),
|
|
76
|
-
drift: mergeIndicator(cell.drift, rule.drift.score, rule.drift.level),
|
|
77
|
-
};
|
|
78
|
-
|
|
79
|
-
const subScores = { clarity: m.clarity.score, converge: m.converge.score, drift: m.drift.score };
|
|
80
|
-
const subLevels = { clarity: m.clarity.level, converge: m.converge.level, drift: m.drift.level };
|
|
81
|
-
|
|
82
|
-
// Evidence: prefer the LLM's cited rationale when that indicator used the LLM.
|
|
83
|
-
const subEvidence = {
|
|
84
|
-
clarity: makeEvidence({ key: 'clarity', label: '初始指令清晰度', what: m.clarity.evidence || `规则版:前 30% 工具调用中 question 次数 ${clarity}(共 ${toolCalls.length} 次调用)。`, expl: clarityE, unit: '次', difficulty }),
|
|
85
|
-
converge: makeEvidence({ key: 'converge', label: '任务收敛轮次', what: m.converge.evidence || `规则版:用户消息 ${converge} 条。`, expl: convergeE, unit: '轮', difficulty }),
|
|
86
|
-
drift: makeEvidence({ key: 'drift', label: '方向变更次数', what: m.drift.evidence || (haveText ? `规则版:方向变更关键词命中 ${drift} 条。` : '用户消息无文本,无法识别。'), expl: driftE, unit: '次', difficulty }),
|
|
87
|
-
};
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
|
|
1
|
+
/**
|
|
2
|
+
* H1 — Problem Definition.
|
|
3
|
+
*
|
|
4
|
+
* Captures the human ability to turn a vague need into a precise,
|
|
5
|
+
* AI-executable problem. Three sub-indicators:
|
|
6
|
+
* • clarity — count of AI `question` tool calls within the first 30% of the session's tool calls
|
|
7
|
+
* • converge — number of user-message rounds to convergence
|
|
8
|
+
* • drift — direction-change events
|
|
9
|
+
*
|
|
10
|
+
* See docs/superpowers/specs/2026-06-13-capability-model-v2.md §4.1.
|
|
11
|
+
*
|
|
12
|
+
* @author Felix
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
'use strict';
|
|
16
|
+
|
|
17
|
+
const { queryAll } = require('../../db/queries');
|
|
18
|
+
const {
|
|
19
|
+
fetchMessages,
|
|
20
|
+
userMessages,
|
|
21
|
+
matchesAny,
|
|
22
|
+
DRIFT_PATTERNS,
|
|
23
|
+
} = require('../text-signals');
|
|
24
|
+
const { explainIndicator, rollupDimension, scoreToLevel, H1 } = require('../thresholds-v2');
|
|
25
|
+
const { makeEvidence } = require('../evidence-builder');
|
|
26
|
+
const { mergeIndicator, dimensionSource } = require('./llm-merge');
|
|
27
|
+
|
|
28
|
+
function analyze(db, session, difficulty = 2, llmCell = null) {
|
|
29
|
+
const messages = fetchMessages(db, session.id);
|
|
30
|
+
const users = userMessages(messages);
|
|
31
|
+
|
|
32
|
+
// ---- clarity: count of `question` tool calls in first 30% ----------
|
|
33
|
+
const toolCalls = queryAll(
|
|
34
|
+
db,
|
|
35
|
+
`SELECT tool_name, timestamp
|
|
36
|
+
FROM unified_tool_call
|
|
37
|
+
WHERE session_id = ?
|
|
38
|
+
ORDER BY timestamp ASC`,
|
|
39
|
+
[session.id]
|
|
40
|
+
);
|
|
41
|
+
let clarity = 0;
|
|
42
|
+
let clarityCutoff = 0;
|
|
43
|
+
if (toolCalls.length > 0) {
|
|
44
|
+
clarityCutoff = Math.max(1, Math.floor(toolCalls.length * 0.3));
|
|
45
|
+
clarity = toolCalls.slice(0, clarityCutoff).filter((t) => t.tool_name === 'question').length;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
// ---- converge: number of user messages -----------------------------
|
|
49
|
+
const converge = users.length;
|
|
50
|
+
|
|
51
|
+
// ---- drift: keyword spotting on user text -------------------------
|
|
52
|
+
const haveText = users.some((m) => m.text && m.text.length > 0);
|
|
53
|
+
|
|
54
|
+
let drift = null;
|
|
55
|
+
if (haveText) {
|
|
56
|
+
drift = users.filter((m) => matchesAny(m.text, DRIFT_PATTERNS)).length;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
// ---- eval each ----------------------------------------------------
|
|
60
|
+
const clarityE = explainIndicator(H1.clarity, clarity, difficulty);
|
|
61
|
+
const convergeE = explainIndicator(H1.converge, converge, difficulty);
|
|
62
|
+
const driftE = explainIndicator(H1.drift, drift, difficulty);
|
|
63
|
+
|
|
64
|
+
// Rule baseline per indicator (level + centred score).
|
|
65
|
+
const rule = {
|
|
66
|
+
clarity: { score: clarityE.score, level: clarityE.level },
|
|
67
|
+
converge: { score: convergeE.score, level: convergeE.level },
|
|
68
|
+
drift: { score: driftE.score, level: driftE.level },
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
// Merge LLM cell (if any) over the rule baseline.
|
|
72
|
+
const cell = llmCell || {};
|
|
73
|
+
const m = {
|
|
74
|
+
clarity: mergeIndicator(cell.clarity, rule.clarity.score, rule.clarity.level),
|
|
75
|
+
converge: mergeIndicator(cell.converge, rule.converge.score, rule.converge.level),
|
|
76
|
+
drift: mergeIndicator(cell.drift, rule.drift.score, rule.drift.level),
|
|
77
|
+
};
|
|
78
|
+
|
|
79
|
+
const subScores = { clarity: m.clarity.score, converge: m.converge.score, drift: m.drift.score };
|
|
80
|
+
const subLevels = { clarity: m.clarity.level, converge: m.converge.level, drift: m.drift.level };
|
|
81
|
+
|
|
82
|
+
// Evidence: prefer the LLM's cited rationale when that indicator used the LLM.
|
|
83
|
+
const subEvidence = {
|
|
84
|
+
clarity: makeEvidence({ key: 'clarity', label: '初始指令清晰度', what: m.clarity.evidence || `规则版:前 30% 工具调用中 question 次数 ${clarity}(共 ${toolCalls.length} 次调用)。`, expl: clarityE, unit: '次', difficulty }),
|
|
85
|
+
converge: makeEvidence({ key: 'converge', label: '任务收敛轮次', what: m.converge.evidence || `规则版:用户消息 ${converge} 条。`, expl: convergeE, unit: '轮', difficulty }),
|
|
86
|
+
drift: makeEvidence({ key: 'drift', label: '方向变更次数', what: m.drift.evidence || (haveText ? `规则版:方向变更关键词命中 ${drift} 条。` : '用户消息无文本,无法识别。'), expl: driftE, unit: '次', difficulty }),
|
|
87
|
+
};
|
|
88
|
+
|
|
89
|
+
// Tag each sub-indicator with its source (llm / rules) for the UI badge.
|
|
90
|
+
subEvidence.clarity.source = m.clarity.source;
|
|
91
|
+
subEvidence.converge.source = m.converge.source;
|
|
92
|
+
subEvidence.drift.source = m.drift.source;
|
|
93
|
+
|
|
94
|
+
const score = rollupDimension('H1', subScores);
|
|
95
|
+
const level = scoreToLevel(score);
|
|
96
|
+
const judgeSource = dimensionSource([m.clarity.source, m.converge.source, m.drift.source]);
|
|
97
|
+
|
|
98
|
+
return {
|
|
99
|
+
subScores,
|
|
100
|
+
subLevels,
|
|
101
|
+
subEvidence,
|
|
102
|
+
raw: { clarity, converge, drift, difficulty, haveText, toolCallCount: toolCalls.length, clarityCutoff },
|
|
103
|
+
score,
|
|
104
|
+
level,
|
|
105
|
+
judgeSource,
|
|
106
|
+
};
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
module.exports = { analyze };
|
package/server/analysis/job.js
CHANGED
|
@@ -27,17 +27,25 @@ const { aggregateDailySummary } = require('./daily-aggregator');
|
|
|
27
27
|
/**
|
|
28
28
|
* Build a list of YYYY-MM-DD strings starting from yesterday going back
|
|
29
29
|
* `days` days, ordered most-recent first.
|
|
30
|
-
*
|
|
31
|
-
*
|
|
32
|
-
*
|
|
30
|
+
* Today is EXCLUDED — sessions on the current calendar day are still
|
|
31
|
+
* actively being held (judge / advice analysing them would race against
|
|
32
|
+
* the user typing more messages, churn the cache, and waste LLM calls).
|
|
33
|
+
* Yesterday + N earlier days only. Callers who really want to (re)
|
|
34
|
+
* analyze today must pass `dates: ['YYYY-MM-DD']` explicitly to
|
|
35
|
+
* runAnalysisJob, or use the per-session reanalyze endpoint
|
|
36
|
+
* (`POST /api/analysis/session/:id`) which bypasses this path.
|
|
33
37
|
*
|
|
34
|
-
*
|
|
38
|
+
* Default `days = 7` therefore produces 7 dates (yesterday through 7
|
|
39
|
+
* days ago), not 8.
|
|
40
|
+
*
|
|
41
|
+
* @param {number} days number of past days to include (yesterday-anchored)
|
|
35
42
|
* @returns {string[]}
|
|
36
43
|
*/
|
|
37
44
|
function buildDateList(days) {
|
|
38
45
|
const dates = [];
|
|
39
46
|
const now = new Date();
|
|
40
|
-
|
|
47
|
+
// Start at i=1 → yesterday; end at i=days inclusive → `days` total dates.
|
|
48
|
+
for (let i = 1; i <= days; i++) {
|
|
41
49
|
const d = new Date(now);
|
|
42
50
|
d.setDate(d.getDate() - i);
|
|
43
51
|
dates.push(formatDate(d));
|
|
@@ -63,6 +71,11 @@ function formatDate(d) {
|
|
|
63
71
|
* We skip this session in the job so we don't analyze a still-growing
|
|
64
72
|
* conversation; the next job pass will pick it up once it settles.
|
|
65
73
|
*
|
|
74
|
+
* Note: the default invocation of runAnalysisJob no longer includes
|
|
75
|
+
* today at all (see buildDateList). This function only fires when a
|
|
76
|
+
* caller explicitly passes `dates` that includes today — in which case
|
|
77
|
+
* we still shield the actively-typed-in session.
|
|
78
|
+
*
|
|
66
79
|
* @param {object} db
|
|
67
80
|
* @param {string} date YYYY-MM-DD
|
|
68
81
|
* @param {object[]} sessions candidate session rows for that date
|
|
@@ -156,7 +169,24 @@ async function analyzeAndStoreSession(db, session, opts = {}) {
|
|
|
156
169
|
|
|
157
170
|
/**
|
|
158
171
|
* Run analysis job: analyze unanalyzed sessions in reverse chronological order.
|
|
159
|
-
*
|
|
172
|
+
*
|
|
173
|
+
* # Today policy
|
|
174
|
+
*
|
|
175
|
+
* Default invocation ({days}) skips TODAY entirely — sessions on the
|
|
176
|
+
* current calendar day are likely still being held, and analysing
|
|
177
|
+
* them now means churning the LLM cache for results that will be stale
|
|
178
|
+
* within minutes. The boot path (bin/aboss.js) uses this default, so
|
|
179
|
+
* "open aboss → background scan" never touches today.
|
|
180
|
+
*
|
|
181
|
+
* Two escape hatches keep today analysable when the user really asks:
|
|
182
|
+
* - `dates: ['YYYY-MM-DD']` explicit list → not filtered. Used by
|
|
183
|
+
* manual triggers that pass a specific date set.
|
|
184
|
+
* - `POST /api/analysis/session/:id` → goes straight through
|
|
185
|
+
* `analyzeAndStoreSession`, doesn't use this job loop, so today's
|
|
186
|
+
* "Re-analyze" button in the UI keeps working.
|
|
187
|
+
*
|
|
188
|
+
* Default: last 7 days (yesterday → 7 days ago). Processes one date at
|
|
189
|
+
* a time, most recent first.
|
|
160
190
|
*
|
|
161
191
|
* @param {object} db - sql.js boss.db instance
|
|
162
192
|
* @param {object} options - {
|
|
@@ -164,6 +194,7 @@ async function analyzeAndStoreSession(db, session, opts = {}) {
|
|
|
164
194
|
* onProgress: fn,
|
|
165
195
|
* forceReanalyze: false,
|
|
166
196
|
* dates: string[] // optional explicit YYYY-MM-DD list; overrides `days`
|
|
197
|
+
* // AND bypasses the "skip today" rule
|
|
167
198
|
* }
|
|
168
199
|
* @returns {Promise<{analyzed: number, errors: number, skipped: number}>}
|
|
169
200
|
*/
|