agentboss 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,167 +1,167 @@
1
- /**
2
- * O1 — AI Output Quality.
3
- *
4
- * Sub-indicators:
5
- * • first_take — fraction of assistant turns NOT followed by an
6
- * immediate user correction
7
- * • code_style — LLM-judged (rule fallback returns 0.7 neutral)
8
- * • completeness — assistant text mentions edge/error/test concepts
9
- * (LLM judge is more accurate; rules give a proxy)
10
- *
11
- * See spec §4.6.
12
- *
13
- * @author Felix
14
- */
15
-
16
- 'use strict';
17
-
18
- const {
19
- fetchMessages,
20
- matchesAny,
21
- } = require('../text-signals');
22
- const { explainIndicator, rollupDimension, scoreToLevel, O1 } = require('../thresholds-v2');
23
- const { mergeIndicator, dimensionSource } = require('./llm-merge');
24
- const { makeEvidence } = require('../evidence-builder');
25
-
26
- const CORRECTION_PATTERNS = [
27
- /改一下/, /不对/, /(再|重新)(写|来|改|做)/, /(错|有问题|有bug)/, /漏了/, /换/,
28
- /fix that/i, /that'?s wrong/i, /redo/i, /try again/i, /not quite/i,
29
- ];
30
-
31
- const COMPLETENESS_PATTERNS = [
32
- /边界/, /异常/, /错误处理/, /测试/, /单元测试/, /回退/, /兜底/,
33
- /edge case/i, /error handling/i, /test/i, /fallback/i, /exception/i,
34
- ];
35
-
36
- function analyzeRules(db, session, difficulty = 2) {
37
- const messages = fetchMessages(db, session.id);
38
- const haveText = messages.some((m) => m.text && m.text.length > 0);
39
-
40
- let first_take = null;
41
- let assistantTurns = 0;
42
- let correctionsAfter = 0;
43
-
44
- for (let i = 0; i < messages.length; i++) {
45
- const m = messages[i];
46
- if (m.role !== 'assistant') continue;
47
- assistantTurns++;
48
- const next = i + 1 < messages.length ? messages[i + 1] : null;
49
- if (!next || next.role !== 'user' || !next.text) continue;
50
- if (matchesAny(next.text, CORRECTION_PATTERNS)) correctionsAfter++;
51
- }
52
- if (assistantTurns > 0) {
53
- first_take = 1 - correctionsAfter / assistantTurns;
54
- }
55
-
56
- const code_style = haveText ? 0.7 : null;
57
-
58
- let completeness = null;
59
- let completenessHits = 0;
60
- let assistantMsgsWithText = 0;
61
- if (haveText) {
62
- const assistantMsgs = messages.filter((m) => m.role === 'assistant' && m.text);
63
- assistantMsgsWithText = assistantMsgs.length;
64
- if (assistantMsgs.length > 0) {
65
- completenessHits = assistantMsgs.filter((m) => matchesAny(m.text, COMPLETENESS_PATTERNS)).length;
66
- completeness = completenessHits / assistantMsgs.length;
67
- }
68
- }
69
-
70
- const ftE = explainIndicator(O1.first_take, first_take, difficulty);
71
- const csE = explainIndicator(O1.code_style, code_style, difficulty);
72
- const cpE = explainIndicator(O1.completeness, completeness, difficulty);
73
-
74
- const subScores = {
75
- first_take: ftE.score,
76
- code_style: csE.score,
77
- completeness: cpE.score,
78
- };
79
- const subLevels = {
80
- first_take: ftE.level,
81
- code_style: csE.level,
82
- completeness: cpE.level,
83
- };
84
-
85
- const subEvidence = buildSubEvidence(
86
- { ft: ftE, cs: csE, cp: cpE },
87
- {
88
- mode: 'rules', haveText, assistantTurns, correctionsAfter,
89
- assistantMsgsWithText, completenessHits,
90
- },
91
- difficulty
92
- );
93
-
94
- const score = rollupDimension('O1', subScores);
95
- const level = scoreToLevel(score);
96
-
97
- return {
98
- subScores,
99
- subLevels,
100
- subEvidence,
101
- raw: { first_take, code_style, completeness, assistantTurns, correctionsAfter, completenessHits, assistantMsgsWithText },
102
- score,
103
- level,
104
- judgeSource: 'rules',
105
- llmJudge: null,
106
- };
107
- }
108
-
109
- /** Public entry — synchronous. `llmCell` is llmJudge.O1 (or null). */
110
- function analyze(db, session, difficulty = 2, llmCell = null) {
111
- const ruleResult = analyzeRules(db, session, difficulty);
112
- if (!llmCell) return ruleResult;
113
-
114
- const cell = llmCell;
115
- const m = {
116
- first_take: mergeIndicator(cell.first_take, ruleResult.subScores.first_take, ruleResult.subLevels.first_take),
117
- code_style: mergeIndicator(cell.code_style, ruleResult.subScores.code_style, ruleResult.subLevels.code_style),
118
- completeness: mergeIndicator(cell.completeness, ruleResult.subScores.completeness, ruleResult.subLevels.completeness),
119
- };
120
-
121
- const subScores = { first_take: m.first_take.score, code_style: m.code_style.score, completeness: m.completeness.score };
122
- const subLevels = { first_take: m.first_take.level, code_style: m.code_style.level, completeness: m.completeness.level };
123
-
124
- const subEvidence = {
125
- first_take: { ...ruleResult.subEvidence.first_take, what: m.first_take.evidence || ruleResult.subEvidence.first_take.what, level: subLevels.first_take, score: subScores.first_take },
126
- code_style: { ...ruleResult.subEvidence.code_style, what: m.code_style.evidence || ruleResult.subEvidence.code_style.what, level: subLevels.code_style, score: subScores.code_style },
127
- completeness: { ...ruleResult.subEvidence.completeness, what: m.completeness.evidence || ruleResult.subEvidence.completeness.what, level: subLevels.completeness, score: subScores.completeness },
128
- };
129
-
130
- const score = rollupDimension('O1', subScores);
131
- const level = scoreToLevel(score);
132
- const judgeSource = dimensionSource([m.first_take.source, m.code_style.source, m.completeness.source]);
133
-
134
- return { subScores, subLevels, subEvidence, raw: { llmCell: cell, ruleRaw: ruleResult.raw }, score, level, judgeSource, llmJudge: null };
135
- }
136
-
137
- function buildSubEvidence(expls, ctx, difficulty) {
138
- const { ft, cs, cp } = expls;
139
-
140
- // rules
141
- const { haveText, assistantTurns, correctionsAfter, assistantMsgsWithText, completenessHits } = ctx;
142
- return {
143
- first_take: makeEvidence({
144
- key: 'first_take', label: '一次采纳率',
145
- what: assistantTurns > 0
146
- ? `规则版:1 - (AI 回合后用户立即纠错的比例) = 1 - ${correctionsAfter} / ${assistantTurns} 个 AI 回合。`
147
- : '规则版:无 AI 回合可评估。',
148
- expl: ft, unit: '%', difficulty,
149
- }),
150
- code_style: makeEvidence({
151
- key: 'code_style', label: '代码规范性',
152
- what: haveText
153
- ? '规则版无法评估代码规范,默认给中性 0.7 分。开启 LLM judge 可获得真实评估。'
154
- : '消息文本缺失,无法评估。',
155
- expl: cs, unit: '%', difficulty,
156
- }),
157
- completeness: makeEvidence({
158
- key: 'completeness', label: '方案完备性',
159
- what: assistantMsgsWithText > 0
160
- ? `规则版:AI 消息中含完备性关键词("边界"、"异常"、"测试"等)的比例:${completenessHits} / ${assistantMsgsWithText}。`
161
- : '规则版:无 AI 文本,无法评估。',
162
- expl: cp, unit: '%', difficulty,
163
- }),
164
- };
165
- }
166
-
167
- module.exports = { analyze, analyzeRules };
1
+ /**
2
+ * O1 — AI Output Quality.
3
+ *
4
+ * Sub-indicators:
5
+ * • first_take — fraction of assistant turns NOT followed by an
6
+ * immediate user correction
7
+ * • code_style — LLM-judged (rule fallback returns 0.7 neutral)
8
+ * • completeness — assistant text mentions edge/error/test concepts
9
+ * (LLM judge is more accurate; rules give a proxy)
10
+ *
11
+ * See spec §4.6.
12
+ *
13
+ * @author Felix
14
+ */
15
+
16
+ 'use strict';
17
+
18
+ const {
19
+ fetchMessages,
20
+ matchesAny,
21
+ } = require('../text-signals');
22
+ const { explainIndicator, rollupDimension, scoreToLevel, O1 } = require('../thresholds-v2');
23
+ const { mergeIndicator, dimensionSource } = require('./llm-merge');
24
+ const { makeEvidence } = require('../evidence-builder');
25
+
26
+ const CORRECTION_PATTERNS = [
27
+ /改一下/, /不对/, /(再|重新)(写|来|改|做)/, /(错|有问题|有bug)/, /漏了/, /换/,
28
+ /fix that/i, /that'?s wrong/i, /redo/i, /try again/i, /not quite/i,
29
+ ];
30
+
31
+ const COMPLETENESS_PATTERNS = [
32
+ /边界/, /异常/, /错误处理/, /测试/, /单元测试/, /回退/, /兜底/,
33
+ /edge case/i, /error handling/i, /test/i, /fallback/i, /exception/i,
34
+ ];
35
+
36
+ function analyzeRules(db, session, difficulty = 2) {
37
+ const messages = fetchMessages(db, session.id);
38
+ const haveText = messages.some((m) => m.text && m.text.length > 0);
39
+
40
+ let first_take = null;
41
+ let assistantTurns = 0;
42
+ let correctionsAfter = 0;
43
+
44
+ for (let i = 0; i < messages.length; i++) {
45
+ const m = messages[i];
46
+ if (m.role !== 'assistant') continue;
47
+ assistantTurns++;
48
+ const next = i + 1 < messages.length ? messages[i + 1] : null;
49
+ if (!next || next.role !== 'user' || !next.text) continue;
50
+ if (matchesAny(next.text, CORRECTION_PATTERNS)) correctionsAfter++;
51
+ }
52
+ if (assistantTurns > 0) {
53
+ first_take = 1 - correctionsAfter / assistantTurns;
54
+ }
55
+
56
+ const code_style = haveText ? 0.7 : null;
57
+
58
+ let completeness = null;
59
+ let completenessHits = 0;
60
+ let assistantMsgsWithText = 0;
61
+ if (haveText) {
62
+ const assistantMsgs = messages.filter((m) => m.role === 'assistant' && m.text);
63
+ assistantMsgsWithText = assistantMsgs.length;
64
+ if (assistantMsgs.length > 0) {
65
+ completenessHits = assistantMsgs.filter((m) => matchesAny(m.text, COMPLETENESS_PATTERNS)).length;
66
+ completeness = completenessHits / assistantMsgs.length;
67
+ }
68
+ }
69
+
70
+ const ftE = explainIndicator(O1.first_take, first_take, difficulty);
71
+ const csE = explainIndicator(O1.code_style, code_style, difficulty);
72
+ const cpE = explainIndicator(O1.completeness, completeness, difficulty);
73
+
74
+ const subScores = {
75
+ first_take: ftE.score,
76
+ code_style: csE.score,
77
+ completeness: cpE.score,
78
+ };
79
+ const subLevels = {
80
+ first_take: ftE.level,
81
+ code_style: csE.level,
82
+ completeness: cpE.level,
83
+ };
84
+
85
+ const subEvidence = buildSubEvidence(
86
+ { ft: ftE, cs: csE, cp: cpE },
87
+ {
88
+ mode: 'rules', haveText, assistantTurns, correctionsAfter,
89
+ assistantMsgsWithText, completenessHits,
90
+ },
91
+ difficulty
92
+ );
93
+
94
+ const score = rollupDimension('O1', subScores);
95
+ const level = scoreToLevel(score);
96
+
97
+ return {
98
+ subScores,
99
+ subLevels,
100
+ subEvidence,
101
+ raw: { first_take, code_style, completeness, assistantTurns, correctionsAfter, completenessHits, assistantMsgsWithText },
102
+ score,
103
+ level,
104
+ judgeSource: 'rules',
105
+ llmJudge: null,
106
+ };
107
+ }
108
+
109
+ /** Public entry — synchronous. `llmCell` is llmJudge.O1 (or null). */
110
+ function analyze(db, session, difficulty = 2, llmCell = null) {
111
+ const ruleResult = analyzeRules(db, session, difficulty);
112
+ if (!llmCell) return ruleResult;
113
+
114
+ const cell = llmCell;
115
+ const m = {
116
+ first_take: mergeIndicator(cell.first_take, ruleResult.subScores.first_take, ruleResult.subLevels.first_take),
117
+ code_style: mergeIndicator(cell.code_style, ruleResult.subScores.code_style, ruleResult.subLevels.code_style),
118
+ completeness: mergeIndicator(cell.completeness, ruleResult.subScores.completeness, ruleResult.subLevels.completeness),
119
+ };
120
+
121
+ const subScores = { first_take: m.first_take.score, code_style: m.code_style.score, completeness: m.completeness.score };
122
+ const subLevels = { first_take: m.first_take.level, code_style: m.code_style.level, completeness: m.completeness.level };
123
+
124
+ const subEvidence = {
125
+ first_take: { ...ruleResult.subEvidence.first_take, what: m.first_take.evidence || ruleResult.subEvidence.first_take.what, level: subLevels.first_take, score: subScores.first_take, source: m.first_take.source },
126
+ code_style: { ...ruleResult.subEvidence.code_style, what: m.code_style.evidence || ruleResult.subEvidence.code_style.what, level: subLevels.code_style, score: subScores.code_style, source: m.code_style.source },
127
+ completeness: { ...ruleResult.subEvidence.completeness, what: m.completeness.evidence || ruleResult.subEvidence.completeness.what, level: subLevels.completeness, score: subScores.completeness, source: m.completeness.source },
128
+ };
129
+
130
+ const score = rollupDimension('O1', subScores);
131
+ const level = scoreToLevel(score);
132
+ const judgeSource = dimensionSource([m.first_take.source, m.code_style.source, m.completeness.source]);
133
+
134
+ return { subScores, subLevels, subEvidence, raw: { llmCell: cell, ruleRaw: ruleResult.raw }, score, level, judgeSource, llmJudge: null };
135
+ }
136
+
137
+ function buildSubEvidence(expls, ctx, difficulty) {
138
+ const { ft, cs, cp } = expls;
139
+
140
+ // rules
141
+ const { haveText, assistantTurns, correctionsAfter, assistantMsgsWithText, completenessHits } = ctx;
142
+ return {
143
+ first_take: makeEvidence({
144
+ key: 'first_take', label: '一次采纳率',
145
+ what: assistantTurns > 0
146
+ ? `规则版:1 - (AI 回合后用户立即纠错的比例) = 1 - ${correctionsAfter} / ${assistantTurns} 个 AI 回合。`
147
+ : '规则版:无 AI 回合可评估。',
148
+ expl: ft, unit: '%', difficulty,
149
+ }),
150
+ code_style: makeEvidence({
151
+ key: 'code_style', label: '代码规范性',
152
+ what: haveText
153
+ ? '规则版无法评估代码规范,默认给中性 0.7 分。开启 LLM judge 可获得真实评估。'
154
+ : '消息文本缺失,无法评估。',
155
+ expl: cs, unit: '%', difficulty,
156
+ }),
157
+ completeness: makeEvidence({
158
+ key: 'completeness', label: '方案完备性',
159
+ what: assistantMsgsWithText > 0
160
+ ? `规则版:AI 消息中含完备性关键词("边界"、"异常"、"测试"等)的比例:${completenessHits} / ${assistantMsgsWithText}。`
161
+ : '规则版:无 AI 文本,无法评估。',
162
+ expl: cp, unit: '%', difficulty,
163
+ }),
164
+ };
165
+ }
166
+
167
+ module.exports = { analyze, analyzeRules };
@@ -1,104 +1,109 @@
1
- /**
2
- * H1 — Problem Definition.
3
- *
4
- * Captures the human ability to turn a vague need into a precise,
5
- * AI-executable problem. Three sub-indicators:
6
- * • clarity — AI proactive-question count in the first 30%
7
- * • converge — number of user-message rounds to convergence
8
- * • drift — direction-change events
9
- *
10
- * See docs/superpowers/specs/2026-06-13-capability-model-v2.md §4.1.
11
- *
12
- * @author Felix
13
- */
14
-
15
- 'use strict';
16
-
17
- const { queryAll } = require('../../db/queries');
18
- const {
19
- fetchMessages,
20
- userMessages,
21
- matchesAny,
22
- DRIFT_PATTERNS,
23
- } = require('../text-signals');
24
- const { explainIndicator, rollupDimension, scoreToLevel, H1 } = require('../thresholds-v2');
25
- const { makeEvidence } = require('../evidence-builder');
26
- const { mergeIndicator, dimensionSource } = require('./llm-merge');
27
-
28
- function analyze(db, session, difficulty = 2, llmCell = null) {
29
- const messages = fetchMessages(db, session.id);
30
- const users = userMessages(messages);
31
-
32
- // ---- clarity: count of `question` tool calls in first 30% ----------
33
- const toolCalls = queryAll(
34
- db,
35
- `SELECT tool_name, timestamp
36
- FROM unified_tool_call
37
- WHERE session_id = ?
38
- ORDER BY timestamp ASC`,
39
- [session.id]
40
- );
41
- let clarity = 0;
42
- let clarityCutoff = 0;
43
- if (toolCalls.length > 0) {
44
- clarityCutoff = Math.max(1, Math.floor(toolCalls.length * 0.3));
45
- clarity = toolCalls.slice(0, clarityCutoff).filter((t) => t.tool_name === 'question').length;
46
- }
47
-
48
- // ---- converge: number of user messages -----------------------------
49
- const converge = users.length;
50
-
51
- // ---- drift: keyword spotting on user text -------------------------
52
- const haveText = users.some((m) => m.text && m.text.length > 0);
53
-
54
- let drift = null;
55
- if (haveText) {
56
- drift = users.filter((m) => matchesAny(m.text, DRIFT_PATTERNS)).length;
57
- }
58
-
59
- // ---- eval each ----------------------------------------------------
60
- const clarityE = explainIndicator(H1.clarity, clarity, difficulty);
61
- const convergeE = explainIndicator(H1.converge, converge, difficulty);
62
- const driftE = explainIndicator(H1.drift, drift, difficulty);
63
-
64
- // Rule baseline per indicator (level + centred score).
65
- const rule = {
66
- clarity: { score: clarityE.score, level: clarityE.level },
67
- converge: { score: convergeE.score, level: convergeE.level },
68
- drift: { score: driftE.score, level: driftE.level },
69
- };
70
-
71
- // Merge LLM cell (if any) over the rule baseline.
72
- const cell = llmCell || {};
73
- const m = {
74
- clarity: mergeIndicator(cell.clarity, rule.clarity.score, rule.clarity.level),
75
- converge: mergeIndicator(cell.converge, rule.converge.score, rule.converge.level),
76
- drift: mergeIndicator(cell.drift, rule.drift.score, rule.drift.level),
77
- };
78
-
79
- const subScores = { clarity: m.clarity.score, converge: m.converge.score, drift: m.drift.score };
80
- const subLevels = { clarity: m.clarity.level, converge: m.converge.level, drift: m.drift.level };
81
-
82
- // Evidence: prefer the LLM's cited rationale when that indicator used the LLM.
83
- const subEvidence = {
84
- clarity: makeEvidence({ key: 'clarity', label: '初始指令清晰度', what: m.clarity.evidence || `规则版:前 30% 工具调用中 question 次数 ${clarity}(共 ${toolCalls.length} 次调用)。`, expl: clarityE, unit: '次', difficulty }),
85
- converge: makeEvidence({ key: 'converge', label: '任务收敛轮次', what: m.converge.evidence || `规则版:用户消息 ${converge} 条。`, expl: convergeE, unit: '轮', difficulty }),
86
- drift: makeEvidence({ key: 'drift', label: '方向变更次数', what: m.drift.evidence || (haveText ? `规则版:方向变更关键词命中 ${drift} 条。` : '用户消息无文本,无法识别。'), expl: driftE, unit: '次', difficulty }),
87
- };
88
-
89
- const score = rollupDimension('H1', subScores);
90
- const level = scoreToLevel(score);
91
- const judgeSource = dimensionSource([m.clarity.source, m.converge.source, m.drift.source]);
92
-
93
- return {
94
- subScores,
95
- subLevels,
96
- subEvidence,
97
- raw: { clarity, converge, drift, difficulty, haveText, toolCallCount: toolCalls.length, clarityCutoff },
98
- score,
99
- level,
100
- judgeSource,
101
- };
102
- }
103
-
104
- module.exports = { analyze };
1
+ /**
2
+ * H1 — Problem Definition.
3
+ *
4
+ * Captures the human ability to turn a vague need into a precise,
5
+ * AI-executable problem. Three sub-indicators:
6
+ * • clarity — AI proactive-question count in the first 30%
7
+ * • converge — number of user-message rounds to convergence
8
+ * • drift — direction-change events
9
+ *
10
+ * See docs/superpowers/specs/2026-06-13-capability-model-v2.md §4.1.
11
+ *
12
+ * @author Felix
13
+ */
14
+
15
+ 'use strict';
16
+
17
+ const { queryAll } = require('../../db/queries');
18
+ const {
19
+ fetchMessages,
20
+ userMessages,
21
+ matchesAny,
22
+ DRIFT_PATTERNS,
23
+ } = require('../text-signals');
24
+ const { explainIndicator, rollupDimension, scoreToLevel, H1 } = require('../thresholds-v2');
25
+ const { makeEvidence } = require('../evidence-builder');
26
+ const { mergeIndicator, dimensionSource } = require('./llm-merge');
27
+
28
+ function analyze(db, session, difficulty = 2, llmCell = null) {
29
+ const messages = fetchMessages(db, session.id);
30
+ const users = userMessages(messages);
31
+
32
+ // ---- clarity: count of `question` tool calls in first 30% ----------
33
+ const toolCalls = queryAll(
34
+ db,
35
+ `SELECT tool_name, timestamp
36
+ FROM unified_tool_call
37
+ WHERE session_id = ?
38
+ ORDER BY timestamp ASC`,
39
+ [session.id]
40
+ );
41
+ let clarity = 0;
42
+ let clarityCutoff = 0;
43
+ if (toolCalls.length > 0) {
44
+ clarityCutoff = Math.max(1, Math.floor(toolCalls.length * 0.3));
45
+ clarity = toolCalls.slice(0, clarityCutoff).filter((t) => t.tool_name === 'question').length;
46
+ }
47
+
48
+ // ---- converge: number of user messages -----------------------------
49
+ const converge = users.length;
50
+
51
+ // ---- drift: keyword spotting on user text -------------------------
52
+ const haveText = users.some((m) => m.text && m.text.length > 0);
53
+
54
+ let drift = null;
55
+ if (haveText) {
56
+ drift = users.filter((m) => matchesAny(m.text, DRIFT_PATTERNS)).length;
57
+ }
58
+
59
+ // ---- eval each ----------------------------------------------------
60
+ const clarityE = explainIndicator(H1.clarity, clarity, difficulty);
61
+ const convergeE = explainIndicator(H1.converge, converge, difficulty);
62
+ const driftE = explainIndicator(H1.drift, drift, difficulty);
63
+
64
+ // Rule baseline per indicator (level + centred score).
65
+ const rule = {
66
+ clarity: { score: clarityE.score, level: clarityE.level },
67
+ converge: { score: convergeE.score, level: convergeE.level },
68
+ drift: { score: driftE.score, level: driftE.level },
69
+ };
70
+
71
+ // Merge LLM cell (if any) over the rule baseline.
72
+ const cell = llmCell || {};
73
+ const m = {
74
+ clarity: mergeIndicator(cell.clarity, rule.clarity.score, rule.clarity.level),
75
+ converge: mergeIndicator(cell.converge, rule.converge.score, rule.converge.level),
76
+ drift: mergeIndicator(cell.drift, rule.drift.score, rule.drift.level),
77
+ };
78
+
79
+ const subScores = { clarity: m.clarity.score, converge: m.converge.score, drift: m.drift.score };
80
+ const subLevels = { clarity: m.clarity.level, converge: m.converge.level, drift: m.drift.level };
81
+
82
+ // Evidence: prefer the LLM's cited rationale when that indicator used the LLM.
83
+ const subEvidence = {
84
+ clarity: makeEvidence({ key: 'clarity', label: '初始指令清晰度', what: m.clarity.evidence || `规则版:前 30% 工具调用中 question 次数 ${clarity}(共 ${toolCalls.length} 次调用)。`, expl: clarityE, unit: '次', difficulty }),
85
+ converge: makeEvidence({ key: 'converge', label: '任务收敛轮次', what: m.converge.evidence || `规则版:用户消息 ${converge} 条。`, expl: convergeE, unit: '轮', difficulty }),
86
+ drift: makeEvidence({ key: 'drift', label: '方向变更次数', what: m.drift.evidence || (haveText ? `规则版:方向变更关键词命中 ${drift} 条。` : '用户消息无文本,无法识别。'), expl: driftE, unit: '次', difficulty }),
87
+ };
88
+
89
+ // Tag each sub-indicator with its source (llm / rules) for the UI badge.
90
+ subEvidence.clarity.source = m.clarity.source;
91
+ subEvidence.converge.source = m.converge.source;
92
+ subEvidence.drift.source = m.drift.source;
93
+
94
+ const score = rollupDimension('H1', subScores);
95
+ const level = scoreToLevel(score);
96
+ const judgeSource = dimensionSource([m.clarity.source, m.converge.source, m.drift.source]);
97
+
98
+ return {
99
+ subScores,
100
+ subLevels,
101
+ subEvidence,
102
+ raw: { clarity, converge, drift, difficulty, haveText, toolCallCount: toolCalls.length, clarityCutoff },
103
+ score,
104
+ level,
105
+ judgeSource,
106
+ };
107
+ }
108
+
109
+ module.exports = { analyze };
@@ -27,17 +27,25 @@ const { aggregateDailySummary } = require('./daily-aggregator');
27
27
  /**
28
28
  * Build a list of YYYY-MM-DD strings starting from today going back
29
29
  * `days` days, ordered most-recent first. Today is included so the
30
- * current day's sessions get scored too; sessions that keep growing
31
- * after being scored are re-picked by getUnanalyzedSessions (ended_at
32
- * newer than analyzed_at).
30
+ * Today is EXCLUDED — sessions on the current calendar day are still
31
+ * actively being held (judge / advice analysing them would race against
32
+ * the user typing more messages, churn the cache, and waste LLM calls).
33
+ * Yesterday + N earlier days only. Callers who really want to (re)
34
+ * analyze today must pass `dates: ['YYYY-MM-DD']` explicitly to
35
+ * runAnalysisJob, or use the per-session reanalyze endpoint
36
+ * (`POST /api/analysis/session/:id`) which bypasses this path.
33
37
  *
34
- * @param {number} days
38
+ * Default `days = 7` therefore produces 7 dates (yesterday through 7
39
+ * days ago), not 8.
40
+ *
41
+ * @param {number} days number of past days to include (yesterday-anchored)
35
42
  * @returns {string[]}
36
43
  */
37
44
  function buildDateList(days) {
38
45
  const dates = [];
39
46
  const now = new Date();
40
- for (let i = 0; i <= days; i++) {
47
+ // Start at i=1 → yesterday; end at i=days inclusive → `days` total dates.
48
+ for (let i = 1; i <= days; i++) {
41
49
  const d = new Date(now);
42
50
  d.setDate(d.getDate() - i);
43
51
  dates.push(formatDate(d));
@@ -63,6 +71,11 @@ function formatDate(d) {
63
71
  * We skip this session in the job so we don't analyze a still-growing
64
72
  * conversation; the next job pass will pick it up once it settles.
65
73
  *
74
+ * Note: the default invocation of runAnalysisJob no longer includes
75
+ * today at all (see buildDateList). This function only fires when a
76
+ * caller explicitly passes `dates` that includes today — in which case
77
+ * we still shield the actively-typed-in session.
78
+ *
66
79
  * @param {object} db
67
80
  * @param {string} date YYYY-MM-DD
68
81
  * @param {object[]} sessions candidate session rows for that date
@@ -156,7 +169,24 @@ async function analyzeAndStoreSession(db, session, opts = {}) {
156
169
 
157
170
  /**
158
171
  * Run analysis job: analyze unanalyzed sessions in reverse chronological order.
159
- * Default: last 7 days. Processes one date at a time, most recent first.
172
+ *
173
+ * # Today policy
174
+ *
175
+ * Default invocation ({days}) skips TODAY entirely — sessions on the
176
+ * current calendar day are likely still being held, and analysing
177
+ * them now means churning the LLM cache for results that will be stale
178
+ * within minutes. The boot path (bin/aboss.js) uses this default, so
179
+ * "open aboss → background scan" never touches today.
180
+ *
181
+ * Two escape hatches keep today analysable when the user really asks:
182
+ * - `dates: ['YYYY-MM-DD']` explicit list → not filtered. Used by
183
+ * manual triggers that pass a specific date set.
184
+ * - `POST /api/analysis/session/:id` → goes straight through
185
+ * `analyzeAndStoreSession`, doesn't use this job loop, so today's
186
+ * "Re-analyze" button in the UI keeps working.
187
+ *
188
+ * Default: last 7 days (yesterday → 7 days ago). Processes one date at
189
+ * a time, most recent first.
160
190
  *
161
191
  * @param {object} db - sql.js boss.db instance
162
192
  * @param {object} options - {
@@ -164,6 +194,7 @@ async function analyzeAndStoreSession(db, session, opts = {}) {
164
194
  * onProgress: fn,
165
195
  * forceReanalyze: false,
166
196
  * dates: string[] // optional explicit YYYY-MM-DD list; overrides `days`
197
+ * // AND bypasses the "skip today" rule
167
198
  * }
168
199
  * @returns {Promise<{analyzed: number, errors: number, skipped: number}>}
169
200
  */