agentboss 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -0
- package/bin/aboss.js +288 -0
- package/client/dist/assets/index-C1wFD_Vo.css +1 -0
- package/client/dist/assets/index-DBj1Ujlx.js +137 -0
- package/client/dist/index.html +34 -0
- package/package.json +64 -0
- package/server/analysis/daily-aggregator.js +258 -0
- package/server/analysis/difficulty.js +129 -0
- package/server/analysis/dimensions/ai-knowledge.js +172 -0
- package/server/analysis/dimensions/ai-tools.js +161 -0
- package/server/analysis/dimensions/judgement.js +107 -0
- package/server/analysis/dimensions/llm-merge.js +57 -0
- package/server/analysis/dimensions/output-quality.js +167 -0
- package/server/analysis/dimensions/problem-definition.js +104 -0
- package/server/analysis/dimensions/system-thinking.js +225 -0
- package/server/analysis/evidence-builder.js +104 -0
- package/server/analysis/job.js +273 -0
- package/server/analysis/report-builder.js +581 -0
- package/server/analysis/scoring-v2.js +72 -0
- package/server/analysis/text-signals.js +179 -0
- package/server/analysis/thresholds-v2.js +358 -0
- package/server/api/advice.js +124 -0
- package/server/api/analysis.js +141 -0
- package/server/api/execution.js +330 -0
- package/server/api/metrics.js +277 -0
- package/server/api/overview.js +308 -0
- package/server/api/project.js +255 -0
- package/server/api/reports.js +125 -0
- package/server/api/sessions.js +118 -0
- package/server/api/settings.js +119 -0
- package/server/db/connection.js +175 -0
- package/server/db/queries.js +1051 -0
- package/server/db/schema.js +487 -0
- package/server/etl/active-time.js +150 -0
- package/server/etl/backfill-subagents.js +178 -0
- package/server/etl/claude-code.js +826 -0
- package/server/etl/detect.js +341 -0
- package/server/etl/judge-filter.js +117 -0
- package/server/etl/opencode.js +606 -0
- package/server/execution/job.js +662 -0
- package/server/execution/prompt.js +227 -0
- package/server/execution/runner.js +218 -0
- package/server/index.js +94 -0
- package/server/llm/advice-prompt.js +339 -0
- package/server/llm/advice.js +384 -0
- package/server/llm/analysis-prompt.js +162 -0
- package/server/llm/cli-runner.js +249 -0
- package/server/llm/judge-prompts.js +179 -0
- package/server/llm/judge.js +118 -0
- package/server/llm/project-advice-prompt.js +332 -0
- package/server/llm/project-advice.js +491 -0
- package/server/llm/session-analyzer.js +122 -0
- package/server/utils/project.js +80 -0
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Text-signal extraction utilities.
|
|
3
|
+
*
|
|
4
|
+
* Shared by H1/H2/H3 (and the rule fallbacks in E1/O1). We deliberately
|
|
5
|
+
* keep this in one file so the keyword vocabulary is reviewed in one
|
|
6
|
+
* place — the spec doc is the source of truth, this file just encodes it.
|
|
7
|
+
*
|
|
8
|
+
* @author Felix
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
'use strict';
|
|
12
|
+
|
|
13
|
+
const { queryAll } = require('../db/queries');
|
|
14
|
+
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
// Keyword vocabularies
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
|
|
19
|
+
/**
 * Phrases a user writes when abandoning the current approach:
 * "change direction", "redo", "start over".
 */
const DRIFT_PATTERNS = [
  // Chinese phrasing
  /换(一种|一个|个)/,
  /算了/,
  /改成/,
  /重新来/,
  /不(对|是)[,,]?\s*(应该|是)/,
  /推翻/,
  /换思路/,
  // English phrasing (case-insensitive)
  /forget (it|that)/i,
  /let'?s restart/i,
  /never mind/i,
  /actually,? (let'?s|use|do)/i,
];
|
|
24
|
+
|
|
25
|
+
/**
 * Phrases a user writes when explicitly reframing the task or laying out
 * a plan before work starts.
 */
const REFRAME_PATTERNS = [
  // Chinese phrasing
  /^我先/,
  /先理清/,
  /先?分?(两|三|四|多)步/,
  /我希望(先|你)/,
  /先?规划/,
  /先?设计/,
  /先?列(出)?/,
  // English phrasing (case-insensitive)
  /step (one|1)/i,
  /^let'?s first/i,
  /first,? let'?s/i,
  /^plan:/i,
  /^step 1/i,
];
|
|
30
|
+
|
|
31
|
+
/**
 * Phrases a user writes when pushing back on, or questioning, what the
 * AI produced.
 */
const CHALLENGE_PATTERNS = [
  // Chinese phrasing
  /为什么/,
  /不对/,
  /再确认/,
  /你刚说/,
  /真的吗/,
  /再想想/,
  /有(没有)?更好/,
  /有问题/,
  /(我)?不(同意|认可)/,
  /为何/,
  /哪里/,
  // English phrasing (case-insensitive)
  /why\b/i,
  /that'?s wrong/i,
  /are you sure/i,
  /reconsider/i,
  /(i )?disagree/i,
  /that doesn'?t/i,
  /that won'?t/i,
];
|
|
38
|
+
|
|
39
|
+
/**
 * Phrases a user writes when replacing the AI's choice with their own.
 */
const OVERRIDE_PATTERNS = [
  // Chinese phrasing
  /用别的/,
  /换/,
  /(应)?该用/,
  /不要(用)?/,
  /(必须|得)用/,
  /改用/,
  /(放|去)弃/,
  // English phrasing (case-insensitive)
  /use\s+\w+ instead/i,
  /don'?t use/i,
  /switch to/i,
];
|
|
44
|
+
|
|
45
|
+
/**
 * Short acknowledgements ("thanks", "great", "continue") that carry no
 * correction. Every pattern is anchored at the start of the message.
 */
const ACCEPT_PATTERNS = [
  // Chinese phrasing
  /^好的?[!.。!]*$/,
  /^可以[。.]?$/,
  /^继续[。.]?$/,
  /^没问题[。.]?$/,
  /^谢谢?[!.。!]*$/,
  /^完美/,
  /^很好/,
  /^赞/,
  // English phrasing (case-insensitive)
  /^ok\b/i,
  /^thanks?\b/i,
  /^great\b/i,
  /^perfect\b/i,
  /^lgtm\b/i,
  /^continue\b/i,
];
|
|
50
|
+
|
|
51
|
+
/**
 * Phrases a user writes when pointing out that an API is deprecated,
 * missing, or used incorrectly.
 */
const STALENESS_PATTERNS = [
  // Chinese phrasing
  /已?废弃/,
  /已?(被)?移除/,
  /已不支持/,
  /不存在(这个|该)/,
  /用错了/,
  /(应)?该是/,
  // English phrasing (case-insensitive)
  /deprecated/i,
  /removed/i,
  /no longer/i,
  /that (api|method|function) doesn'?t exist/i,
];
|
|
56
|
+
|
|
57
|
+
/**
 * Abstraction / system-design vocabulary, as plain strings rather than
 * regular expressions.
 */
const ABSTRACTION_TERMS = [
  // Chinese terms
  '架构',
  '模块',
  '接口',
  '抽象',
  '复用',
  '约束',
  '规范',
  '协议',
  '范式',
  '策略',
  '契约',
  '边界',
  '解耦',
  '依赖倒置',
  '泛化',
  '模板',
  // English terms
  'architecture',
  'module',
  'interface',
  'abstraction',
  'protocol',
  'contract',
  'boundary',
  'decouple',
  'generalise',
  'template',
];
|
|
64
|
+
|
|
65
|
+
/**
 * Active-refactoring vocabulary (H3): phrases a user writes when asking
 * to consolidate, unify, or restructure existing work.
 */
const REFACTOR_PATTERNS = [
  // Chinese phrasing
  /重构/,
  /统一/,
  /沉淀/,
  /抽象成/,
  /模板化/,
  /标准化/,
  /整合/,
  /合并/,
  // English phrasing (case-insensitive)
  /refactor/i,
  /consolidate/i,
  /unify/i,
  // Accept both spellings: "standardise" and "standardize". The previous
  // class `[ie]se` never matched the "-ize" spelling.
  /standardi[sz]e/i,
  /extract\s+to/i,
];
|
|
70
|
+
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
// Matchers
|
|
73
|
+
// ---------------------------------------------------------------------------
|
|
74
|
+
|
|
75
|
+
/**
 * Report whether at least one pattern matches the text.
 *
 * @param {string} text      text to scan; any falsy value yields false
 * @param {Iterable<RegExp>} patterns  regular expressions to try, in order
 * @returns {boolean} true as soon as one pattern matches
 */
function matchesAny(text, patterns) {
  if (!text) return false;
  for (const pattern of patterns) {
    // RegExp#test on a global or sticky regex resumes from `lastIndex`,
    // so a pattern reused across calls would otherwise give alternating
    // answers. Rewind it so every call scans from the start of the text.
    if (pattern.global || pattern.sticky) pattern.lastIndex = 0;
    if (pattern.test(text)) return true;
  }
  return false;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Count every match of every pattern in the text.
 *
 * Each pattern is re-compiled with the `g` flag added (when it is not
 * already present) so that all of its matches are counted, not just the
 * first. The caller's RegExp objects are not modified.
 *
 * @param {string} text      text to scan; any falsy value yields 0
 * @param {Iterable<RegExp>} patterns
 * @returns {number} total number of matches across all patterns
 */
function countMatches(text, patterns) {
  if (!text) return 0;

  let total = 0;
  for (const pattern of patterns) {
    const { source, flags } = pattern;
    const globalFlags = flags.includes('g') ? flags : flags + 'g';
    const hits = text.match(new RegExp(source, globalFlags));
    total += hits ? hits.length : 0;
  }
  return total;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Count case-insensitive, non-overlapping occurrences of plain-text terms.
 *
 * Both the text and each term are lower-cased, and each term is escaped
 * so that regex metacharacters in it are matched literally.
 *
 * @param {string} text    text to scan; any falsy value yields 0
 * @param {Iterable<string>} terms  literal terms (not regular expressions)
 * @returns {number} total occurrences across all terms
 */
function termOccurrences(text, terms) {
  if (!text) return 0;

  const haystack = text.toLowerCase();
  const escapeLiteral = (term) => term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  let total = 0;
  for (const term of terms) {
    const needle = new RegExp(escapeLiteral(term.toLowerCase()), 'g');
    const hits = haystack.match(needle);
    total += hits ? hits.length : 0;
  }
  return total;
}
|
|
103
|
+
|
|
104
|
+
// ---------------------------------------------------------------------------
|
|
105
|
+
// Per-session text fetcher
|
|
106
|
+
//
|
|
107
|
+
// Returns the message stream for a session as a chronological list of
// { id, role, text, contentLength, isError, ts }. The text is read from
// unified_message.text; rows where that column is NULL yield an empty
// string, so callers should fall back to the length-only fields for them.
|
|
112
|
+
// ---------------------------------------------------------------------------
|
|
113
|
+
|
|
114
|
+
/**
 * Load every message row of a session, oldest first, as plain objects.
 *
 * The text comes from the `text` column of `unified_message`. When that
 * column is NULL or empty the returned `text` is an empty string, which
 * callers should treat as "signal unknown" and either skip the indicator
 * or fall back to the length-only fields.
 *
 * @param {object} db
 * @param {string} sessionId
 * @returns {Array<{ id: string, role: string, text: string, contentLength: number, isError: boolean, ts: string }>}
 */
function fetchMessages(db, sessionId) {
  const sql = `SELECT id, role, timestamp, content_length, is_error, text
FROM unified_message
WHERE session_id = ?
ORDER BY timestamp ASC`;

  // Normalise nullable columns so callers never see null/undefined here.
  const toMessage = (row) => ({
    id: row.id,
    role: row.role,
    text: row.text || '',
    contentLength: row.content_length || 0,
    isError: !!row.is_error,
    ts: row.timestamp,
  });

  return queryAll(db, sql, [sessionId]).map(toMessage);
}
|
|
146
|
+
|
|
147
|
+
/**
 * Keep only the messages whose role is exactly 'user'.
 *
 * @param {Array<{ role: string }>} messages
 * @returns {Array<{ role: string }>} a new array; the input is not modified
 */
function userMessages(messages) {
  const fromUser = (message) => message.role === 'user';
  return messages.filter(fromUser);
}
|
|
153
|
+
|
|
154
|
+
/**
 * Keep only the messages whose role is exactly 'assistant'.
 *
 * @param {Array<{ role: string }>} messages
 * @returns {Array<{ role: string }>} a new array; the input is not modified
 */
function assistantMessages(messages) {
  const fromAssistant = (message) => message.role === 'assistant';
  return messages.filter(fromAssistant);
}
|
|
160
|
+
|
|
161
|
+
module.exports = {
|
|
162
|
+
// patterns
|
|
163
|
+
DRIFT_PATTERNS,
|
|
164
|
+
REFRAME_PATTERNS,
|
|
165
|
+
CHALLENGE_PATTERNS,
|
|
166
|
+
OVERRIDE_PATTERNS,
|
|
167
|
+
ACCEPT_PATTERNS,
|
|
168
|
+
STALENESS_PATTERNS,
|
|
169
|
+
ABSTRACTION_TERMS,
|
|
170
|
+
REFACTOR_PATTERNS,
|
|
171
|
+
// matchers
|
|
172
|
+
matchesAny,
|
|
173
|
+
countMatches,
|
|
174
|
+
termOccurrences,
|
|
175
|
+
// data fetchers
|
|
176
|
+
fetchMessages,
|
|
177
|
+
userMessages,
|
|
178
|
+
assistantMessages,
|
|
179
|
+
};
|
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v2 capability thresholds — single source of truth.
|
|
3
|
+
*
|
|
4
|
+
* Each sub-indicator declares:
|
|
5
|
+
* • how its raw value should be interpreted (higher / lower / band-better)
|
|
6
|
+
* • the L1-L4 boundaries, possibly per difficulty bucket
|
|
7
|
+
*
|
|
8
|
+
* The shape is deliberately flat data so it can be tweaked without code
|
|
9
|
+
* changes and (eventually) edited from the Settings UI.
|
|
10
|
+
*
|
|
11
|
+
* Source of truth: docs/superpowers/specs/2026-06-13-capability-model-v2.md §4
|
|
12
|
+
*
|
|
13
|
+
* @author Felix
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
'use strict';
|
|
17
|
+
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
// Helpers
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
/**
 * Numeric score assigned to each level (key = level number 1..4), so the
 * roll-up can average levels as numbers.
 */
const LEVEL_SCORE = {
  1: 25,
  2: 55,
  3: 80,
  4: 95,
};
|
|
24
|
+
|
|
25
|
+
/**
 * Convert a numeric score back to a level (matches §5 rules).
 *
 * @param {number|null} score
 * @returns {1|2|3|4|null} null when the score is null, undefined or NaN;
 *   otherwise 4 from 85 up, 3 from 65 up, 2 from 40 up, and 1 below that
 */
function scoreToLevel(score) {
  if (score == null || Number.isNaN(score)) return null;

  // [inclusive lower bound, level], highest level first.
  const floors = [
    [85, 4],
    [65, 3],
    [40, 2],
  ];
  for (const [floor, level] of floors) {
    if (score >= floor) return level;
  }
  return 1;
}
|
|
33
|
+
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
// Threshold tables
|
|
36
|
+
// Each row reads "if value satisfies the cell for level L at difficulty D,
|
|
37
|
+
// return L". Difficulty buckets: 1 trivial · 2 routine · 3 complex · 4 heavy.
|
|
38
|
+
//
|
|
39
|
+
// direction: 'lower' = lower is better
|
|
40
|
+
// 'higher' = higher is better
|
|
41
|
+
// 'band' = ideal range, anything else penalised
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
|
|
44
|
+
/* ----- H1 — Problem Definition ------------------------------------- */
|
|
45
|
+
|
|
46
|
+
// H1 thresholds. All three indicators are "lower is better": each table is
// keyed by difficulty bucket (1-4) and every cell is the inclusive upper
// bound for that level.
const H1 = {
  /** AI proactive-question count (HEAD 30% of the session). */
  clarity: {
    direction: 'lower',
    table: {
      1: { L4: 0, L3: 1, L2: 3 },
      2: { L4: 1, L3: 2, L2: 5 },
      3: { L4: 2, L3: 4, L2: 8 },
      4: { L4: 3, L3: 6, L2: 12 },
    },
  },

  /** User-message rounds needed to converge. */
  converge: {
    direction: 'lower',
    table: {
      1: { L4: 3, L3: 5, L2: 8 },
      2: { L4: 5, L3: 8, L2: 15 },
      3: { L4: 8, L3: 15, L2: 25 },
      4: { L4: 15, L3: 30, L2: 50 },
    },
  },

  /** Direction-change events. */
  drift: {
    direction: 'lower',
    table: {
      1: { L4: 0, L3: 1, L2: 2 },
      2: { L4: 0, L3: 1, L2: 2 },
      3: { L4: 1, L3: 2, L2: 4 },
      4: { L4: 1, L3: 3, L2: 6 },
    },
  },
};
|
|
79
|
+
|
|
80
|
+
/* ----- H2 — Judgement ---------------------------------------------- */
|
|
81
|
+
|
|
82
|
+
// H2 thresholds, keyed by difficulty bucket (1-4).
const H2 = {
  /** "Higher is better": each cell is the inclusive lower bound for the level. */
  challenge: {
    direction: 'higher',
    table: {
      1: { L4: 0.20, L3: 0.10, L2: 0.03 },
      2: { L4: 0.30, L3: 0.18, L2: 0.08 },
      3: { L4: 0.35, L3: 0.25, L2: 0.15 },
      4: { L4: 0.40, L3: 0.30, L2: 0.20 },
    },
  },

  /**
   * Override rate — band metric: too low = rubber-stamping, too high =
   * thrashing. Encoded per difficulty bucket as one inclusive [lo, hi]
   * range per level: { L4: [lo, hi], L3: [lo, hi], L2: [lo, hi] }.
   */
  override: {
    direction: 'band',
    bands: {
      1: { L4: [0, 0.10], L3: [0, 0.20], L2: [0, 0.35] },
      2: { L4: [0.05, 0.15], L3: [0, 0.25], L2: [0, 0.40] },
      3: { L4: [0.10, 0.20], L3: [0, 0.30], L2: [0, 0.45] },
      4: { L4: [0.10, 0.25], L3: [0, 0.35], L2: [0, 0.50] },
    },
  },

  /**
   * Compliant-without-comment rate — ideal band 60-85%. The same ranges
   * are repeated for every difficulty bucket (difficulty-agnostic).
   */
  accept_rate: {
    direction: 'band',
    bands: {
      1: { L4: [0.60, 0.85], L3: [0.50, 0.90], L2: [0.40, 0.95] },
      2: { L4: [0.60, 0.85], L3: [0.50, 0.90], L2: [0.40, 0.95] },
      3: { L4: [0.60, 0.85], L3: [0.50, 0.90], L2: [0.40, 0.95] },
      4: { L4: [0.60, 0.85], L3: [0.50, 0.90], L2: [0.40, 0.95] },
    },
  },
};
|
|
116
|
+
|
|
117
|
+
/* ----- H3 — System Thinking (rolling) ------------------------------ */
|
|
118
|
+
|
|
119
|
+
// H3 thresholds. Every table has a single 'all' row, so the same bounds
// apply at every difficulty.
const H3 = {
  consistency: {
    direction: 'higher',
    table: {
      all: { L4: 0.80, L3: 0.60, L2: 0.40 },
    },
  },

  dedup: {
    direction: 'lower',
    table: {
      all: { L4: 0.05, L3: 0.15, L2: 0.30 },
    },
  },

  // Measured per 100 sessions.
  refactor: {
    direction: 'higher',
    table: {
      all: { L4: 6, L3: 3, L2: 1 },
    },
  },

  abstraction: {
    direction: 'higher',
    table: {
      all: { L4: 0.20, L3: 0.10, L2: 0.05 },
    },
  },
};
|
|
137
|
+
|
|
138
|
+
/* ----- E1 — Knowledge Coverage ------------------------------------- */
|
|
139
|
+
|
|
140
|
+
// E1 thresholds. Every table has a single 'all' row, so the same bounds
// apply at every difficulty.
const E1 = {
  domain_errors: {
    direction: 'lower',
    table: {
      all: { L4: 0.03, L3: 0.08, L2: 0.15 },
    },
  },

  staleness: {
    direction: 'lower',
    table: {
      all: { L4: 0, L3: 1, L2: 3 },
    },
  },

  best_practice: {
    direction: 'higher',
    table: {
      all: { L4: 0.85, L3: 0.65, L2: 0.45 },
    },
  },
};
|
|
154
|
+
|
|
155
|
+
/* ----- E2 — Tool Coverage ------------------------------------------ */
|
|
156
|
+
|
|
157
|
+
// E2 thresholds.
const E2 = {
  // Keyed by difficulty bucket (1-4); the bounds decrease as the bucket
  // number rises.
  tool_pick: {
    direction: 'higher',
    table: {
      1: { L4: 0.95, L3: 0.85, L2: 0.70 },
      2: { L4: 0.90, L3: 0.80, L2: 0.65 },
      3: { L4: 0.85, L3: 0.75, L2: 0.60 },
      4: { L4: 0.80, L3: 0.70, L2: 0.55 },
    },
  },

  /** calls-per-intent ratio vs baseline 1.0 (LOWER better) */
  chain_eff: {
    direction: 'lower',
    table: {
      all: { L4: 1.1, L3: 1.4, L2: 1.8 },
    },
  },

  self_heal: {
    direction: 'higher',
    table: {
      all: { L4: 0.85, L3: 0.65, L2: 0.40 },
    },
  },
};
|
|
177
|
+
|
|
178
|
+
/* ----- O1 — Output Quality ----------------------------------------- */
|
|
179
|
+
|
|
180
|
+
// O1 thresholds.
const O1 = {
  // Keyed by difficulty bucket (1-4); the bounds decrease as the bucket
  // number rises.
  first_take: {
    direction: 'higher',
    table: {
      1: { L4: 0.80, L3: 0.60, L2: 0.40 },
      2: { L4: 0.70, L3: 0.55, L2: 0.40 },
      3: { L4: 0.60, L3: 0.45, L2: 0.30 },
      4: { L4: 0.50, L3: 0.35, L2: 0.25 },
    },
  },

  code_style: {
    direction: 'higher',
    table: {
      all: { L4: 0.85, L3: 0.65, L2: 0.45 },
    },
  },

  completeness: {
    direction: 'higher',
    table: {
      all: { L4: 0.80, L3: 0.60, L2: 0.40 },
    },
  },
};
|
|
199
|
+
|
|
200
|
+
// Every dimension's indicator specs, keyed by dimension code.
const ALL = {
  H1,
  H2,
  H3,
  E1,
  E2,
  O1,
};
|
|
201
|
+
|
|
202
|
+
// ---------------------------------------------------------------------------
|
|
203
|
+
// Roll-up weights — see spec §5
|
|
204
|
+
// ---------------------------------------------------------------------------
|
|
205
|
+
|
|
206
|
+
// Per-dimension weight of each sub-indicator. Within every dimension the
// weights add up to 1.
const WEIGHTS = {
  H1: {
    clarity: 0.45,
    converge: 0.35,
    drift: 0.20,
  },
  H2: {
    challenge: 0.40,
    override: 0.35,
    accept_rate: 0.25,
  },
  H3: {
    consistency: 0.30,
    dedup: 0.25,
    refactor: 0.20,
    abstraction: 0.25,
  },
  E1: {
    domain_errors: 0.40,
    staleness: 0.25,
    best_practice: 0.35,
  },
  E2: {
    tool_pick: 0.40,
    chain_eff: 0.30,
    self_heal: 0.30,
  },
  O1: {
    first_take: 0.45,
    code_style: 0.25,
    completeness: 0.30,
  },
};
|
|
214
|
+
|
|
215
|
+
// ---------------------------------------------------------------------------
|
|
216
|
+
// Level evaluation
|
|
217
|
+
// ---------------------------------------------------------------------------
|
|
218
|
+
|
|
219
|
+
/**
 * Resolve a numeric value to L1..L4 against an indicator spec.
 *
 * The row used is chosen by `pickTableKey`. For 'band' indicators the
 * value is tested against the row's [lo, hi] ranges; for 'lower' it must
 * be <= the cell; for any other direction ('higher') it must be >= the
 * cell. Levels are tried from L4 down and the first match wins; a value
 * that satisfies none of them is level 1.
 *
 * @param {Object} indicator     one of H1.clarity, H2.override, etc.
 * @param {number|null} value    the raw measurement
 * @param {number} difficulty    1-4 (ignored if the spec table is keyed by 'all')
 * @returns {1|2|3|4|null} null when the indicator or value is missing, the
 *   value is NaN, or the spec has no usable row
 */
function evalLevel(indicator, value, difficulty = 2) {
  if (!indicator || value == null || Number.isNaN(value)) return null;

  const key = pickTableKey(indicator, difficulty);
  const isBand = indicator.direction === 'band';

  // Optional chaining: a spec that lacks the `bands` / `table` property
  // for its direction resolves to "no level" instead of throwing.
  const row = (isBand ? indicator.bands : indicator.table)?.[key];
  if (!row) return null;

  let satisfies;
  if (isBand) {
    satisfies = (bound) => inBand(value, bound);
  } else if (indicator.direction === 'lower') {
    satisfies = (bound) => value <= bound;
  } else {
    // 'higher'
    satisfies = (bound) => value >= bound;
  }

  if (satisfies(row.L4)) return 4;
  if (satisfies(row.L3)) return 3;
  if (satisfies(row.L2)) return 2;
  return 1;
}

/**
 * Choose which row of an indicator's table (or bands) to use.
 *
 * @param {Object} indicator  spec with a `table` or `bands` object
 * @param {number} difficulty requested difficulty bucket
 * @returns {string|number|null} 'all' when the spec has an 'all' row, the
 *   difficulty itself when that row exists, otherwise the numeric key
 *   closest to the difficulty (the smaller key on a tie), or null when
 *   the spec has no numeric rows
 */
function pickTableKey(indicator, difficulty) {
  const rows = indicator.table || indicator.bands || {};
  if (rows.all) return 'all';
  if (rows[difficulty] != null) return difficulty;

  // Fall back to the closest available bucket.
  const numericKeys = Object.keys(rows)
    .map(Number)
    .filter(Number.isFinite)
    .sort((a, b) => a - b);
  if (!numericKeys.length) return null;

  return numericKeys.reduce(
    (best, k) => (Math.abs(k - difficulty) < Math.abs(best - difficulty) ? k : best),
    numericKeys[0]
  );
}

/**
 * Test whether a value lies inside an inclusive [lo, hi] range.
 *
 * @param {number} value
 * @param {Array<number>} range  [lo, hi]; anything that is not an array yields false
 * @returns {boolean}
 */
function inBand(value, range) {
  return Array.isArray(range) && value >= range[0] && value <= range[1];
}
|
|
270
|
+
|
|
271
|
+
/**
 * Evaluate a raw measurement and return its level together with the
 * numeric score that `LEVEL_SCORE` assigns to that level, for averaging
 * into the dimension score.
 *
 * @param {Object} indicator
 * @param {number} value
 * @param {number} difficulty
 * @returns {{ level: number|null, score: number|null }} both fields are
 *   null when no level could be resolved
 */
function evalIndicator(indicator, value, difficulty = 2) {
  const level = evalLevel(indicator, value, difficulty);
  const score = level == null ? null : LEVEL_SCORE[level];
  return { level, score };
}
|
|
284
|
+
|
|
285
|
+
/**
 * Like evalIndicator, but also returns the data the UI needs to render a
 * "why" tooltip without re-implementing the threshold tables.
 *
 * Result shape: { level, score, value, direction, bounds, bucketKey }
 *   - value               : the measurement that was passed in
 *   - direction           : 'lower' | 'higher' | 'band' (null without an indicator)
 *   - bounds.L4, L3, L2   : the boundaries used for this difficulty bucket
 *                           (numbers, or [lo, hi] for bands); `bounds` is
 *                           null when no row was found
 *   - bucketKey           : which row of the table was used ('all' or a
 *                           numeric key), or null
 *
 * @param {Object} indicator
 * @param {number|null} value
 * @param {number} difficulty
 * @returns {Object} see the result shape above
 */
function explainIndicator(indicator, value, difficulty = 2) {
  const verdict = evalIndicator(indicator, value, difficulty);
  if (!indicator) {
    return { ...verdict, value, direction: null, bounds: null, bucketKey: null };
  }

  const bucketKey = pickTableKey(indicator, difficulty);

  // A band indicator reads its row from `bands`; if that row is missing,
  // or the indicator is not a band, the row comes from `table`.
  const bandRow = indicator.direction === 'band' ? indicator.bands?.[bucketKey] : undefined;
  const tableRow = indicator.table?.[bucketKey];

  let bounds = null;
  if (bandRow) {
    bounds = {
      L4: bandRow.L4 || null,
      L3: bandRow.L3 || null,
      L2: bandRow.L2 || null,
    };
  } else if (tableRow) {
    // `??` rather than `||`: a boundary of 0 is a real value here.
    bounds = {
      L4: tableRow.L4 ?? null,
      L3: tableRow.L3 ?? null,
      L2: tableRow.L2 ?? null,
    };
  }

  return { ...verdict, value, direction: indicator.direction, bounds, bucketKey };
}
|
|
321
|
+
|
|
322
|
+
/**
 * Combine sub-scores into a single dimension score as a weighted average
 * using the weights declared in `WEIGHTS`.
 *
 * Sub-scores that are null, undefined or NaN are left out, and the
 * average is taken over the weights that remain, so a partial
 * measurement still yields a number.
 *
 * @param {string} dimensionKey  'H1' | 'H2' | … | 'O1'
 * @param {Object} subScores     { clarity: 80, converge: 55, … }
 * @returns {number|null} the rounded weighted average, or null when the
 *   dimension is unknown or no sub-score was usable
 */
function rollupDimension(dimensionKey, subScores) {
  const weights = WEIGHTS[dimensionKey];
  if (!weights) return null;

  let weightedTotal = 0;
  let usedWeight = 0;
  for (const [name, weight] of Object.entries(weights)) {
    const score = subScores ? subScores[name] : null;
    const usable = !(score == null || Number.isNaN(score));
    if (usable) {
      weightedTotal += score * weight;
      usedWeight += weight;
    }
  }

  return usedWeight === 0 ? null : Math.round(weightedTotal / usedWeight);
}
|
|
348
|
+
|
|
349
|
+
module.exports = {
|
|
350
|
+
H1, H2, H3, E1, E2, O1, ALL,
|
|
351
|
+
WEIGHTS,
|
|
352
|
+
LEVEL_SCORE,
|
|
353
|
+
scoreToLevel,
|
|
354
|
+
evalLevel,
|
|
355
|
+
evalIndicator,
|
|
356
|
+
explainIndicator,
|
|
357
|
+
rollupDimension,
|
|
358
|
+
};
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-session AI advice API.
|
|
3
|
+
*
|
|
4
|
+
* POST /api/advice/session/:id body: { force?: boolean } trigger generation
|
|
5
|
+
* GET /api/advice/session/:id read cached payload
|
|
6
|
+
*
|
|
7
|
+
* Failure reasons from server/llm/advice.js are mapped to HTTP codes:
|
|
8
|
+
* no-session → 404
|
|
9
|
+
* llm-disabled → 409
|
|
10
|
+
* no-cli → 409
|
|
11
|
+
* timeout → 504
|
|
12
|
+
* bad-json → 502
|
|
13
|
+
* spawn-error → 500
|
|
14
|
+
* exit-non-zero → 500
|
|
15
|
+
* internal / any → 500
|
|
16
|
+
*
|
|
17
|
+
* @author Felix
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
'use strict';
|
|
21
|
+
|
|
22
|
+
const router = require('express').Router();
|
|
23
|
+
|
|
24
|
+
const { generateAdvice, loadAdvice } = require('../llm/advice');
|
|
25
|
+
const { getSessionById } = require('../db/queries');
|
|
26
|
+
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
// Helpers
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
// HTTP status returned for each failure reason. Reasons not listed here
// fall back to 500.
const REASON_TO_STATUS = {
  'no-session': 404,
  'llm-disabled': 409,
  'no-cli': 409,
  'timeout': 504,
  'bad-json': 502,
  'spawn-error': 500,
  'exit-non-zero': 500,
  'no-prompt': 500,
  'internal': 500,
};

// Default message for each failure reason, used when the caller supplies
// no specific error text.
const REASON_TO_MESSAGE = {
  'no-session': 'Session not found',
  'llm-disabled': 'LLM judging is disabled in settings',
  'no-cli': 'No opencode/claude CLI detected on PATH',
  'timeout': 'LLM call timed out after 90s',
  'bad-json': 'LLM returned non-JSON output',
  'spawn-error': 'Failed to spawn LLM CLI',
  'exit-non-zero': 'LLM CLI exited with a non-zero status',
  'no-prompt': 'Internal: empty prompt assembled',
  'internal': 'Internal advice generation error',
};

/**
 * Send the JSON error envelope for a failure reason.
 *
 * The response body is `{ ok: false, error: { code, message } }`, where
 * `code` is the reason upper-cased with dashes turned into underscores.
 * The message is `extra.error` when present, otherwise the default
 * message for the reason, otherwise the reason itself.
 *
 * @param {object} res     response object exposing `status()` and `json()`
 * @param {string} reason  failure reason; a missing or non-string reason is
 *   reported as 'internal' rather than throwing while building the code
 * @param {object} [extra] optional object whose `error` overrides the message
 * @returns {*} whatever `res.status(...).json(...)` returns
 */
function failure(res, reason, extra) {
  const key = typeof reason === 'string' && reason !== '' ? reason : 'internal';
  const status = REASON_TO_STATUS[key] || 500;
  const message = (extra && extra.error) || REASON_TO_MESSAGE[key] || key;
  return res.status(status).json({
    ok: false,
    error: { code: key.toUpperCase().replace(/-/g, '_'), message },
  });
}
|
|
63
|
+
|
|
64
|
+
// ---------------------------------------------------------------------------
|
|
65
|
+
// Routes
|
|
66
|
+
// ---------------------------------------------------------------------------
|
|
67
|
+
|
|
68
|
+
module.exports = function (db) {
|
|
69
|
+
// POST /api/advice/session/:id body { force?: boolean }
|
|
70
|
+
router.post('/session/:id', async (req, res) => {
|
|
71
|
+
const sessionId = req.params.id;
|
|
72
|
+
const force = req.body?.force === true || req.body?.force === '1';
|
|
73
|
+
|
|
74
|
+
// Cheap pre-check so a bad URL fails fast without spinning up the LLM
|
|
75
|
+
// path or scanning cache. The advice layer also catches no-session,
|
|
76
|
+
// but this gives clients a 404 in the obvious case.
|
|
77
|
+
if (!getSessionById(db, sessionId)) {
|
|
78
|
+
return failure(res, 'no-session');
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
try {
|
|
82
|
+
const result = await generateAdvice(db, sessionId, { force });
|
|
83
|
+
if (!result.ok) return failure(res, result.reason, result);
|
|
84
|
+
|
|
85
|
+
return res.json({
|
|
86
|
+
ok: true,
|
|
87
|
+
data: {
|
|
88
|
+
advice: result.data,
|
|
89
|
+
fromCache: !!result.fromCache,
|
|
90
|
+
generatedAt: result.data.cachedAt,
|
|
91
|
+
cli: result.data.cli,
|
|
92
|
+
},
|
|
93
|
+
meta: { generated_at: new Date().toISOString() },
|
|
94
|
+
});
|
|
95
|
+
} catch (err) {
|
|
96
|
+
// Should not happen — generateAdvice catches its own errors — but
|
|
97
|
+
// belt-and-braces so the API never throws an uncaught.
|
|
98
|
+
return failure(res, 'internal', { error: err && err.message });
|
|
99
|
+
}
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
// GET /api/advice/session/:id read-only cache fetch (never spawns CLI)
|
|
103
|
+
router.get('/session/:id', (req, res) => {
|
|
104
|
+
const sessionId = req.params.id;
|
|
105
|
+
if (!getSessionById(db, sessionId)) {
|
|
106
|
+
return failure(res, 'no-session');
|
|
107
|
+
}
|
|
108
|
+
const cached = loadAdvice(db, sessionId);
|
|
109
|
+
return res.json({
|
|
110
|
+
ok: true,
|
|
111
|
+
data: cached
|
|
112
|
+
? {
|
|
113
|
+
advice: cached,
|
|
114
|
+
fromCache: true,
|
|
115
|
+
generatedAt: cached.cachedAt || null,
|
|
116
|
+
cli: cached.cli || null,
|
|
117
|
+
}
|
|
118
|
+
: { advice: null, fromCache: false, generatedAt: null, cli: null },
|
|
119
|
+
meta: { generated_at: new Date().toISOString() },
|
|
120
|
+
});
|
|
121
|
+
});
|
|
122
|
+
|
|
123
|
+
return router;
|
|
124
|
+
};
|