agentboss 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/aboss.js +288 -288
- package/client/dist/assets/index-DxoLOxZ8.js +141 -0
- package/client/dist/index.html +1 -1
- package/package.json +1 -1
- package/server/analysis/dimensions/judgement.js +111 -107
- package/server/analysis/dimensions/llm-merge.js +59 -57
- package/server/analysis/dimensions/output-quality.js +167 -167
- package/server/analysis/dimensions/problem-definition.js +109 -104
- package/server/analysis/job.js +91 -14
- package/server/analysis/report-builder.js +574 -581
- package/server/analysis/scoring-v2.js +126 -72
- package/server/analysis/thresholds-v2.js +364 -358
- package/server/api/execution.js +94 -0
- package/server/db/schema.js +5 -2
- package/server/etl/opencode.js +5 -1
- package/server/execution/job.js +141 -2
- package/server/llm/advice-prompt.js +74 -11
- package/server/llm/advice.js +50 -1
- package/server/llm/analysis-prompt.js +173 -162
- package/server/llm/cli-runner.js +18 -2
- package/server/llm/judge.js +6 -1
- package/server/llm/mcp-classify.js +147 -0
- package/server/llm/project-advice-prompt.js +106 -6
- package/server/llm/project-advice.js +55 -2
- package/server/llm/session-analyzer.js +10 -1
- package/client/dist/assets/index-DBj1Ujlx.js +0 -137
|
@@ -1,358 +1,364 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* v2 capability thresholds — single source of truth.
|
|
3
|
-
*
|
|
4
|
-
* Each sub-indicator declares:
|
|
5
|
-
* • how its raw value should be interpreted (higher / lower / band-better)
|
|
6
|
-
* • the L1-L4 boundaries, possibly per difficulty bucket
|
|
7
|
-
*
|
|
8
|
-
* The shape is deliberately flat data so it can be tweaked without code
|
|
9
|
-
* changes and (eventually) edited from the Settings UI.
|
|
10
|
-
*
|
|
11
|
-
* Source of truth: docs/superpowers/specs/2026-06-13-capability-model-v2.md §4
|
|
12
|
-
*
|
|
13
|
-
* @author Felix
|
|
14
|
-
*/
|
|
15
|
-
|
|
16
|
-
'use strict';
|
|
17
|
-
|
|
18
|
-
// ---------------------------------------------------------------------------
|
|
19
|
-
// Helpers
|
|
20
|
-
// ---------------------------------------------------------------------------
|
|
21
|
-
|
|
22
|
-
/** Map L1..L4 to a centred numeric score the rollup can average. */
|
|
23
|
-
const LEVEL_SCORE = { 1: 25, 2: 55, 3: 80, 4: 95 };
|
|
24
|
-
|
|
25
|
-
/** Map a numeric score back to a level (matches §5 rules). */
|
|
26
|
-
function scoreToLevel(score) {
|
|
27
|
-
if (score == null || Number.isNaN(score)) return null;
|
|
28
|
-
if (score >= 85) return 4;
|
|
29
|
-
if (score >= 65) return 3;
|
|
30
|
-
if (score >= 40) return 2;
|
|
31
|
-
return 1;
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
// ---------------------------------------------------------------------------
|
|
35
|
-
// Threshold tables
|
|
36
|
-
// Each row reads "if value satisfies the cell for level L at difficulty D,
|
|
37
|
-
// return L". Difficulty buckets: 1 trivial · 2 routine · 3 complex · 4 heavy.
|
|
38
|
-
//
|
|
39
|
-
// direction: 'lower' = lower is better
|
|
40
|
-
// 'higher' = higher is better
|
|
41
|
-
// 'band' = ideal range, anything else penalised
|
|
42
|
-
// ---------------------------------------------------------------------------
|
|
43
|
-
|
|
44
|
-
/* ----- H1 — Problem Definition ------------------------------------- */
|
|
45
|
-
|
|
46
|
-
const H1 = {
|
|
47
|
-
/** AI proactive-question count (HEAD 30% of the session). */
|
|
48
|
-
clarity: {
|
|
49
|
-
direction: 'lower',
|
|
50
|
-
// bucket = difficulty. cell = inclusive upper bound for that level.
|
|
51
|
-
table: {
|
|
52
|
-
1: { L4: 0, L3: 1, L2: 3 },
|
|
53
|
-
2: { L4: 1, L3: 2, L2: 5 },
|
|
54
|
-
3: { L4: 2, L3: 4, L2: 8 },
|
|
55
|
-
4: { L4: 3, L3: 6, L2: 12 },
|
|
56
|
-
},
|
|
57
|
-
},
|
|
58
|
-
/** User-message rounds needed to converge. */
|
|
59
|
-
converge: {
|
|
60
|
-
direction: 'lower',
|
|
61
|
-
table: {
|
|
62
|
-
1: { L4: 3, L3: 5, L2: 8 },
|
|
63
|
-
2: { L4: 5, L3: 8, L2: 15 },
|
|
64
|
-
3: { L4: 8, L3: 15, L2: 25 },
|
|
65
|
-
4: { L4: 15, L3: 30, L2: 50 },
|
|
66
|
-
},
|
|
67
|
-
},
|
|
68
|
-
/** Direction-change events. */
|
|
69
|
-
drift: {
|
|
70
|
-
direction: 'lower',
|
|
71
|
-
table: {
|
|
72
|
-
1: { L4: 0, L3: 1, L2: 2 },
|
|
73
|
-
2: { L4: 0, L3: 1, L2: 2 },
|
|
74
|
-
3: { L4: 1, L3: 2, L2: 4 },
|
|
75
|
-
4: { L4: 1, L3: 3, L2: 6 },
|
|
76
|
-
},
|
|
77
|
-
},
|
|
78
|
-
};
|
|
79
|
-
|
|
80
|
-
/* ----- H2 — Judgement ---------------------------------------------- */
|
|
81
|
-
|
|
82
|
-
const H2 = {
|
|
83
|
-
challenge: {
|
|
84
|
-
direction: 'higher',
|
|
85
|
-
table: {
|
|
86
|
-
1: { L4: 0.20, L3: 0.10, L2: 0.03 },
|
|
87
|
-
2: { L4: 0.30, L3: 0.18, L2: 0.08 },
|
|
88
|
-
3: { L4: 0.35, L3: 0.25, L2: 0.15 },
|
|
89
|
-
4: { L4: 0.40, L3: 0.30, L2: 0.20 },
|
|
90
|
-
},
|
|
91
|
-
},
|
|
92
|
-
/**
|
|
93
|
-
* Override rate — band metric: too low = rubber-stamping, too high =
|
|
94
|
-
* thrashing. Encoded as { ideal: [lo, hi], tolerance: [[L3 lo, L3 hi], …] }
|
|
95
|
-
*/
|
|
96
|
-
override: {
|
|
97
|
-
direction: 'band',
|
|
98
|
-
bands: {
|
|
99
|
-
1: { L4: [0, 0.10], L3: [0, 0.20], L2: [0, 0.35] },
|
|
100
|
-
2: { L4: [0.05, 0.15],L3: [0, 0.25], L2: [0, 0.40] },
|
|
101
|
-
3: { L4: [0.10, 0.20],L3: [0, 0.30], L2: [0, 0.45] },
|
|
102
|
-
4: { L4: [0.10, 0.25],L3: [0, 0.35], L2: [0, 0.50] },
|
|
103
|
-
},
|
|
104
|
-
},
|
|
105
|
-
/** Compliant-without-comment rate — ideal band 60-85% (difficulty-agnostic). */
|
|
106
|
-
accept_rate: {
|
|
107
|
-
direction: 'band',
|
|
108
|
-
bands: {
|
|
109
|
-
1: { L4: [0.60, 0.85], L3: [0.50, 0.90], L2: [0.40, 0.95] },
|
|
110
|
-
2: { L4: [0.60, 0.85], L3: [0.50, 0.90], L2: [0.40, 0.95] },
|
|
111
|
-
3: { L4: [0.60, 0.85], L3: [0.50, 0.90], L2: [0.40, 0.95] },
|
|
112
|
-
4: { L4: [0.60, 0.85], L3: [0.50, 0.90], L2: [0.40, 0.95] },
|
|
113
|
-
},
|
|
114
|
-
},
|
|
115
|
-
};
|
|
116
|
-
|
|
117
|
-
/* ----- H3 — System Thinking (rolling) ------------------------------ */
|
|
118
|
-
|
|
119
|
-
const H3 = {
|
|
120
|
-
consistency: {
|
|
121
|
-
direction: 'higher',
|
|
122
|
-
table: { all: { L4: 0.80, L3: 0.60, L2: 0.40 } },
|
|
123
|
-
},
|
|
124
|
-
dedup: {
|
|
125
|
-
direction: 'lower',
|
|
126
|
-
table: { all: { L4: 0.05, L3: 0.15, L2: 0.30 } },
|
|
127
|
-
},
|
|
128
|
-
refactor: { // per 100 sessions
|
|
129
|
-
direction: 'higher',
|
|
130
|
-
table: { all: { L4: 6, L3: 3, L2: 1 } },
|
|
131
|
-
},
|
|
132
|
-
abstraction: {
|
|
133
|
-
direction: 'higher',
|
|
134
|
-
table: { all: { L4: 0.20, L3: 0.10, L2: 0.05 } },
|
|
135
|
-
},
|
|
136
|
-
};
|
|
137
|
-
|
|
138
|
-
/* ----- E1 — Knowledge Coverage ------------------------------------- */
|
|
139
|
-
|
|
140
|
-
const E1 = {
|
|
141
|
-
domain_errors: {
|
|
142
|
-
direction: 'lower',
|
|
143
|
-
table: { all: { L4: 0.03, L3: 0.08, L2: 0.15 } },
|
|
144
|
-
},
|
|
145
|
-
staleness: {
|
|
146
|
-
direction: 'lower',
|
|
147
|
-
table: { all: { L4: 0, L3: 1, L2: 3 } },
|
|
148
|
-
},
|
|
149
|
-
best_practice: {
|
|
150
|
-
direction: 'higher',
|
|
151
|
-
table: { all: { L4: 0.85, L3: 0.65, L2: 0.45 } },
|
|
152
|
-
},
|
|
153
|
-
};
|
|
154
|
-
|
|
155
|
-
/* ----- E2 — Tool Coverage ------------------------------------------ */
|
|
156
|
-
|
|
157
|
-
const E2 = {
|
|
158
|
-
tool_pick: {
|
|
159
|
-
direction: 'higher',
|
|
160
|
-
table: {
|
|
161
|
-
1: { L4: 0.95, L3: 0.85, L2: 0.70 },
|
|
162
|
-
2: { L4: 0.90, L3: 0.80, L2: 0.65 },
|
|
163
|
-
3: { L4: 0.85, L3: 0.75, L2: 0.60 },
|
|
164
|
-
4: { L4: 0.80, L3: 0.70, L2: 0.55 },
|
|
165
|
-
},
|
|
166
|
-
},
|
|
167
|
-
/** calls-per-intent ratio vs baseline 1.0 (LOWER better) */
|
|
168
|
-
chain_eff: {
|
|
169
|
-
direction: 'lower',
|
|
170
|
-
table: { all: { L4: 1.1, L3: 1.4, L2: 1.8 } },
|
|
171
|
-
},
|
|
172
|
-
self_heal: {
|
|
173
|
-
direction: 'higher',
|
|
174
|
-
table: { all: { L4: 0.85, L3: 0.65, L2: 0.40 } },
|
|
175
|
-
},
|
|
176
|
-
};
|
|
177
|
-
|
|
178
|
-
/* ----- O1 — Output Quality ----------------------------------------- */
|
|
179
|
-
|
|
180
|
-
const O1 = {
|
|
181
|
-
first_take: {
|
|
182
|
-
direction: 'higher',
|
|
183
|
-
table: {
|
|
184
|
-
1: { L4: 0.80, L3: 0.60, L2: 0.40 },
|
|
185
|
-
2: { L4: 0.70, L3: 0.55, L2: 0.40 },
|
|
186
|
-
3: { L4: 0.60, L3: 0.45, L2: 0.30 },
|
|
187
|
-
4: { L4: 0.50, L3: 0.35, L2: 0.25 },
|
|
188
|
-
},
|
|
189
|
-
},
|
|
190
|
-
code_style: {
|
|
191
|
-
direction: 'higher',
|
|
192
|
-
table: { all: { L4: 0.85, L3: 0.65, L2: 0.45 } },
|
|
193
|
-
},
|
|
194
|
-
completeness: {
|
|
195
|
-
direction: 'higher',
|
|
196
|
-
table: { all: { L4: 0.80, L3: 0.60, L2: 0.40 } },
|
|
197
|
-
},
|
|
198
|
-
};
|
|
199
|
-
|
|
200
|
-
const ALL = { H1, H2, H3, E1, E2, O1 };
|
|
201
|
-
|
|
202
|
-
// ---------------------------------------------------------------------------
|
|
203
|
-
// Roll-up weights — see spec §5
|
|
204
|
-
// ---------------------------------------------------------------------------
|
|
205
|
-
|
|
206
|
-
const WEIGHTS = {
|
|
207
|
-
H1: { clarity: 0.45, converge: 0.35, drift: 0.20 },
|
|
208
|
-
H2: { challenge: 0.40, override: 0.35, accept_rate: 0.25 },
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
//
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
}
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
if (
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
*
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
}
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
*
|
|
293
|
-
*
|
|
294
|
-
*
|
|
295
|
-
*
|
|
296
|
-
*
|
|
297
|
-
*
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
*
|
|
330
|
-
*
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
1
|
+
/**
|
|
2
|
+
* v2 capability thresholds — single source of truth.
|
|
3
|
+
*
|
|
4
|
+
* Each sub-indicator declares:
|
|
5
|
+
* • how its raw value should be interpreted (higher / lower / band-better)
|
|
6
|
+
* • the L1-L4 boundaries, possibly per difficulty bucket
|
|
7
|
+
*
|
|
8
|
+
* The shape is deliberately flat data so it can be tweaked without code
|
|
9
|
+
* changes and (eventually) edited from the Settings UI.
|
|
10
|
+
*
|
|
11
|
+
* Source of truth: docs/superpowers/specs/2026-06-13-capability-model-v2.md §4
|
|
12
|
+
*
|
|
13
|
+
* @author Felix
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
'use strict';
|
|
17
|
+
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
// Helpers
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
/**
 * Numeric score standing in for each level (keys 1-4 = L1-L4) so the
 * roll-up can average levels as plain numbers.
 */
const LEVEL_SCORE = {
  1: 25,
  2: 55,
  3: 80,
  4: 95,
};
|
|
24
|
+
|
|
25
|
+
/**
 * Map a numeric score back to a level (matches §5 rules).
 *
 * Cut-offs: >= 85 → 4, >= 65 → 3, >= 40 → 2, anything lower → 1.
 *
 * @param {number|string|null|undefined} score  the score to classify; numeric
 *   strings are accepted and compared by their numeric value
 * @returns {1|2|3|4|null} null when the score is null/undefined or does not
 *   convert to a number
 */
function scoreToLevel(score) {
  if (score == null) return null;

  // Number.isNaN() does not coerce, so a value such as 'n/a' used to slip
  // past the guard and be graded as level 1. Coerce first, then test.
  const numeric = Number(score);
  if (Number.isNaN(numeric)) return null;

  if (numeric >= 85) return 4;
  if (numeric >= 65) return 3;
  if (numeric >= 40) return 2;
  return 1;
}
|
|
33
|
+
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
// Threshold tables
|
|
36
|
+
// Each row reads "if value satisfies the cell for level L at difficulty D,
|
|
37
|
+
// return L". Difficulty buckets: 1 trivial · 2 routine · 3 complex · 4 heavy.
|
|
38
|
+
//
|
|
39
|
+
// direction: 'lower' = lower is better
|
|
40
|
+
// 'higher' = higher is better
|
|
41
|
+
// 'band' = ideal range, anything else penalised
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
|
|
44
|
+
/* ----- H1 — Problem Definition ------------------------------------- */

const H1 = {
  // clarity: AI proactive-question count (HEAD 30% of the session).
  clarity: {
    direction: 'lower',
    // Rows are keyed by difficulty bucket; each cell is the inclusive
    // upper bound for that level.
    table: {
      '1': { L4: 0, L3: 1, L2: 3 },
      '2': { L4: 1, L3: 2, L2: 5 },
      '3': { L4: 2, L3: 4, L2: 8 },
      '4': { L4: 3, L3: 6, L2: 12 },
    },
  },

  // converge: user-message rounds needed to converge.
  converge: {
    direction: 'lower',
    table: {
      '1': { L4: 3, L3: 5, L2: 8 },
      '2': { L4: 5, L3: 8, L2: 15 },
      '3': { L4: 8, L3: 15, L2: 25 },
      '4': { L4: 15, L3: 30, L2: 50 },
    },
  },

  // drift: direction-change events.
  drift: {
    direction: 'lower',
    table: {
      '1': { L4: 0, L3: 1, L2: 2 },
      '2': { L4: 0, L3: 1, L2: 2 },
      '3': { L4: 1, L3: 2, L2: 4 },
      '4': { L4: 1, L3: 3, L2: 6 },
    },
  },
};
|
|
79
|
+
|
|
80
|
+
/* ----- H2 — Judgement ---------------------------------------------- */

const H2 = {
  challenge: {
    direction: 'higher',
    table: {
      '1': { L4: 0.2, L3: 0.1, L2: 0.03 },
      '2': { L4: 0.3, L3: 0.18, L2: 0.08 },
      '3': { L4: 0.35, L3: 0.25, L2: 0.15 },
      '4': { L4: 0.4, L3: 0.3, L2: 0.2 },
    },
  },

  // Override rate — band metric: too low = rubber-stamping, too high =
  // thrashing. Each difficulty bucket carries one [lo, hi] range per level:
  // { L4: [lo, hi], L3: [lo, hi], L2: [lo, hi] }.
  override: {
    direction: 'band',
    bands: {
      '1': { L4: [0, 0.1], L3: [0, 0.2], L2: [0, 0.35] },
      '2': { L4: [0.05, 0.15], L3: [0, 0.25], L2: [0, 0.4] },
      '3': { L4: [0.1, 0.2], L3: [0, 0.3], L2: [0, 0.45] },
      '4': { L4: [0.1, 0.25], L3: [0, 0.35], L2: [0, 0.5] },
    },
  },

  // Compliant-without-comment rate — ideal band 60-85%. The four buckets
  // are deliberately identical (difficulty-agnostic).
  accept_rate: {
    direction: 'band',
    bands: {
      '1': { L4: [0.6, 0.85], L3: [0.5, 0.9], L2: [0.4, 0.95] },
      '2': { L4: [0.6, 0.85], L3: [0.5, 0.9], L2: [0.4, 0.95] },
      '3': { L4: [0.6, 0.85], L3: [0.5, 0.9], L2: [0.4, 0.95] },
      '4': { L4: [0.6, 0.85], L3: [0.5, 0.9], L2: [0.4, 0.95] },
    },
  },
};
|
|
116
|
+
|
|
117
|
+
/* ----- H3 — System Thinking (rolling) ------------------------------ */

// Every H3 indicator uses a single 'all' row, so difficulty is not a factor.
const H3 = {
  consistency: {
    direction: 'higher',
    table: {
      all: { L4: 0.8, L3: 0.6, L2: 0.4 },
    },
  },
  dedup: {
    direction: 'lower',
    table: {
      all: { L4: 0.05, L3: 0.15, L2: 0.3 },
    },
  },
  // refactor is measured per 100 sessions
  refactor: {
    direction: 'higher',
    table: {
      all: { L4: 6, L3: 3, L2: 1 },
    },
  },
  abstraction: {
    direction: 'higher',
    table: {
      all: { L4: 0.2, L3: 0.1, L2: 0.05 },
    },
  },
};
|
|
137
|
+
|
|
138
|
+
/* ----- E1 — Knowledge Coverage ------------------------------------- */

// Every E1 indicator uses a single 'all' row, so difficulty is not a factor.
const E1 = {
  domain_errors: {
    direction: 'lower',
    table: {
      all: { L4: 0.03, L3: 0.08, L2: 0.15 },
    },
  },
  staleness: {
    direction: 'lower',
    table: {
      all: { L4: 0, L3: 1, L2: 3 },
    },
  },
  best_practice: {
    direction: 'higher',
    table: {
      all: { L4: 0.85, L3: 0.65, L2: 0.45 },
    },
  },
};
|
|
154
|
+
|
|
155
|
+
/* ----- E2 — Tool Coverage ------------------------------------------ */

const E2 = {
  // Keyed by difficulty bucket; the required rate drops as difficulty rises.
  tool_pick: {
    direction: 'higher',
    table: {
      '1': { L4: 0.95, L3: 0.85, L2: 0.7 },
      '2': { L4: 0.9, L3: 0.8, L2: 0.65 },
      '3': { L4: 0.85, L3: 0.75, L2: 0.6 },
      '4': { L4: 0.8, L3: 0.7, L2: 0.55 },
    },
  },

  // calls-per-intent ratio vs baseline 1.0 (LOWER better)
  chain_eff: {
    direction: 'lower',
    table: {
      all: { L4: 1.1, L3: 1.4, L2: 1.8 },
    },
  },

  self_heal: {
    direction: 'higher',
    table: {
      all: { L4: 0.85, L3: 0.65, L2: 0.4 },
    },
  },
};
|
|
177
|
+
|
|
178
|
+
/* ----- O1 — Output Quality ----------------------------------------- */

const O1 = {
  // Keyed by difficulty bucket; the required rate drops as difficulty rises.
  first_take: {
    direction: 'higher',
    table: {
      '1': { L4: 0.8, L3: 0.6, L2: 0.4 },
      '2': { L4: 0.7, L3: 0.55, L2: 0.4 },
      '3': { L4: 0.6, L3: 0.45, L2: 0.3 },
      '4': { L4: 0.5, L3: 0.35, L2: 0.25 },
    },
  },

  code_style: {
    direction: 'higher',
    table: {
      all: { L4: 0.85, L3: 0.65, L2: 0.45 },
    },
  },

  completeness: {
    direction: 'higher',
    table: {
      all: { L4: 0.8, L3: 0.6, L2: 0.4 },
    },
  },
};
|
|
199
|
+
|
|
200
|
+
// Every threshold table, addressable by its dimension key.
const ALL = {
  H1,
  H2,
  H3,
  E1,
  E2,
  O1,
};
|
|
201
|
+
|
|
202
|
+
// ---------------------------------------------------------------------------
|
|
203
|
+
// Roll-up weights — see spec §5
|
|
204
|
+
// ---------------------------------------------------------------------------
|
|
205
|
+
|
|
206
|
+
const WEIGHTS = {
  H1: {
    clarity: 0.45,
    converge: 0.35,
    drift: 0.2,
  },
  H2: {
    challenge: 0.4,
    override: 0.35,
    accept_rate: 0.25,
  },
  // H3 carries two key sets side by side. Per-session H3 (v2.1) uses
  // abstraction/reuse/standard; consistency/dedup/refactor are the older
  // rolling keys, kept for back-compat with the now-unused rolling
  // aggregator. rollupDimension only sums the weights of sub-scores that
  // are actually present, so either shape rolls up correctly.
  H3: {
    abstraction: 0.4,
    reuse: 0.35,
    standard: 0.25,
    consistency: 0.3,
    dedup: 0.25,
    refactor: 0.2,
  },
  E1: {
    domain_errors: 0.4,
    staleness: 0.25,
    best_practice: 0.35,
  },
  E2: {
    tool_pick: 0.4,
    chain_eff: 0.3,
    self_heal: 0.3,
  },
  // v2.1: E1+E2 merged into one LLM-judged ENV dimension.
  ENV: {
    knowledge: 0.4,
    tooling: 0.35,
    currency: 0.25,
  },
  O1: {
    first_take: 0.45,
    code_style: 0.25,
    completeness: 0.3,
  },
};
|
|
220
|
+
|
|
221
|
+
// ---------------------------------------------------------------------------
|
|
222
|
+
// Level evaluation
|
|
223
|
+
// ---------------------------------------------------------------------------
|
|
224
|
+
|
|
225
|
+
/**
 * Resolve a numeric value to L1..L4 against an indicator spec.
 *
 * Levels are tried best-first (L4, then L3, then L2); a value that satisfies
 * none of them is L1. All boundaries are inclusive.
 *
 * @param {Object} indicator  one of H1.clarity, H2.override, etc.
 * @param {number|null} value the raw measurement
 * @param {number} difficulty 1-4 (ignored if the spec table is keyed by 'all')
 * @returns {1|2|3|4|null} null when the indicator is missing, the value is
 *   null/undefined or not numeric, or the spec has no row for the bucket
 */
function evalLevel(indicator, value, difficulty = 2) {
  if (!indicator || value == null) return null;

  // Number.isNaN() does not coerce, so a non-numeric value such as 'n/a'
  // used to fall through every comparison and be graded L1.
  const numeric = Number(value);
  if (Number.isNaN(numeric)) return null;

  const key = pickTableKey(indicator, difficulty);

  if (indicator.direction === 'band') {
    // `?.` keeps a band spec declared without `bands` from throwing.
    const bands = indicator.bands?.[key];
    if (!bands) return null;
    if (inBand(numeric, bands.L4)) return 4;
    if (inBand(numeric, bands.L3)) return 3;
    if (inBand(numeric, bands.L2)) return 2;
    return 1;
  }

  // Same guard for a threshold spec declared without `table`.
  const cells = indicator.table?.[key];
  if (!cells) return null;

  if (indicator.direction === 'lower') {
    if (numeric <= cells.L4) return 4;
    if (numeric <= cells.L3) return 3;
    if (numeric <= cells.L2) return 2;
    return 1;
  }
  // 'higher'
  if (numeric >= cells.L4) return 4;
  if (numeric >= cells.L3) return 3;
  if (numeric >= cells.L2) return 2;
  return 1;
}

/**
 * Choose which row of an indicator's `table` / `bands` applies.
 *
 * @param {Object} indicator  indicator spec
 * @param {number} difficulty requested difficulty bucket
 * @returns {'all'|number|null} 'all' when the spec has an 'all' row, the
 *   difficulty itself when that row exists, otherwise the numerically
 *   closest bucket (the lower one on a tie), or null when there are no
 *   numeric buckets at all
 */
function pickTableKey(indicator, difficulty) {
  const t = indicator.table || indicator.bands || {};
  if (t.all) return 'all';

  // Own-property check: a stray difficulty such as 'constructor' must not
  // match something inherited from Object.prototype.
  const hasOwnRow = Object.prototype.hasOwnProperty.call(t, difficulty);
  if (hasOwnRow && t[difficulty] != null) return difficulty;

  // fall back to the closest available bucket
  const keys = Object.keys(t).map(Number).filter(Number.isFinite).sort((a, b) => a - b);
  if (!keys.length) return null;
  return keys.reduce((acc, k) => (Math.abs(k - difficulty) < Math.abs(acc - difficulty) ? k : acc), keys[0]);
}

/**
 * Inclusive range test.
 *
 * @param {number} value
 * @param {Array<number>} range [lo, hi]; anything that is not an array never matches
 * @returns {boolean}
 */
function inBand(value, range) {
  return Array.isArray(range) && value >= range[0] && value <= range[1];
}
|
|
276
|
+
|
|
277
|
+
/**
 * Grade a raw measurement and pair the resulting level with its numeric
 * score from LEVEL_SCORE, ready to be averaged into a dimension score.
 *
 * @param {Object} indicator
 * @param {number} value
 * @param {number} difficulty
 * @returns {{ level: number|null, score: number|null }} both fields are
 *   null when no level could be determined
 */
function evalIndicator(indicator, value, difficulty = 2) {
  const level = evalLevel(indicator, value, difficulty);
  if (level == null) {
    return { level, score: null };
  }
  return { level, score: LEVEL_SCORE[level] };
}
|
|
290
|
+
|
|
291
|
+
/**
 * Like evalIndicator but also returns a structured "why" payload the UI
 * can render in a tooltip without re-implementing the threshold tables.
 *
 *   { level, score, value, direction, bounds, bucketKey }
 *     - value                : the raw measurement, echoed back
 *     - direction            : 'lower' | 'higher' | 'band' (null without an indicator)
 *     - bounds.L4, .L3, .L2  : the boundaries used for this difficulty bucket
 *                              (numbers, or [lo, hi] for bands); bounds is
 *                              null when no row matched
 *     - bucketKey            : which row was used — 'all', a difficulty
 *                              bucket, or null
 *
 * @param {Object} indicator
 * @param {number|null} value
 * @param {number} difficulty
 * @returns {Object} the payload described above
 */
function explainIndicator(indicator, value, difficulty = 2) {
  const verdict = evalIndicator(indicator, value, difficulty);
  const explanation = { ...verdict, value, direction: null, bounds: null, bucketKey: null };
  if (!indicator) return explanation;

  const rowKey = pickTableKey(indicator, difficulty);
  explanation.direction = indicator.direction;
  explanation.bucketKey = rowKey;

  // Band specs read their row from `bands`; everything else (including a
  // band spec with no matching band row) falls back to `table`.
  const bandRow = indicator.direction === 'band' ? indicator.bands?.[rowKey] : undefined;
  if (bandRow) {
    explanation.bounds = {
      L4: bandRow.L4 || null,
      L3: bandRow.L3 || null,
      L2: bandRow.L2 || null,
    };
    return explanation;
  }

  const tableRow = indicator.table?.[rowKey];
  if (tableRow) {
    explanation.bounds = {
      L4: tableRow.L4 ?? null,
      L3: tableRow.L3 ?? null,
      L2: tableRow.L2 ?? null,
    };
  }
  return explanation;
}
|
|
327
|
+
|
|
328
|
+
/**
 * Roll up a set of sub-scores into one dimension score using the
 * declared weights. Missing sub-scores are skipped and the remaining
 * weights are re-normalised so a partial measurement still produces a
 * sensible number.
 *
 * A sub-score counts as missing when it is null/undefined or does not
 * convert to a finite number, so one malformed entry (e.g. 'n/a') cannot
 * turn the whole roll-up into NaN. Numeric strings are still accepted.
 *
 * @param {string} dimensionKey  'H1' | 'H2' | … | 'O1'
 * @param {Object} subScores     { clarity: 80, converge: 55, … }
 * @returns {number|null} the weighted mean rounded to an integer, or null
 *   when the dimension has no weights or no usable sub-score
 */
function rollupDimension(dimensionKey, subScores) {
  const w = WEIGHTS[dimensionKey];
  if (!w) return null;

  let total = 0;
  let weightSum = 0;
  for (const key of Object.keys(w)) {
    const raw = subScores ? subScores[key] : null;
    if (raw == null) continue;

    // Number.isNaN() does not coerce, so test the coerced value instead.
    const v = Number(raw);
    if (!Number.isFinite(v)) continue;

    total += v * w[key];
    weightSum += w[key];
  }

  if (weightSum === 0) return null;
  return Math.round(total / weightSum);
}
|
|
354
|
+
|
|
355
|
+
module.exports = {
|
|
356
|
+
H1, H2, H3, E1, E2, O1, ALL,
|
|
357
|
+
WEIGHTS,
|
|
358
|
+
LEVEL_SCORE,
|
|
359
|
+
scoreToLevel,
|
|
360
|
+
evalLevel,
|
|
361
|
+
evalIndicator,
|
|
362
|
+
explainIndicator,
|
|
363
|
+
rollupDimension,
|
|
364
|
+
};
|