chekk 0.5.5 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +17 -0
- package/dist/index.js +448 -0
- package/package.json +18 -34
- package/bin/chekk.js +0 -62
- package/src/detect.js +0 -146
- package/src/display.js +0 -1153
- package/src/index.js +0 -301
- package/src/insights.js +0 -661
- package/src/metrics/ai-leverage.js +0 -186
- package/src/metrics/debug-cycles.js +0 -204
- package/src/metrics/decomposition.js +0 -158
- package/src/metrics/session-structure.js +0 -199
- package/src/metrics/token-efficiency.js +0 -258
- package/src/parsers/claude-code.js +0 -231
- package/src/parsers/codex.js +0 -188
- package/src/parsers/cursor.js +0 -281
- package/src/scorer.js +0 -228
- package/src/upload.js +0 -140
package/src/insights.js
DELETED
|
@@ -1,661 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Insights Engine
|
|
3
|
-
*
|
|
4
|
-
* Computes higher-order analysis from raw metrics and sessions:
|
|
5
|
-
* - Signatures: distinctive patterns that make an engineer unique
|
|
6
|
-
* - Watch Points: anti-patterns and areas for improvement
|
|
7
|
-
* - Trajectory: weekly score evolution over time
|
|
8
|
-
* - Project Complexity: classification of project sophistication
|
|
9
|
-
* - Assessment: narrative paragraph for the engineer's profile
|
|
10
|
-
* - Confidence: statistical confidence based on data volume
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
import { computeDecomposition } from './metrics/decomposition.js';
|
|
14
|
-
import { computeDebugCycles } from './metrics/debug-cycles.js';
|
|
15
|
-
import { computeAILeverage } from './metrics/ai-leverage.js';
|
|
16
|
-
import { computeSessionStructure } from './metrics/session-structure.js';
|
|
17
|
-
import { computeOverallScore } from './scorer.js';
|
|
18
|
-
|
|
19
|
-
// ── Benchmarks (early-stage estimates, refined as data grows) ──
|
|
20
|
-
/**
 * Reference values that insight text compares an engineer's metrics against.
 *
 * In the code visible in this module:
 *  - avgExchangesPerSession gates and labels the "Marathon sessions" signature,
 *  - avgPromptLength scales the "Monologue prompting" watch point (in chars),
 *  - highLevelRatio and contextSetRatio are printed as "benchmark: N%".
 * The other keys are not read in this file.
 * NOTE(review): presumably consumed by display/scoring code — confirm.
 */
export const BENCHMARKS = {
  avgExchangesPerSession: 34.2,
  avgPromptLength: 187,
  avgTurnsToResolve: 3.8,
  specificReportRatio: 62,
  highLevelRatio: 18,
  contextSetRatio: 35,
  refinementRatio: 15,
  reviewEndRatio: 28,
};

// ── Dimension score ranges (observed distribution) ──
// One { min, max } pair per scoring dimension key. Not read anywhere in this
// file. NOTE(review): presumably used by a consumer to normalise or plot
// dimension scores — confirm against the importing module.
export const DIM_RANGES = {
  decomposition: { min: 15, max: 95 },
  debugCycles: { min: 20, max: 98 },
  aiLeverage: { min: 10, max: 92 },
  sessionStructure: { min: 12, max: 88 },
};
|
|
38
|
-
|
|
39
|
-
// ══════════════════════════════════════════════
|
|
40
|
-
// SIGNATURES — Distinctive patterns
|
|
41
|
-
// ══════════════════════════════════════════════
|
|
42
|
-
|
|
43
|
-
// Any negation / exclusion wording ("don't", "never", "without", "skip", ...).
// Only used together with negativeConstraintPatterns in computeSignatures.
const constraintPatterns = /\b(don'?t|do not|never|avoid|without|no |not |shouldn'?t|must not|skip|exclude)\b/i;
// Anchored at the start of the prompt: the user asks for a plan/approach review
// before any code is written. Tested against a session's first prompt only.
const preflightPatterns = /^(before (we|you|i)|don'?t code|review (first|this|my|the plan)|let'?s (think|plan|discuss)|check my (approach|plan|thinking))/i;
// Explicit request to write tests/specs before the implementation.
const testFirstPatterns = /\b(write (the )?tests? (first|before)|test.?driven|TDD|spec first|start with (tests?|specs?))\b/i;
// A prohibition followed, later on the same line ("." does not cross
// newlines), by an action verb — e.g. "don't ... add", "never ... modify".
const negativeConstraintPatterns = /\b(don'?t|do not|never|avoid|must not|shouldn'?t)\b.*\b(add|create|use|include|change|modify|touch|remove)\b/i;
|
|
47
|
-
|
|
48
|
-
// Number formatting for insights text
|
|
49
|
-
/**
 * Formats a count compactly for insight text: 1234 -> "1.2k", 2500000 -> "2.5M".
 * Values below 1000 are returned via String(n) unchanged.
 *
 * @param {number} n - Value to format (token counts in this module).
 * @returns {string} Compact representation with at most one decimal place.
 */
function numberFormatInsight(n) {
  // One decimal place, with a trailing ".0" dropped ("2.0" -> "2").
  const scale = (value, divisor) => (value / divisor).toFixed(1).replace(/\.0$/, '');

  if (n >= 1_000_000) return `${scale(n, 1_000_000)}M`;
  if (n >= 1000) {
    const thousands = scale(n, 1000);
    // Values such as 999_999 round up to "1000" here; promote them to the next
    // unit instead of emitting "1000k".
    return thousands === '1000' ? '1M' : `${thousands}k`;
  }
  return String(n);
}
|
|
54
|
-
|
|
55
|
-
// Evidence quality filter (same rules as metric parsers)
|
|
56
|
-
// Prompts that start like a continuation banner, a timestamp, a shell prompt,
// a "$ command" line or a "> quote" line are treated as noise.
const noisePatterns = /^This session is being continued|^\[?[0-9T:.Z-]{20,}|^\S+@\S+.*[%$#>]|^\s*\$\s|^\s*>\s/;

/**
 * Decides whether a prompt is suitable to quote as evidence for an insight.
 *
 * @param {string} prompt - Raw user prompt text.
 * @returns {boolean} true when the prompt is 40-600 characters long, does not
 *   match noisePatterns, and at least 40% of its characters are ASCII letters.
 */
function isGoodEvidence(prompt) {
  if (!prompt) return false;

  const { length } = prompt;
  const withinBounds = length >= 40 && length <= 600;
  if (!withinBounds || noisePatterns.test(prompt)) return false;

  // Mostly non-letter content (logs, stack traces, raw data) reads poorly.
  const letterCount = (prompt.match(/[a-zA-Z]/g) ?? []).length;
  return letterCount / length >= 0.4;
}
|
|
64
|
-
|
|
65
|
-
/**
 * Finds distinctive positive habits ("signatures") in an engineer's sessions.
 *
 * Prompt-derived signatures come from scanning every exchange; metric-derived
 * ones come from the precomputed dimension details. Candidates are appended in
 * a fixed order and only the first four are returned.
 *
 * @param {Array<{exchanges: Array<{userPrompt?: string}>}>} allSessions
 * @param {object} metrics - Must expose `.details` on decomposition,
 *   debugCycles, aiLeverage and sessionStructure.
 * @param {object|null} [tokenEfficiency] - Optional token stats; only used
 *   when its `hasData` flag is truthy.
 * @returns {Array<{name: string, detail: string, evidence: string|null}>}
 */
export function computeSignatures(allSessions, metrics, tokenEfficiency = null) {
  // All four dimensions are dereferenced (in this order) so that a missing one
  // fails immediately, even though sessionStructure is not used below.
  const [decomp, debug, leverage] = ['decomposition', 'debugCycles', 'aiLeverage', 'sessionStructure']
    .map((dimension) => metrics[dimension].details);

  const redirectPattern = /\b(actually|wait|instead|change|no,?|not quite|modify|tweak)\b/i;
  const tally = { prompts: 0, constrained: 0, preflight: 0, testFirst: 0, redirected: 0, accepted: 0 };
  const evidence = { preflight: null, constraint: null, testFirst: null, redirect: null };

  // Keeps the longest quotable prompt seen so far for a given signature.
  const offerEvidence = (slot, candidate) => {
    if (!isGoodEvidence(candidate)) return;
    const held = evidence[slot];
    if (!held || candidate.length > held.length) evidence[slot] = candidate;
  };

  for (const { exchanges } of allSessions) {
    if (exchanges.length === 0) continue;

    // A pre-flight review only counts when it opens the session.
    const opener = exchanges[0].userPrompt || '';
    if (preflightPatterns.test(opener)) {
      tally.preflight += 1;
      offerEvidence('preflight', opener);
    }

    let sessionAsksForTestsFirst = false;
    for (const [position, exchange] of exchanges.entries()) {
      const text = exchange.userPrompt || '';
      tally.prompts += 1;

      if (constraintPatterns.test(text) && negativeConstraintPatterns.test(text)) {
        tally.constrained += 1;
        offerEvidence('constraint', text);
      }

      if (testFirstPatterns.test(text)) {
        sessionAsksForTestsFirst = true;
        offerEvidence('testFirst', text);
      }

      // Only follow-up prompts can redirect or accept earlier AI output.
      if (position === 0) continue;
      if (redirectPattern.test(text)) {
        tally.redirected += 1;
        offerEvidence('redirect', text);
      } else {
        tally.accepted += 1;
      }
    }
    if (sessionAsksForTestsFirst) tally.testFirst += 1;
  }

  const activeSessions = allSessions.reduce(
    (count, session) => (session.exchanges.length > 0 ? count + 1 : count),
    0,
  );
  const share = (part, whole) => (whole > 0 ? part / whole : 0);
  const asPercent = (ratio) => Math.round(ratio * 100);
  const signatures = [];

  // Pre-flight reviews
  const preflightRatio = share(tally.preflight, activeSessions);
  if (preflightRatio > 0.15 && tally.preflight >= 3) {
    signatures.push({
      name: 'Pre-flight reviews',
      detail: `You ask AI to review your plan before coding in ${asPercent(preflightRatio)}% of sessions. Only 8% of engineers do this consistently. This correlates with fewer debug cycles.`,
      evidence: evidence.preflight,
    });
  }

  // Constraint-first prompting
  const constraintRatio = share(tally.constrained, tally.prompts);
  if (constraintRatio > 0.1 && tally.constrained >= 5) {
    signatures.push({
      name: 'Constraint-first prompting',
      detail: `You specify what NOT to do in ${asPercent(constraintRatio)}% of prompts. This is a hallmark of senior architectural thinking that prevents scope creep.`,
      evidence: evidence.constraint,
    });
  }

  // Test-driven AI usage
  const testFirstRatio = share(tally.testFirst, activeSessions);
  if (testFirstRatio > 0.05 && tally.testFirst >= 2) {
    signatures.push({
      name: 'Test-driven AI usage',
      detail: `You request tests before implementation in ${asPercent(testFirstRatio)}% of sessions. Engineers who do this ship fewer bugs post-merge.`,
      evidence: evidence.testFirst,
    });
  }

  // Marathon sessions: metric-derived, so there is no single prompt to quote.
  const benchmarkDepth = BENCHMARKS.avgExchangesPerSession;
  if (decomp.avgExchangesPerSession > benchmarkDepth * 2) {
    signatures.push({
      name: 'Marathon sessions',
      detail: `Avg session depth of ${decomp.avgExchangesPerSession} exchanges is ${Math.round(decomp.avgExchangesPerSession / benchmarkDepth)}x the benchmark (${benchmarkDepth}). You sustain deep, focused work.`,
      evidence: null,
    });
  }

  // Precision debugging: no vague reports across a meaningful sample.
  if (debug.vagueReports === 0 && debug.totalDebugSequences > 5) {
    signatures.push({
      name: 'Precision debugging',
      detail: `Zero vague error reports across ${debug.totalDebugSequences} debug sequences. Every bug report includes specific context. This is rare.`,
      evidence: metrics.debugCycles.examples?.[0]?.prompt || null,
    });
  }

  // Strategic AI usage: high share of architectural / planning prompts.
  if (leverage.highLevelRatio > 30) {
    signatures.push({
      name: 'Strategic AI usage',
      detail: `${leverage.highLevelRatio}% of prompts are architectural or planning-level (benchmark: ${BENCHMARKS.highLevelRatio}%). You use AI as a thinking partner, not just a code generator.`,
      evidence: metrics.aiLeverage.examples?.[0]?.prompt || null,
    });
  }

  // Critical reviewer
  const redirectRatio = share(tally.redirected, tally.redirected + tally.accepted);
  if (redirectRatio > 0.25 && tally.redirected > 10) {
    signatures.push({
      name: 'Critical reviewer',
      detail: `You modify or redirect AI output in ${asPercent(redirectRatio)}% of follow-up prompts. This indicates active evaluation rather than passive acceptance.`,
      evidence: evidence.redirect,
    });
  }

  // Token-efficient prompting: a context re-read ratio below 90% is notable.
  if (tokenEfficiency && tokenEfficiency.hasData) {
    const { contextRereadRatio, sessionsAnalyzed, estimatedCostTotal } = tokenEfficiency;
    if (contextRereadRatio < 0.90 && sessionsAnalyzed >= 5) {
      signatures.push({
        name: 'Token-efficient prompting',
        detail: `Only ${asPercent(contextRereadRatio)}% of your tokens are context re-reads (typical: 95%+). Your focused sessions and clear prompts minimize wasted tokens. Estimated spend: $${estimatedCostTotal.toFixed(2)}.`,
        evidence: null,
      });
    }
  }

  return signatures.slice(0, 4); // Max 4 signatures
}
|
|
215
|
-
|
|
216
|
-
// ══════════════════════════════════════════════
|
|
217
|
-
// WATCH POINTS — Anti-patterns
|
|
218
|
-
// ══════════════════════════════════════════════
|
|
219
|
-
|
|
220
|
-
/**
 * Finds anti-patterns ("watch points") in an engineer's sessions.
 *
 * Candidates are appended in a fixed order (context amnesia, acceptance
 * without review, monologue prompting, missing context, debug spirals, then
 * the token-backed ones) and only the first four are returned.
 *
 * @param {Array<{project?: string, exchanges: Array<{userPrompt?: string}>}>} allSessions
 * @param {object} metrics - Must expose `.details` on decomposition,
 *   debugCycles and sessionStructure.
 * @param {object|null} [tokenEfficiency] - Optional token stats; only used
 *   when its `hasData` flag is truthy, to add cost notes and token-backed items.
 * @returns {Array<{name: string, detail: string, evidence: string|null}>}
 */
export function computeWatchPoints(allSessions, metrics, tokenEfficiency = null) {
  const watchPoints = [];
  const d = metrics.decomposition.details;
  const db = metrics.debugCycles.details;
  const ss = metrics.sessionStructure.details;
  const te = tokenEfficiency && tokenEfficiency.hasData ? tokenEfficiency : null;
  // Blended dollars-per-token; stays 0 (or NaN) when no usable token data exists,
  // and every use below is guarded by `> 0`.
  const costPerToken = te ? te.estimatedCostTotal / Math.max(1, te.grandTotal) : 0;

  const continuityPattern = /\b(continuing|following up|as discussed|last time|previously|where we left|earlier)\b/i;
  const redirectPattern = /\b(actually|wait|instead|change|no,?|not quite|modify|tweak|hmm|but )\b/i;

  // ── Context amnesia: restarting from scratch on the same project ──
  // A Map (not a plain object) so project names such as "constructor" or
  // "__proto__" cannot collide with inherited properties.
  const sessionsByProject = new Map();
  for (const session of allSessions) {
    const project = session.project || 'unknown';
    if (!sessionsByProject.has(project)) sessionsByProject.set(project, []);
    sessionsByProject.get(project).push(session);
  }

  let contextRestarts = 0;
  let totalFollowupSessions = 0;
  let bestContextRestartPrompt = null;
  for (const sessions of sessionsByProject.values()) {
    totalFollowupSessions += sessions.length - 1;
    for (const session of sessions.slice(1)) {
      const firstPrompt = session.exchanges[0]?.userPrompt || '';
      // A substantial opener that never refers back to earlier work counts as a restart.
      if (firstPrompt.length > 50 && !continuityPattern.test(firstPrompt)) {
        contextRestarts++;
        if (isGoodEvidence(firstPrompt) && (!bestContextRestartPrompt || firstPrompt.length > bestContextRestartPrompt.length)) {
          bestContextRestartPrompt = firstPrompt;
        }
      }
    }
  }

  if (totalFollowupSessions > 3 && contextRestarts / totalFollowupSessions > 0.5) {
    let costNote = '';
    if (te) {
      const avgCostPerSession = te.estimatedCostTotal / Math.max(1, te.sessionsAnalyzed);
      const rereadPct = te.contextRereadRatio || 0.94;
      // ~30% of re-read is treated as avoidable with context continuity.
      const totalWasted = avgCostPerSession * rereadPct * 0.3 * contextRestarts;
      if (totalWasted > 1) {
        costNote = ` Estimated wasted re-read cost: ~$${totalWasted.toFixed(0)}.`;
      }
    }
    watchPoints.push({
      name: 'Context amnesia',
      detail: `You restart context from scratch in ${Math.round(contextRestarts / totalFollowupSessions * 100)}% of follow-up sessions on the same project. Engineers who maintain context across sessions are more efficient.${costNote}`,
      evidence: bestContextRestartPrompt,
    });
  }

  // ── Acceptance without review: few follow-ups redirect the AI ──
  let modCount = 0;
  let followupCount = 0;
  for (const session of allSessions) {
    for (const exchange of session.exchanges.slice(1)) {
      followupCount++;
      if (redirectPattern.test(exchange.userPrompt || '')) modCount++;
    }
  }
  // With too few follow-ups the ratio is meaningless; 0.5 keeps the check off.
  const modRatio = followupCount > 10 ? modCount / followupCount : 0.5;
  if (modRatio < 0.15 && followupCount > 20) {
    watchPoints.push({
      name: 'Acceptance without review',
      detail: `You accept AI output without modification in ${Math.round((1 - modRatio) * 100)}% of cases. Top engineers modify or redirect 30%+ of initial suggestions.`,
      evidence: null, // Anti-pattern is the absence of modification
    });
  }

  // ── Monologue prompting: excessively long prompts ──
  if (d.avgPromptLength > 2000) {
    // isGoodEvidence() rejects anything over 600 chars, so it can never accept
    // the 1500-3000 char prompts wanted here. Apply the same noise and letter
    // ratio checks with this watch point's own length window instead.
    const isReadableLongPrompt = (text) =>
      text.length > 1500 &&
      text.length < 3000 &&
      !noisePatterns.test(text) &&
      text.replace(/[^a-zA-Z]/g, '').length / text.length >= 0.4;

    let bestLongPrompt = null;
    for (const session of allSessions) {
      for (const exchange of session.exchanges) {
        const text = exchange.userPrompt || '';
        if (isReadableLongPrompt(text) && (!bestLongPrompt || text.length > bestLongPrompt.length)) {
          bestLongPrompt = text;
        }
      }
    }

    let monologueCostNote = '';
    if (te) {
      const buckets = te.promptLengthAnalysis ?? [];
      const longBucket = buckets.find((b) => b.label === '500+ chars');
      const shortBucket = buckets.find((b) => b.label === '20-100 chars');
      if (longBucket && shortBucket && longBucket.avgCost && shortBucket.avgCost) {
        const ratio = (longBucket.avgCost / shortBucket.avgCost).toFixed(1);
        monologueCostNote = ` Long prompts cost ${ratio}x more per exchange ($${longBucket.avgCost.toFixed(2)} vs $${shortBucket.avgCost.toFixed(2)}).`;
      }
    }
    watchPoints.push({
      name: 'Monologue prompting',
      detail: `Avg prompt length of ${d.avgPromptLength} chars is ${Math.round(d.avgPromptLength / BENCHMARKS.avgPromptLength)}x the benchmark. Breaking complex requests into 2-3 shorter prompts typically yields better AI output.${monologueCostNote}`,
      evidence: bestLongPrompt,
    });
  }

  // ── Missing context: few sessions start with context-setting ──
  if (ss.contextSetRatio < 20) {
    let contextCostNote = '';
    const tokensWith = ss.tokenEvidence?.avgTokensPerExchangeWithContext;
    const tokensWithout = ss.tokenEvidence?.avgTokensPerExchangeNoContext;
    if (tokensWith && tokensWithout && costPerToken > 0) {
      const withCtx = tokensWith * costPerToken;
      const noCtx = tokensWithout * costPerToken;
      if (noCtx > withCtx * 1.1) {
        contextCostNote = ` Sessions without context cost $${noCtx.toFixed(2)}/exchange vs $${withCtx.toFixed(2)} with context.`;
      }
    }
    watchPoints.push({
      name: 'Missing context',
      detail: `Only ${ss.contextSetRatio}% of sessions start with context-setting (benchmark: ${BENCHMARKS.contextSetRatio}%). Upfront context leads to better first responses and fewer corrections.${contextCostNote}`,
      evidence: null,
    });
  }

  // ── Debug spirals: several extended debug loops ──
  if (db.longLoops > 2) {
    let loopCostStr = '';
    const avgLoopTokens = db.tokenEvidence?.avgTokensLongLoop;
    if (avgLoopTokens) {
      // Prefer a dollar figure; fall back to a raw token count without pricing.
      loopCostStr = costPerToken > 0
        ? ` Each spiral costs ~$${(avgLoopTokens * costPerToken).toFixed(2)}.`
        : ` Each spiral averages ${numberFormatInsight(avgLoopTokens)} tokens.`;
    }
    watchPoints.push({
      name: 'Debug spirals',
      detail: `${db.longLoops} extended debug loops (>5 turns) detected.${loopCostStr} When stuck, try providing more specific error context or breaking the problem differently.`,
      evidence: null,
    });
  }

  // ── Token-backed watch points ──
  if (te) {
    // Marathon sessions consuming a disproportionate share of spend.
    const marathonSessions = (te.costliestSessions ?? []).filter((s) => s.exchanges > 50);
    if (marathonSessions.length >= 2) {
      const marathonCost = marathonSessions.reduce((sum, s) => sum + s.estimatedCost, 0);
      const marathonPct = te.estimatedCostTotal > 0 ? Math.round(marathonCost / te.estimatedCostTotal * 100) : 0;
      if (marathonPct > 40) {
        watchPoints.push({
          name: 'Marathon session tax',
          detail: `${marathonSessions.length} marathon sessions (50+ exchanges) consumed ~${marathonPct}% of your total spend (~$${marathonCost.toFixed(2)}). Context compounds — splitting into focused sessions would reduce token waste.`,
          evidence: null,
        });
      }
    }

    // Vague debug prompts costing noticeably more than specific ones.
    const vagueAvg = db.tokenEvidence?.avgTokensVagueDebug;
    const specificAvg = db.tokenEvidence?.avgTokensSpecificDebug;
    if (vagueAvg && specificAvg && vagueAvg > specificAvg * 1.5 && db.vagueReports > 3) {
      watchPoints.push({
        name: 'Vague prompts are expensive',
        detail: `Your vague debug prompts average ${numberFormatInsight(vagueAvg)} tokens vs ${numberFormatInsight(specificAvg)} for specific ones — ${(vagueAvg / specificAvg).toFixed(1)}x more expensive. Adding error details upfront saves real money.`,
        evidence: null,
      });
    }
  }

  return watchPoints.slice(0, 4); // Max 4 watch points (was 3, expanded for token insights)
}
|
|
396
|
-
|
|
397
|
-
// ══════════════════════════════════════════════
|
|
398
|
-
// TRAJECTORY — Weekly score evolution
|
|
399
|
-
// ══════════════════════════════════════════════
|
|
400
|
-
|
|
401
|
-
/**
 * Builds a week-by-week score trajectory from timestamped sessions.
 *
 * Sessions are bucketed into Monday-to-Sunday weeks (local time). Every week
 * holding at least two sessions is scored with the four metric functions and
 * computeOverallScore.
 *
 * @param {Array<{startTime?: string|number|Date}>} allSessions
 * @returns {{weeks: Array<{label: string, score: number, sessions: number}>,
 *   delta: number, daysSpan: number, velocityLabel: string,
 *   velocityDetail: string}|null} null when there are fewer than 5 sessions
 *   with a parseable start time, they span less than 10 days, or fewer than
 *   2 weeks qualify.
 */
export function computeTrajectory(allSessions) {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  const MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];

  // Parse each start time once and drop unparseable values, which would
  // otherwise turn the sort and the span arithmetic into NaN.
  const timed = allSessions
    .filter((session) => session.startTime)
    .map((session) => ({ session, time: new Date(session.startTime) }))
    .filter(({ time }) => !Number.isNaN(time.getTime()))
    .sort((a, b) => a.time - b.time);
  if (timed.length < 5) return null;

  const firstDate = timed[0].time;
  const lastDate = timed[timed.length - 1].time;

  // Need at least ~2 weeks of data.
  const daySpan = (lastDate - firstDate) / MS_PER_DAY;
  if (daySpan < 10) return null;

  // Align to the Monday on or before the first session. getDay() is 0 for
  // Sunday, so "(day + 6) % 7" is the number of days elapsed since Monday;
  // subtracting getDay() - 1 would move a Sunday forward and skip its sessions.
  let weekStart = new Date(firstDate);
  weekStart.setHours(0, 0, 0, 0);
  weekStart.setDate(weekStart.getDate() - ((weekStart.getDay() + 6) % 7));

  const weeks = [];
  while (weekStart <= lastDate) {
    const bucketStart = weekStart;
    const weekEnd = new Date(bucketStart);
    weekEnd.setDate(weekEnd.getDate() + 7);

    const weekSessions = timed
      .filter(({ time }) => time >= bucketStart && time < weekEnd)
      .map(({ session }) => session);

    if (weekSessions.length >= 2) {
      const weekMetrics = {
        decomposition: computeDecomposition(weekSessions),
        debugCycles: computeDebugCycles(weekSessions),
        aiLeverage: computeAILeverage(weekSessions),
        sessionStructure: computeSessionStructure(weekSessions),
      };
      const { overall } = computeOverallScore(weekMetrics);

      // Last day of the bucket via date arithmetic, so a week crossing a month
      // boundary reads "Jan 29-Feb 4" rather than "Jan 29-4" or "Dec 25-0".
      const lastDay = new Date(weekEnd);
      lastDay.setDate(lastDay.getDate() - 1);
      const endLabel = lastDay.getMonth() === bucketStart.getMonth()
        ? String(lastDay.getDate())
        : `${MONTHS[lastDay.getMonth()]} ${lastDay.getDate()}`;

      weeks.push({
        label: `${MONTHS[bucketStart.getMonth()]} ${bucketStart.getDate()}-${endLabel}`,
        score: overall,
        sessions: weekSessions.length,
      });
    }

    weekStart = weekEnd;
  }

  if (weeks.length < 2) return null;

  // Learning velocity: change between first and last scored week, per scored week.
  const delta = weeks[weeks.length - 1].score - weeks[0].score;
  const velocityPerWeek = delta / weeks.length;

  let velocityLabel;
  if (velocityPerWeek > 3) velocityLabel = 'FAST';
  else if (velocityPerWeek > 1) velocityLabel = 'STEADY';
  else if (velocityPerWeek > -1) velocityLabel = 'STABLE';
  else velocityLabel = 'DECLINING';

  const roundedSpan = Math.round(daySpan);
  return {
    weeks,
    delta,
    daysSpan: roundedSpan,
    velocityLabel,
    velocityDetail: delta !== 0
      ? `${Math.abs(delta)} point ${delta > 0 ? 'improvement' : 'change'} over ${roundedSpan} days`
      : `Stable over ${roundedSpan} days`,
  };
}
|
|
476
|
-
|
|
477
|
-
// ══════════════════════════════════════════════
|
|
478
|
-
// PROJECT COMPLEXITY — What did they build?
|
|
479
|
-
// ══════════════════════════════════════════════
|
|
480
|
-
|
|
481
|
-
// Keyword families that hint at project sophistication. "high" covers
// infrastructure / architecture vocabulary, "medium" covers everyday
// application-development vocabulary.
const complexitySignals = {
  high: /\b(pipeline|distributed|real.?time|analytics|classification|machine learning|ml |auth|oauth|websocket|streaming|queue|worker|migration|microservice|kubernetes|docker|deployment|ci.?cd|infrastructure|database design|data model|schema design|api design|caching|rate limit)\b/i,
  medium: /\b(api|crud|component|feature|integration|testing|refactor|database|query|endpoint|route|middleware|hook|state management|responsive|animation|chart|graph|dashboard)\b/i,
};

/**
 * Classifies each project as HIGH / MEDIUM / LOW complexity from the keywords
 * found in its prompts and from its volume of exchanges.
 *
 * @param {Array<{project?: string, exchangeCount?: number, startTime?: string,
 *   exchanges: Array<{userPrompt?: string}>}>} allSessions
 * @returns {Array<{name: string, complexity: 'HIGH'|'MEDIUM'|'LOW',
 *   sessions: number, exchanges: number, daysActive: number,
 *   signals: string[]}>} Up to five projects, ordered by exchanges descending.
 *   Projects with fewer than 3 exchanges are omitted.
 */
export function computeProjectComplexity(allSessions) {
  // Global copies so every keyword in a prompt is collected; a non-global
  // match() only ever reports the first hit.
  const highPattern = new RegExp(complexitySignals.high.source, 'gi');
  const mediumPattern = new RegExp(complexitySignals.medium.source, 'gi');
  const collect = (pattern, text, into) => {
    for (const [hit] of text.matchAll(pattern)) into.add(hit.toLowerCase().trim());
  };

  // A Map (not a plain object) so project names such as "constructor" or
  // "__proto__" cannot collide with inherited properties.
  const projectData = new Map();
  for (const session of allSessions) {
    const projectName = session.project || 'unknown';
    let data = projectData.get(projectName);
    if (!data) {
      data = { sessions: 0, exchanges: 0, daysActive: new Set(), highSignals: new Set(), medSignals: new Set() };
      projectData.set(projectName, data);
    }

    data.sessions += 1;
    // Fall back to the actual exchange list when the precomputed count is
    // absent, rather than poisoning the total with NaN.
    data.exchanges += session.exchangeCount ?? session.exchanges.length;

    if (session.startTime) {
      const started = new Date(session.startTime);
      // toISOString() throws on an invalid date; skip those instead.
      if (!Number.isNaN(started.getTime())) {
        data.daysActive.add(started.toISOString().split('T')[0]);
      }
    }

    for (const exchange of session.exchanges) {
      const prompt = exchange.userPrompt || '';
      collect(highPattern, prompt, data.highSignals);
      collect(mediumPattern, prompt, data.medSignals);
    }
  }

  const projects = [];
  for (const [name, data] of projectData) {
    if (data.exchanges < 3) continue; // Skip trivial projects

    let complexity;
    if (data.highSignals.size >= 3 || (data.highSignals.size >= 1 && data.exchanges > 50)) {
      complexity = 'HIGH';
    } else if (data.medSignals.size >= 3 || data.highSignals.size >= 1 || data.exchanges > 30) {
      complexity = 'MEDIUM';
    } else {
      complexity = 'LOW';
    }

    projects.push({
      // Long names keep their tail, which is the distinguishing end of a path.
      name: name.length > 28 ? '...' + name.slice(-25) : name,
      complexity,
      sessions: data.sessions,
      exchanges: data.exchanges,
      daysActive: data.daysActive.size,
      signals: [...data.highSignals, ...data.medSignals].slice(0, 5),
    });
  }

  // Busiest projects first; report the top 5.
  projects.sort((a, b) => b.exchanges - a.exchanges);
  return projects.slice(0, 5);
}
|
|
546
|
-
|
|
547
|
-
// ══════════════════════════════════════════════
|
|
548
|
-
// ASSESSMENT — Narrative paragraph
|
|
549
|
-
// ══════════════════════════════════════════════
|
|
550
|
-
|
|
551
|
-
/**
 * Writes the narrative assessment paragraph for an engineer's profile.
 *
 * The paragraph has up to four parts: the strongest dimension (plus the first
 * signature, if any), the runner-up dimension when it scores 65+, the weakest
 * dimension when it scores below 65, and the archetype's "best for" sentence.
 *
 * @param {{scores: object, archetype: {bestFor: string}}} result
 * @param {object} metrics - Must expose `.details` on all four dimensions.
 * @param {Array<{name: string}>} signatures
 * @param {Array<object>} watchPoints - Accepted for interface compatibility; not read.
 * @returns {string}
 */
export function generateAssessment(result, metrics, signatures, watchPoints) {
  const { scores, archetype } = result;
  // All four dimensions are dereferenced (in this order) so that a missing one
  // fails immediately; only the debug and structure details are used below.
  const [, debugDetails, , structureDetails] = ['decomposition', 'debugCycles', 'aiLeverage', 'sessionStructure']
    .map((dimension) => metrics[dimension].details);

  // Rank dimensions from strongest to weakest (stable for ties).
  const ranked = [
    ['decomposition', 'problem decomposition'],
    ['debugCycles', 'debugging efficiency'],
    ['aiLeverage', 'AI leverage'],
    ['sessionStructure', 'workflow discipline'],
  ]
    .map(([key, label]) => ({ key, label, score: scores[key] }))
    .sort((a, b) => b.score - a.score);
  const [strongest, runnerUp] = ranked;
  const weakest = ranked[ranked.length - 1];
  const quality = (score) => dimQualitative(score).toLowerCase();

  const parts = [];

  // First sentence: headline strength, optionally with the top signature.
  const signatureClause = signatures.length > 0
    ? ` with a distinctive pattern of ${formatSignatureName(signatures[0].name)}`
    : '';
  parts.push(`This engineer demonstrates ${quality(strongest.score)} ${strongest.label}${signatureClause}.`);

  // Second sentence: second strength, with extra colour for fast debuggers.
  if (runnerUp.score >= 65) {
    const isSurgicalDebugger = debugDetails.avgTurnsToResolve <= 2 && runnerUp.key === 'debugCycles';
    const debugClause = isSurgicalDebugger
      ? ` \u2014 surgical and specific with ${debugDetails.longLoops === 0 ? 'zero' : 'minimal'} extended loops`
      : '';
    parts.push(` Their ${runnerUp.label} is also ${quality(runnerUp.score)}${debugClause}.`);
  }

  // Third sentence: growth area.
  if (weakest.score < 65) {
    let advice;
    switch (weakest.key) {
      case 'sessionStructure':
        advice = ': context-setting and upfront planning are below benchmark';
        if (structureDetails.refinementRatio > 15) {
          advice += ', though iterative refinement partially compensates';
        }
        break;
      case 'decomposition':
        advice = ': more task breakdown and structured thinking would yield significant score improvement';
        break;
      case 'aiLeverage':
        advice = ': using AI for architecture and planning, not just code generation, would increase impact';
        break;
      default:
        advice = ': stronger error reporting and systematic resolution would improve efficiency';
    }
    parts.push(` Primary growth opportunity is in ${weakest.label}${advice}.`);
  }

  // Fourth sentence: what this archetype is best suited for.
  parts.push(` ${archetype.bestFor}`);

  return parts.join('');
}
|
|
610
|
-
|
|
611
|
-
/**
 * Maps a dimension score to its qualitative band.
 *
 * @param {number} score
 * @returns {'Exceptional'|'Strong'|'Solid'|'Developing'|'Early-stage'}
 *   80+ Exceptional, 65+ Strong, 50+ Solid, 35+ Developing, else Early-stage.
 */
function dimQualitative(score) {
  // Ordered from the highest floor down; the first floor the score reaches wins.
  const bands = [
    [80, 'Exceptional'],
    [65, 'Strong'],
    [50, 'Solid'],
    [35, 'Developing'],
  ];
  const band = bands.find(([floor]) => score >= floor);
  return band ? band[1] : 'Early-stage';
}
|
|
618
|
-
|
|
619
|
-
// Lowercase a signature name for prose while preserving acronyms like "AI", "TDD"
|
|
620
|
-
/**
 * Lowercases a signature name for use mid-sentence, keeping the acronyms
 * "AI" and "TDD" in capitals when they appear as whole words.
 *
 * @param {string} name - Signature name, e.g. "Test-driven AI usage".
 * @returns {string} e.g. "test-driven AI usage".
 */
function formatSignatureName(name) {
  const acronyms = { ai: 'AI', tdd: 'TDD' };
  return name
    .toLowerCase()
    .replace(/\b(?:ai|tdd)\b/g, (word) => acronyms[word]);
}
|
|
626
|
-
|
|
627
|
-
// ══════════════════════════════════════════════
|
|
628
|
-
// CONFIDENCE — Data volume indicator
|
|
629
|
-
// ══════════════════════════════════════════════
|
|
630
|
-
|
|
631
|
-
/**
 * Rates how much data backs a profile.
 *
 * Points are awarded for session count, exchange count and the number of
 * distinct tools, plus a bonus when both sessions and exchanges are plentiful;
 * the total is capped at 100.
 *
 * @param {{totalSessions: number, totalExchanges: number, tools: Array}} sessionStats
 * @returns {{score: number, level: 'HIGH'|'MODERATE'|'LOW'}} HIGH at 80+,
 *   MODERATE at 50+, otherwise LOW.
 */
export function computeConfidence(sessionStats) {
  const { totalSessions, totalExchanges, tools } = sessionStats;
  const toolCount = tools.length;

  // Returns the points of the first tier whose minimum the value reaches.
  const tiered = (value, tiers, fallback) => {
    const tier = tiers.find(([minimum]) => value >= minimum);
    return tier ? tier[1] : fallback;
  };

  const sessionPoints = tiered(totalSessions, [[50, 40], [20, 30], [10, 20]], 10);
  const exchangePoints = tiered(totalExchanges, [[500, 30], [200, 20], [50, 10]], 0);
  const toolPoints = tiered(toolCount, [[3, 20], [2, 15]], 10);
  // Bonus for enough data on both axes.
  const volumeBonus = totalSessions >= 30 && totalExchanges >= 300 ? 10 : 0;

  const score = Math.min(100, sessionPoints + exchangePoints + toolPoints + volumeBonus);
  const level = score >= 80 ? 'HIGH' : score >= 50 ? 'MODERATE' : 'LOW';

  return { score, level };
}
|