chekk 0.5.4 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/insights.js DELETED
@@ -1,661 +0,0 @@
1
- /**
2
- * Insights Engine
3
- *
4
- * Computes higher-order analysis from raw metrics and sessions:
5
- * - Signatures: distinctive patterns that make an engineer unique
6
- * - Watch Points: anti-patterns and areas for improvement
7
- * - Trajectory: weekly score evolution over time
8
- * - Project Complexity: classification of project sophistication
9
- * - Assessment: narrative paragraph for the engineer's profile
10
- * - Confidence: statistical confidence based on data volume
11
- */
12
-
13
- import { computeDecomposition } from './metrics/decomposition.js';
14
- import { computeDebugCycles } from './metrics/debug-cycles.js';
15
- import { computeAILeverage } from './metrics/ai-leverage.js';
16
- import { computeSessionStructure } from './metrics/session-structure.js';
17
- import { computeOverallScore } from './scorer.js';
18
-
19
// ── Benchmarks (early-stage estimates, refined as data grows) ──
// Population baselines that insight text compares an engineer against.
// Ratio fields are percentages (0-100); the others are raw averages.
export const BENCHMARKS = {
  avgExchangesPerSession: 34.2, // typical exchanges per session (compared against decomposition details)
  avgPromptLength: 187,         // characters (compared against decomposition details)
  avgTurnsToResolve: 3.8,       // presumably debug turns to resolution — not read in this module
  specificReportRatio: 62,      // presumably % of specific bug reports — not read in this module
  highLevelRatio: 18,           // % of prompts that are architectural/planning-level
  contextSetRatio: 35,          // % of sessions that open with context-setting
  refinementRatio: 15,          // presumably % of refining prompts — not read in this module
  reviewEndRatio: 28,           // presumably % of sessions ending in review — not read in this module
};
30
-
31
- // ── Dimension score ranges (observed distribution) ──
32
- export const DIM_RANGES = {
33
- decomposition: { min: 15, max: 95 },
34
- debugCycles: { min: 20, max: 98 },
35
- aiLeverage: { min: 10, max: 92 },
36
- sessionStructure: { min: 12, max: 88 },
37
- };
38
-
39
- // ══════════════════════════════════════════════
40
- // SIGNATURES — Distinctive patterns
41
- // ══════════════════════════════════════════════
42
-
43
// Prompt contains a forbidding/limiting word ("don't", "never", "avoid", ...).
const constraintPatterns = /\b(don'?t|do not|never|avoid|without|no |not |shouldn'?t|must not|skip|exclude)\b/i;
// Session OPENS with a plan-review request; only tested against the first prompt.
const preflightPatterns = /^(before (we|you|i)|don'?t code|review (first|this|my|the plan)|let'?s (think|plan|discuss)|check my (approach|plan|thinking))/i;
// Prompt asks for tests/specs before implementation (TDD-style).
const testFirstPatterns = /\b(write (the )?tests? (first|before)|test.?driven|TDD|spec first|start with (tests?|specs?))\b/i;
// Forbidding word followed (anywhere later) by an action verb — i.e. a concrete
// negative constraint. Used together with constraintPatterns (both must match).
const negativeConstraintPatterns = /\b(don'?t|do not|never|avoid|must not|shouldn'?t)\b.*\b(add|create|use|include|change|modify|touch|remove)\b/i;
47
-
48
// Compact human-readable number for insight text: 1500 -> "1.5k", 2_000_000 -> "2M".
function numberFormatInsight(n) {
  // One decimal place, with a trailing ".0" stripped ("1.0M" -> "1M").
  const scaled = (value, suffix) => {
    const fixed = value.toFixed(1);
    return (fixed.endsWith('.0') ? fixed.slice(0, -2) : fixed) + suffix;
  };
  if (n >= 1_000_000) return scaled(n / 1_000_000, 'M');
  if (n >= 1000) return scaled(n / 1000, 'k');
  return String(n);
}
54
-
55
// Prompts that look like shell transcripts, timestamps, or session-resume
// banners rather than something the engineer actually wrote.
const noisePatterns = /^This session is being continued|^\[?[0-9T:.Z-]{20,}|^\S+@\S+.*[%$#>]|^\s*\$\s|^\s*>\s/;

/**
 * Decide whether a prompt is clean enough to quote as evidence.
 * Rejects empty/short/overlong prompts (outside 40-600 chars), known noise
 * patterns, and text that is less than 40% ASCII letters.
 */
function isGoodEvidence(prompt) {
  if (!prompt) return false;
  const { length } = prompt;
  if (length < 40 || length > 600) return false;
  if (noisePatterns.test(prompt)) return false;
  let letters = 0;
  for (const ch of prompt) {
    if (/[a-zA-Z]/.test(ch)) letters += 1;
  }
  return letters / length >= 0.4;
}
64
-
65
/**
 * Detect distinctive positive patterns ("signatures") in an engineer's
 * AI-collaboration history.
 *
 * Fix: removed the unused `ss` (sessionStructure details) local — this
 * function never reads it.
 *
 * @param {Array<Object>} allSessions - Parsed sessions; each provides an
 *   `exchanges` array of `{ userPrompt }` objects.
 * @param {Object} metrics - Metric-computer output; reads the `.details`
 *   of decomposition/debugCycles/aiLeverage and the first `.examples` entry
 *   of debugCycles/aiLeverage.
 * @param {?Object} tokenEfficiency - Optional token/cost analysis; when
 *   present with `hasData`, enables the token-backed signature.
 * @returns {Array<{name: string, detail: string, evidence: ?string}>}
 *   At most 4 signatures, each with an optional evidence prompt.
 */
export function computeSignatures(allSessions, metrics, tokenEfficiency = null) {
  const signatures = [];
  const d = metrics.decomposition.details;
  const db = metrics.debugCycles.details;
  const ai = metrics.aiLeverage.details;

  let totalPrompts = 0;
  let constraintPrompts = 0;
  let preflightSessions = 0;
  let testFirstSessions = 0;
  let modificationCount = 0;
  let acceptCount = 0;

  // Capture evidence prompts for each signature type (longest quotable wins)
  let bestPreflightPrompt = null;
  let bestConstraintPrompt = null;
  let bestTestFirstPrompt = null;
  let bestModifyPrompt = null;

  for (const session of allSessions) {
    const { exchanges } = session;
    if (exchanges.length === 0) continue;

    // Check first prompt for preflight review
    const firstPrompt = exchanges[0].userPrompt || '';
    if (preflightPatterns.test(firstPrompt)) {
      preflightSessions++;
      if (isGoodEvidence(firstPrompt) && (!bestPreflightPrompt || firstPrompt.length > bestPreflightPrompt.length)) {
        bestPreflightPrompt = firstPrompt;
      }
    }

    let hasTestFirst = false;
    for (let i = 0; i < exchanges.length; i++) {
      const prompt = exchanges[i].userPrompt || '';
      totalPrompts++;

      // Negative constraint: a forbidding word AND a later action verb
      if (constraintPatterns.test(prompt) && negativeConstraintPatterns.test(prompt)) {
        constraintPrompts++;
        if (isGoodEvidence(prompt) && (!bestConstraintPrompt || prompt.length > bestConstraintPrompt.length)) {
          bestConstraintPrompt = prompt;
        }
      }

      if (testFirstPatterns.test(prompt)) {
        hasTestFirst = true;
        if (isGoodEvidence(prompt) && (!bestTestFirstPrompt || prompt.length > bestTestFirstPrompt.length)) {
          bestTestFirstPrompt = prompt;
        }
      }

      // Track modification vs acceptance (follow-up prompts only, i > 0)
      if (i > 0 && /\b(actually|wait|instead|change|no,?|not quite|modify|tweak)\b/i.test(prompt)) {
        modificationCount++;
        if (isGoodEvidence(prompt) && (!bestModifyPrompt || prompt.length > bestModifyPrompt.length)) {
          bestModifyPrompt = prompt;
        }
      } else if (i > 0) {
        acceptCount++;
      }
    }
    if (hasTestFirst) testFirstSessions++;
  }

  const sessionsWithExchanges = allSessions.filter(s => s.exchanges.length > 0).length;

  // Pre-flight reviews
  const preflightRatio = sessionsWithExchanges > 0 ? preflightSessions / sessionsWithExchanges : 0;
  if (preflightRatio > 0.15 && preflightSessions >= 3) {
    signatures.push({
      name: 'Pre-flight reviews',
      detail: `You ask AI to review your plan before coding in ${Math.round(preflightRatio * 100)}% of sessions. Only 8% of engineers do this consistently. This correlates with fewer debug cycles.`,
      evidence: bestPreflightPrompt,
    });
  }

  // Constraint-first prompting
  const constraintRatio = totalPrompts > 0 ? constraintPrompts / totalPrompts : 0;
  if (constraintRatio > 0.1 && constraintPrompts >= 5) {
    signatures.push({
      name: 'Constraint-first prompting',
      detail: `You specify what NOT to do in ${Math.round(constraintRatio * 100)}% of prompts. This is a hallmark of senior architectural thinking that prevents scope creep.`,
      evidence: bestConstraintPrompt,
    });
  }

  // Test-driven AI usage
  const testFirstRatio = sessionsWithExchanges > 0 ? testFirstSessions / sessionsWithExchanges : 0;
  if (testFirstRatio > 0.05 && testFirstSessions >= 2) {
    signatures.push({
      name: 'Test-driven AI usage',
      detail: `You request tests before implementation in ${Math.round(testFirstRatio * 100)}% of sessions. Engineers who do this ship fewer bugs post-merge.`,
      evidence: bestTestFirstPrompt,
    });
  }

  // Deep session marathons — evidence is metric-derived, no single prompt
  if (d.avgExchangesPerSession > BENCHMARKS.avgExchangesPerSession * 2) {
    signatures.push({
      name: 'Marathon sessions',
      detail: `Avg session depth of ${d.avgExchangesPerSession} exchanges is ${Math.round(d.avgExchangesPerSession / BENCHMARKS.avgExchangesPerSession)}x the benchmark (${BENCHMARKS.avgExchangesPerSession}). You sustain deep, focused work.`,
      evidence: null,
    });
  }

  // Zero vague debugging — evidence is the absence of something
  if (db.vagueReports === 0 && db.totalDebugSequences > 5) {
    signatures.push({
      name: 'Precision debugging',
      detail: `Zero vague error reports across ${db.totalDebugSequences} debug sequences. Every bug report includes specific context. This is rare.`,
      evidence: metrics.debugCycles.examples?.[0]?.prompt || null,
    });
  }

  // High architectural ratio
  if (ai.highLevelRatio > 30) {
    signatures.push({
      name: 'Strategic AI usage',
      detail: `${ai.highLevelRatio}% of prompts are architectural or planning-level (benchmark: ${BENCHMARKS.highLevelRatio}%). You use AI as a thinking partner, not just a code generator.`,
      evidence: metrics.aiLeverage.examples?.[0]?.prompt || null,
    });
  }

  // Critical reviewer
  const totalFollowups = modificationCount + acceptCount;
  const modRatio = totalFollowups > 0 ? modificationCount / totalFollowups : 0;
  if (modRatio > 0.25 && modificationCount > 10) {
    signatures.push({
      name: 'Critical reviewer',
      detail: `You modify or redirect AI output in ${Math.round(modRatio * 100)}% of follow-up prompts. This indicates active evaluation rather than passive acceptance.`,
      evidence: bestModifyPrompt,
    });
  }

  // ── Token-backed signature: efficient token usage ──
  if (tokenEfficiency && tokenEfficiency.hasData) {
    const te = tokenEfficiency;
    // If context re-read ratio is below 90%, that's notably efficient
    if (te.contextRereadRatio < 0.90 && te.sessionsAnalyzed >= 5) {
      signatures.push({
        name: 'Token-efficient prompting',
        detail: `Only ${Math.round(te.contextRereadRatio * 100)}% of your tokens are context re-reads (typical: 95%+). Your focused sessions and clear prompts minimize wasted tokens. Estimated spend: $${te.estimatedCostTotal.toFixed(2)}.`,
        evidence: null,
      });
    }
  }

  return signatures.slice(0, 4); // Max 4 signatures
}
215
-
216
- // ══════════════════════════════════════════════
217
- // WATCH POINTS — Anti-patterns
218
- // ══════════════════════════════════════════════
219
-
220
/**
 * Detect anti-patterns ("watch points") in an engineer's AI-collaboration
 * history.
 *
 * Fixes:
 * - Removed the unused `ai` details alias and the write-only
 *   `multiSessionProjects` counter.
 * - Monologue-prompting evidence was unreachable: the old code required
 *   `p.length > 1500 && isGoodEvidence(p)`, but isGoodEvidence rejects
 *   anything over 600 chars, so `bestLongPrompt` was always null. The noise
 *   and letter-ratio checks are now applied inline without the length cap.
 *
 * @param {Array<Object>} allSessions - Parsed sessions with `exchanges`
 *   arrays of `{ userPrompt }` and an optional `project` name.
 * @param {Object} metrics - Metric-computer output; reads decomposition,
 *   debugCycles and sessionStructure `.details`.
 * @param {?Object} tokenEfficiency - Optional token/cost analysis used for
 *   dollar annotations and token-backed watch points.
 * @returns {Array<{name: string, detail: string, evidence: ?string}>}
 *   At most 4 watch points.
 */
export function computeWatchPoints(allSessions, metrics, tokenEfficiency = null) {
  const watchPoints = [];
  const d = metrics.decomposition.details;
  const db = metrics.debugCycles.details;
  const ss = metrics.sessionStructure.details;

  // Context amnesia — restarting from scratch on same project
  const projectSessions = {};
  for (const s of allSessions) {
    const p = s.project || 'unknown';
    if (!projectSessions[p]) projectSessions[p] = [];
    projectSessions[p].push(s);
  }
  let contextRestarts = 0;
  let bestContextRestartPrompt = null;
  for (const [, sessions] of Object.entries(projectSessions)) {
    if (sessions.length < 2) continue;
    for (let i = 1; i < sessions.length; i++) {
      const firstPrompt = sessions[i].exchanges[0]?.userPrompt || '';
      // If first prompt doesn't reference previous work, it's a context restart
      if (firstPrompt.length > 50 && !/\b(continuing|following up|as discussed|last time|previously|where we left|earlier)\b/i.test(firstPrompt)) {
        contextRestarts++;
        if (isGoodEvidence(firstPrompt) && (!bestContextRestartPrompt || firstPrompt.length > bestContextRestartPrompt.length)) {
          bestContextRestartPrompt = firstPrompt;
        }
      }
    }
  }
  const totalFollowupSessions = Object.values(projectSessions).reduce((sum, s) => sum + Math.max(0, s.length - 1), 0);
  if (totalFollowupSessions > 3 && contextRestarts / totalFollowupSessions > 0.5) {
    // Estimate cost of context restarts — each restart forces full context re-read
    let costNote = '';
    if (tokenEfficiency && tokenEfficiency.hasData) {
      const avgCostPerSession = tokenEfficiency.estimatedCostTotal / Math.max(1, tokenEfficiency.sessionsAnalyzed);
      // The re-read cost of each restart scales with context size — estimate as % of avg session cost
      const rereadPct = tokenEfficiency.contextRereadRatio || 0.94;
      const wastedPerRestart = avgCostPerSession * rereadPct * 0.3; // ~30% of re-read is avoidable with context continuity
      const totalWasted = wastedPerRestart * contextRestarts;
      if (totalWasted > 1) {
        costNote = ` Estimated wasted re-read cost: ~$${totalWasted.toFixed(0)}.`;
      }
    }
    watchPoints.push({
      name: 'Context amnesia',
      detail: `You restart context from scratch in ${Math.round(contextRestarts / totalFollowupSessions * 100)}% of follow-up sessions on the same project. Engineers who maintain context across sessions are more efficient.${costNote}`,
      evidence: bestContextRestartPrompt,
    });
  }

  // Low modification rate — accepting AI output without review
  let modCount = 0;
  let followupCount = 0;
  for (const session of allSessions) {
    for (let i = 1; i < session.exchanges.length; i++) {
      followupCount++;
      const prompt = session.exchanges[i].userPrompt || '';
      if (/\b(actually|wait|instead|change|no,?|not quite|modify|tweak|hmm|but )\b/i.test(prompt)) {
        modCount++;
      }
    }
  }
  const modRatio = followupCount > 10 ? modCount / followupCount : 0.5;
  if (modRatio < 0.15 && followupCount > 20) {
    watchPoints.push({
      name: 'Acceptance without review',
      detail: `You accept AI output without modification in ${Math.round((1 - modRatio) * 100)}% of cases. Top engineers modify or redirect 30%+ of initial suggestions.`,
      evidence: null, // Anti-pattern is the absence of modification
    });
  }

  // Monologue prompting — excessively long first prompts
  if (d.avgPromptLength > 2000) {
    // Find a representative long prompt. isGoodEvidence can't be reused here
    // (it caps length at 600), so apply its noise/letter-ratio rules inline.
    let bestLongPrompt = null;
    for (const s of allSessions) {
      for (const ex of s.exchanges) {
        const p = ex.userPrompt || '';
        if (p.length > 1500 && p.length < 3000 && !noisePatterns.test(p)) {
          const alpha = p.replace(/[^a-zA-Z]/g, '').length;
          if (alpha / p.length >= 0.4 && (!bestLongPrompt || p.length > bestLongPrompt.length)) {
            bestLongPrompt = p;
          }
        }
      }
    }
    // Dollar annotation: long prompts trigger large context re-reads each time
    let monologueCostNote = '';
    if (tokenEfficiency && tokenEfficiency.hasData) {
      // Compare avg tokens for long vs short prompts from the prompt length analysis
      const longBucket = tokenEfficiency.promptLengthAnalysis.find(b => b.label === '500+ chars');
      const shortBucket = tokenEfficiency.promptLengthAnalysis.find(b => b.label === '20-100 chars');
      if (longBucket && shortBucket && longBucket.avgCost && shortBucket.avgCost) {
        const ratio = (longBucket.avgCost / shortBucket.avgCost).toFixed(1);
        monologueCostNote = ` Long prompts cost ${ratio}x more per exchange ($${longBucket.avgCost.toFixed(2)} vs $${shortBucket.avgCost.toFixed(2)}).`;
      }
    }
    watchPoints.push({
      name: 'Monologue prompting',
      detail: `Avg prompt length of ${d.avgPromptLength} chars is ${Math.round(d.avgPromptLength / BENCHMARKS.avgPromptLength)}x the benchmark. Breaking complex requests into 2-3 shorter prompts typically yields better AI output.${monologueCostNote}`,
      evidence: bestLongPrompt,
    });
  }

  // Low context-setting
  if (ss.contextSetRatio < 20) {
    let contextCostNote = '';
    if (ss.tokenEvidence?.avgTokensPerExchangeWithContext && ss.tokenEvidence?.avgTokensPerExchangeNoContext) {
      // Average $/token derived from the overall cost estimate
      const pricing = tokenEfficiency && tokenEfficiency.hasData
        ? tokenEfficiency.estimatedCostTotal / Math.max(1, tokenEfficiency.grandTotal) : 0;
      if (pricing > 0) {
        const withCtx = ss.tokenEvidence.avgTokensPerExchangeWithContext * pricing;
        const noCtx = ss.tokenEvidence.avgTokensPerExchangeNoContext * pricing;
        if (noCtx > withCtx * 1.1) {
          contextCostNote = ` Sessions without context cost $${noCtx.toFixed(2)}/exchange vs $${withCtx.toFixed(2)} with context.`;
        }
      }
    }
    watchPoints.push({
      name: 'Missing context',
      detail: `Only ${ss.contextSetRatio}% of sessions start with context-setting (benchmark: ${BENCHMARKS.contextSetRatio}%). Upfront context leads to better first responses and fewer corrections.${contextCostNote}`,
      evidence: null,
    });
  }

  // Extended debug spirals
  if (db.longLoops > 2) {
    let loopCostStr = '';
    if (db.tokenEvidence?.avgTokensLongLoop) {
      const pricing = tokenEfficiency && tokenEfficiency.hasData
        ? tokenEfficiency.estimatedCostTotal / Math.max(1, tokenEfficiency.grandTotal) : 0;
      if (pricing > 0) {
        const costPerLoop = db.tokenEvidence.avgTokensLongLoop * pricing;
        loopCostStr = ` Each spiral costs ~$${costPerLoop.toFixed(2)}.`;
      } else {
        loopCostStr = ` Each spiral averages ${numberFormatInsight(db.tokenEvidence.avgTokensLongLoop)} tokens.`;
      }
    }
    watchPoints.push({
      name: 'Debug spirals',
      detail: `${db.longLoops} extended debug loops (>5 turns) detected.${loopCostStr} When stuck, try providing more specific error context or breaking the problem differently.`,
      evidence: null,
    });
  }

  // ── Token-backed watch points ──
  if (tokenEfficiency && tokenEfficiency.hasData) {
    const te = tokenEfficiency;

    // Marathon sessions burning disproportionate tokens
    const marathonSessions = te.costliestSessions.filter(s => s.exchanges > 50);
    if (marathonSessions.length >= 2) {
      const marathonCost = marathonSessions.reduce((s, m) => s + m.estimatedCost, 0);
      const marathonPct = te.estimatedCostTotal > 0 ? Math.round(marathonCost / te.estimatedCostTotal * 100) : 0;
      if (marathonPct > 40) {
        watchPoints.push({
          name: 'Marathon session tax',
          detail: `${marathonSessions.length} marathon sessions (50+ exchanges) consumed ~${marathonPct}% of your total spend (~$${marathonCost.toFixed(2)}). Context compounds — splitting into focused sessions would reduce token waste.`,
          evidence: null,
        });
      }
    }

    // Vague prompts costing more than specific ones
    const vagueAvg = db.tokenEvidence?.avgTokensVagueDebug;
    const specificAvg = db.tokenEvidence?.avgTokensSpecificDebug;
    if (vagueAvg && specificAvg && vagueAvg > specificAvg * 1.5 && db.vagueReports > 3) {
      watchPoints.push({
        name: 'Vague prompts are expensive',
        detail: `Your vague debug prompts average ${numberFormatInsight(vagueAvg)} tokens vs ${numberFormatInsight(specificAvg)} for specific ones — ${(vagueAvg / specificAvg).toFixed(1)}x more expensive. Adding error details upfront saves real money.`,
        evidence: null,
      });
    }
  }

  return watchPoints.slice(0, 4); // Max 4 watch points (was 3, expanded for token insights)
}
396
-
397
- // ══════════════════════════════════════════════
398
- // TRAJECTORY — Weekly score evolution
399
- // ══════════════════════════════════════════════
400
-
401
/**
 * Build a week-by-week score trajectory from timestamped sessions.
 *
 * Fixes:
 * - Monday alignment: the old `- getDay() + 1` pushed a Sunday start forward
 *   to the NEXT Monday, so sessions on that first Sunday fell before every
 *   bucket and were silently dropped. `(getDay() + 6) % 7` maps Mon..Sun to
 *   0..6 days back instead.
 * - Week labels across month boundaries: `weekEnd.getDate() - 1` produced
 *   labels like "Mar 26-0" when the week ended on the 1st. The label now uses
 *   the inclusive last day and names the end month when it differs.
 *
 * @param {Array<Object>} allSessions - Sessions; only those with a truthy
 *   `startTime` participate.
 * @returns {?Object} `{ weeks, delta, daysSpan, velocityLabel, velocityDetail }`
 *   or null when there is too little data (<5 timestamped sessions, <10 days
 *   of span, or <2 scoreable weeks).
 */
export function computeTrajectory(allSessions) {
  // Group sessions by week
  const sessionsWithTime = allSessions.filter(s => s.startTime);
  if (sessionsWithTime.length < 5) return null;

  sessionsWithTime.sort((a, b) => new Date(a.startTime) - new Date(b.startTime));

  const firstDate = new Date(sessionsWithTime[0].startTime);
  const lastDate = new Date(sessionsWithTime[sessionsWithTime.length - 1].startTime);

  // Need at least 2 weeks of data
  const daySpan = (lastDate - firstDate) / (1000 * 60 * 60 * 24);
  if (daySpan < 10) return null;

  // Create weekly buckets aligned to the Monday on or before the first session
  const weeks = [];
  let weekStart = new Date(firstDate);
  weekStart.setHours(0, 0, 0, 0);
  weekStart.setDate(weekStart.getDate() - ((weekStart.getDay() + 6) % 7));

  const months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];

  while (weekStart <= lastDate) {
    const weekEnd = new Date(weekStart);
    weekEnd.setDate(weekEnd.getDate() + 7);
    const weekSessions = sessionsWithTime.filter(s => {
      const t = new Date(s.startTime);
      return t >= weekStart && t < weekEnd;
    });

    if (weekSessions.length >= 2) {
      // Compute score for this week
      const m = {
        decomposition: computeDecomposition(weekSessions),
        debugCycles: computeDebugCycles(weekSessions),
        aiLeverage: computeAILeverage(weekSessions),
        sessionStructure: computeSessionStructure(weekSessions),
      };
      const r = computeOverallScore(m);
      // Inclusive last calendar day of the bucket (weekEnd is exclusive)
      const lastDay = new Date(weekEnd);
      lastDay.setDate(lastDay.getDate() - 1);
      const label = weekStart.getMonth() === lastDay.getMonth()
        ? `${months[weekStart.getMonth()]} ${weekStart.getDate()}-${lastDay.getDate()}`
        : `${months[weekStart.getMonth()]} ${weekStart.getDate()}-${months[lastDay.getMonth()]} ${lastDay.getDate()}`;
      weeks.push({
        label,
        score: r.overall,
        sessions: weekSessions.length,
      });
    }

    weekStart = new Date(weekStart);
    weekStart.setDate(weekStart.getDate() + 7);
  }

  if (weeks.length < 2) return null;

  // Compute learning velocity from first to last scoreable week
  const firstScore = weeks[0].score;
  const lastScore = weeks[weeks.length - 1].score;
  const delta = lastScore - firstScore;
  const weeksCount = weeks.length;
  const velocityPerWeek = delta / weeksCount;

  let velocityLabel;
  if (velocityPerWeek > 3) velocityLabel = 'FAST';
  else if (velocityPerWeek > 1) velocityLabel = 'STEADY';
  else if (velocityPerWeek > -1) velocityLabel = 'STABLE';
  else velocityLabel = 'DECLINING';

  return {
    weeks,
    delta,
    daysSpan: Math.round(daySpan),
    velocityLabel,
    velocityDetail: delta !== 0
      ? `${Math.abs(delta)} point ${delta > 0 ? 'improvement' : 'change'} over ${Math.round(daySpan)} days`
      : `Stable over ${Math.round(daySpan)} days`,
  };
}
476
-
477
- // ══════════════════════════════════════════════
478
- // PROJECT COMPLEXITY — What did they build?
479
- // ══════════════════════════════════════════════
480
-
481
// Keyword heuristics for how sophisticated a project's prompts sound.
const complexitySignals = {
  high: /\b(pipeline|distributed|real.?time|analytics|classification|machine learning|ml |auth|oauth|websocket|streaming|queue|worker|migration|microservice|kubernetes|docker|deployment|ci.?cd|infrastructure|database design|data model|schema design|api design|caching|rate limit)\b/i,
  medium: /\b(api|crud|component|feature|integration|testing|refactor|database|query|endpoint|route|middleware|hook|state management|responsive|animation|chart|graph|dashboard)\b/i,
};

/**
 * Classify each project's sophistication (HIGH/MEDIUM/LOW) from its session
 * volume and the complexity keywords found in prompts. Returns the top 5
 * projects by exchange count; projects with fewer than 3 exchanges are skipped.
 */
export function computeProjectComplexity(allSessions) {
  const byProject = new Map();

  for (const session of allSessions) {
    const key = session.project || 'unknown';
    let entry = byProject.get(key);
    if (!entry) {
      entry = { sessions: 0, exchanges: 0, daysActive: new Set(), highSignals: new Set(), medSignals: new Set(), prompts: [] };
      byProject.set(key, entry);
    }
    entry.sessions += 1;
    entry.exchanges += session.exchangeCount;
    if (session.startTime) {
      // Calendar days with activity, keyed by ISO date
      entry.daysActive.add(new Date(session.startTime).toISOString().split('T')[0]);
    }

    for (const ex of session.exchanges) {
      const prompt = ex.userPrompt || '';
      entry.prompts.push(prompt);

      // Record the first complexity keyword found at each tier
      const highMatches = prompt.match(complexitySignals.high);
      const medMatches = prompt.match(complexitySignals.medium);
      if (highMatches) highMatches.forEach((m) => entry.highSignals.add(m.toLowerCase().trim()));
      if (medMatches) medMatches.forEach((m) => entry.medSignals.add(m.toLowerCase().trim()));
    }
  }

  const ranked = [];
  for (const [name, data] of byProject) {
    if (data.exchanges < 3) continue; // Skip trivial projects

    const signals = [...data.highSignals, ...data.medSignals].slice(0, 5);
    let complexity = 'LOW';
    if (data.highSignals.size >= 3 || (data.highSignals.size >= 1 && data.exchanges > 50)) {
      complexity = 'HIGH';
    } else if (data.medSignals.size >= 3 || data.highSignals.size >= 1 || data.exchanges > 30) {
      complexity = 'MEDIUM';
    }

    const shortName = name.length > 28 ? `...${name.slice(-25)}` : name;

    ranked.push({
      name: shortName,
      complexity,
      sessions: data.sessions,
      exchanges: data.exchanges,
      daysActive: data.daysActive.size,
      signals,
    });
  }

  // Busiest projects first; keep the top 5
  ranked.sort((a, b) => b.exchanges - a.exchanges);
  return ranked.slice(0, 5);
}
546
-
547
- // ══════════════════════════════════════════════
548
- // ASSESSMENT — Narrative paragraph
549
- // ══════════════════════════════════════════════
550
-
551
/**
 * Compose the narrative assessment paragraph for an engineer's profile:
 * strongest dimension (+ first signature), an optional second strength,
 * an optional growth area, and the archetype's "best for" sentence.
 *
 * Fix: removed the unused destructured `overall`/`tier` and the unused
 * `d`/`ai` detail aliases — this function only reads `scores`, `archetype`,
 * and the debugCycles/sessionStructure details.
 *
 * @param {Object} result - Scorer output; reads `scores` and `archetype.bestFor`.
 * @param {Object} metrics - Metric-computer output.
 * @param {Array} signatures - Computed signatures; the first is woven in.
 * @param {Array} watchPoints - Unused; kept for call-site compatibility.
 * @returns {string} Multi-sentence assessment text.
 */
export function generateAssessment(result, metrics, signatures, watchPoints) {
  const { scores, archetype } = result;
  const db = metrics.debugCycles.details;
  const ss = metrics.sessionStructure.details;

  // Find strongest and weakest dimensions
  const dims = [
    { key: 'decomposition', label: 'problem decomposition', score: scores.decomposition },
    { key: 'debugCycles', label: 'debugging efficiency', score: scores.debugCycles },
    { key: 'aiLeverage', label: 'AI leverage', score: scores.aiLeverage },
    { key: 'sessionStructure', label: 'workflow discipline', score: scores.sessionStructure },
  ];
  dims.sort((a, b) => b.score - a.score);
  const strongest = dims[0];
  const weakest = dims[dims.length - 1];

  // First sentence — lead with the strongest dimension
  let assessment = `This engineer demonstrates ${dimQualitative(strongest.score).toLowerCase()} ${strongest.label}`;

  // Add signature mention if available
  if (signatures.length > 0) {
    assessment += ` with a distinctive pattern of ${formatSignatureName(signatures[0].name)}`;
  }
  assessment += '.';

  // Second sentence — second strength or debugging detail
  if (dims[1].score >= 65) {
    assessment += ` Their ${dims[1].label} is also ${dimQualitative(dims[1].score).toLowerCase()}`;
    if (db.avgTurnsToResolve <= 2 && dims[1].key === 'debugCycles') {
      assessment += ' \u2014 surgical and specific with ' + (db.longLoops === 0 ? 'zero' : 'minimal') + ' extended loops';
    }
    assessment += '.';
  }

  // Third sentence — growth area
  if (weakest.score < 65) {
    assessment += ` Primary growth opportunity is in ${weakest.label}`;
    if (weakest.key === 'sessionStructure') {
      assessment += ': context-setting and upfront planning are below benchmark';
      if (ss.refinementRatio > 15) {
        assessment += ', though iterative refinement partially compensates';
      }
    } else if (weakest.key === 'decomposition') {
      assessment += ': more task breakdown and structured thinking would yield significant score improvement';
    } else if (weakest.key === 'aiLeverage') {
      assessment += ': using AI for architecture and planning, not just code generation, would increase impact';
    } else {
      assessment += ': stronger error reporting and systematic resolution would improve efficiency';
    }
    assessment += '.';
  }

  // Fourth sentence — best for
  assessment += ' ' + archetype.bestFor;

  return assessment;
}
610
-
611
// Map a 0-100 dimension score to a qualitative label.
function dimQualitative(score) {
  const bands = [
    [80, 'Exceptional'],
    [65, 'Strong'],
    [50, 'Solid'],
    [35, 'Developing'],
  ];
  for (const [floor, label] of bands) {
    if (score >= floor) return label;
  }
  return 'Early-stage';
}
618
-
619
// Lowercase a signature name for prose while preserving acronyms like "AI", "TDD"
function formatSignatureName(name) {
  const lowered = name.toLowerCase();
  return lowered.replace(/\bai\b/g, 'AI').replace(/\btdd\b/g, 'TDD');
}
626
-
627
- // ══════════════════════════════════════════════
628
- // CONFIDENCE — Data volume indicator
629
- // ══════════════════════════════════════════════
630
-
631
/**
 * Score statistical confidence (0-100 plus a HIGH/MODERATE/LOW level) from
 * data volume: session count, exchange count, and tool diversity, with a
 * +10 bonus when both sessions and exchanges are plentiful.
 */
export function computeConfidence(sessionStats) {
  const { totalSessions, totalExchanges, tools } = sessionStats;

  // Award points for the first threshold the value meets (highest first).
  const award = (value, tiers, fallback) => {
    for (const [min, points] of tiers) {
      if (value >= min) return points;
    }
    return fallback;
  };

  let score =
    award(totalSessions, [[50, 40], [20, 30], [10, 20]], 10) +
    award(totalExchanges, [[500, 30], [200, 20], [50, 10]], 0) +
    award(tools.length, [[3, 20], [2, 15]], 10);

  // Bonus for enough data
  if (totalSessions >= 30 && totalExchanges >= 300) score += 10;
  score = Math.min(100, score);

  const level = score >= 80 ? 'HIGH' : score >= 50 ? 'MODERATE' : 'LOW';

  return { score, level };
}