chekk 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/chekk.js CHANGED
@@ -4,7 +4,7 @@ import { execSync, spawn } from 'child_process';
4
4
  import { Command } from 'commander';
5
5
  import { run } from '../src/index.js';
6
6
 
7
- const LOCAL_VERSION = '0.4.0';
7
+ const LOCAL_VERSION = '0.4.2';
8
8
 
9
9
  // ── Auto-update check ──
10
10
  // If running from a cached npx install, check if there's a newer version
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "chekk",
3
- "version": "0.4.0",
3
+ "version": "0.4.2",
4
4
  "description": "See how you prompt. Chekk analyzes your AI coding workflow and tells you what kind of engineer you are.",
5
5
  "bin": {
6
6
  "chekk": "./bin/chekk.js"
package/src/display.js CHANGED
@@ -41,9 +41,28 @@ function numberFormat(n) {
41
41
  return String(n);
42
42
  }
43
43
 
44
+ // Measure visible display width accounting for wide characters (emoji, CJK)
45
+ function visibleWidth(str) {
46
+ const stripped = str.replace(/\u001b\[[0-9;]*m/g, '');
47
+ let width = 0;
48
+ for (const ch of stripped) {
49
+ const code = ch.codePointAt(0);
50
+ // Emoji and symbols that take 2 terminal columns
51
+ if (code > 0x1F000 || // emoji block
52
+ (code >= 0x2600 && code <= 0x27BF) || // misc symbols
53
+ (code >= 0x2B50 && code <= 0x2B55) || // stars
54
+ (code >= 0xFE00 && code <= 0xFE0F) || // variation selectors
55
+ (code >= 0x1F300 && code <= 0x1FAFF)) { // extended emoji
56
+ width += 2;
57
+ } else {
58
+ width += 1;
59
+ }
60
+ }
61
+ return width;
62
+ }
63
+
44
64
  function pad(str, len) {
45
- const visible = str.replace(/\u001b\[[0-9;]*m/g, '');
46
- return str + ' '.repeat(Math.max(0, len - visible.length));
65
+ return str + ' '.repeat(Math.max(0, len - visibleWidth(str)));
47
66
  }
48
67
 
49
68
  // ── Qualitative tier labels for dimensions ──
@@ -65,25 +84,50 @@ function dimTierColor(score) {
65
84
 
66
85
  // ── Snippet helpers ──
67
86
 
68
- function cleanPrompt(prompt, maxLen = 120) {
87
+ function cleanPrompt(prompt) {
69
88
  if (!prompt) return null;
70
- let clean = prompt.replace(/\s+/g, ' ').trim();
71
- if (clean.length > maxLen) {
72
- clean = clean.slice(0, maxLen - 1) + '\u2026';
73
- }
74
- return clean;
89
+ return prompt.replace(/\s+/g, ' ').trim();
75
90
  }
76
91
 
77
- function displayLabeledSnippet(label, prompt, maxLen = 120) {
78
- const s = cleanPrompt(prompt, maxLen);
92
+ function displayLabeledSnippet(label, prompt) {
93
+ const s = cleanPrompt(prompt);
79
94
  if (!s) return;
80
- console.log(` ${dim('\u21B3')} ${dim(label + ':')} ${dim.italic('\u201C' + s + '\u201D')}`);
95
+ // Wrap the full prompt across multiple lines instead of truncating
96
+ const prefix = `${dim('\u21B3')} ${dim(label + ':')} `;
97
+ const quoted = `\u201C${s}\u201D`;
98
+ const lines = wrapText(quoted, 53);
99
+ console.log(` ${prefix}${dim.italic(lines[0])}`);
100
+ for (let i = 1; i < lines.length; i++) {
101
+ console.log(` ${dim.italic(lines[i])}`);
102
+ }
81
103
  }
82
104
 
105
+ // Cross-dimension filters: reject prompts that clearly belong to another dimension
106
+ const architecturalRe = /\b(architect|design|refactor|redesign|restructure|system design|data model|schema|api design|infrastructure|migration|strategy)\b/i;
107
+ const debugRe = /\b(error|bug|broken|crash|fail|exception|traceback|stack trace|doesn'?t work|not working|TypeError|SyntaxError|ImportError|ReferenceError|500|502|503|404|CORS)\b/i;
108
+ const planningRe = /\b(plan|breakdown|break down|think through|help me think|pros and cons|how should|code review|audit)\b/i;
109
+
110
+ // For each dimension, prompts matching these patterns are *excluded* as evidence
111
+ const dimensionExclusions = {
112
+ 'specific_report': [architecturalRe, planningRe],
113
+ 'quick_fix': [architecturalRe, planningRe],
114
+ 'architectural': [debugRe],
115
+ 'planning': [debugRe],
116
+ 'exploratory': [debugRe],
117
+ 'decomposition': [],
118
+ 'followup': [],
119
+ 'context_setting': [],
120
+ 'refinement': [],
121
+ };
122
+
83
123
  function pickExample(examples, type) {
84
124
  if (!examples || !examples.length) return null;
85
- const match = examples.find(e => e.type === type);
86
- return match ? match.prompt : null;
125
+ const exclusions = dimensionExclusions[type] || [];
126
+ // Prefer a match that doesn't trigger exclusion patterns
127
+ const candidates = examples.filter(e => e.type === type);
128
+ if (candidates.length === 0) return null;
129
+ const clean = candidates.find(e => !exclusions.some(re => re.test(e.prompt)));
130
+ return (clean || candidates[0]).prompt;
87
131
  }
88
132
 
89
133
  // ── Box drawing ──
@@ -92,8 +136,7 @@ function box(lines, width = 47) {
92
136
  const out = [];
93
137
  out.push(dim(' \u250C' + '\u2500'.repeat(width) + '\u2510'));
94
138
  for (const line of lines) {
95
- const visible = line.replace(/\u001b\[[0-9;]*m/g, '');
96
- const padding = Math.max(0, width - visible.length);
139
+ const padding = Math.max(0, width - visibleWidth(line));
97
140
  out.push(dim(' \u2502') + line + ' '.repeat(padding) + dim('\u2502'));
98
141
  }
99
142
  out.push(dim(' \u2514' + '\u2500'.repeat(width) + '\u2518'));
@@ -128,7 +171,7 @@ export function displayHeader() {
128
171
  console.log();
129
172
  const lines = [
130
173
  '',
131
- ` ${bold.white('chekk')}${dim(' v0.4.0')}`,
174
+ ` ${bold.white('chekk')}${dim(' v0.4.2')}`,
132
175
  ` ${dim('engineering capability profile')}`,
133
176
  '',
134
177
  ];
@@ -186,7 +229,7 @@ function displayProfileHeader(result, extra = {}) {
186
229
  console.log(` ${bold.white('ENGINEERING CAPABILITY PROFILE')}`);
187
230
  console.log();
188
231
  if (sessionStats) {
189
- console.log(` ${dim(`Generated ${dateStr} | chekk v0.4.0`)}`);
232
+ console.log(` ${dim(`Generated ${dateStr} | chekk v0.4.2`)}`);
190
233
  console.log(` ${dim(`Analysis: ${sessionStats.totalSessions} sessions \u00B7 ${sessionStats.tools.length} tool${sessionStats.tools.length > 1 ? 's' : ''} \u00B7 ${numberFormat(sessionStats.totalExchanges)} exchanges`)}`);
191
234
  if (sessionStats.dateRangeShort) {
192
235
  console.log(` ${dim(`Period: ${sessionStats.dateRangeShort}`)}`);
@@ -366,7 +409,7 @@ export function displayNarratives(metrics, prose) {
366
409
  const shownSnippets = new Set();
367
410
  function showLabeledSnippet(label, prompt) {
368
411
  if (!prompt) return;
369
- const s = cleanPrompt(prompt, 120);
412
+ const s = cleanPrompt(prompt);
370
413
  if (shownSnippets.has(s)) return;
371
414
  shownSnippets.add(s);
372
415
  displayLabeledSnippet(label, prompt);
@@ -404,7 +447,7 @@ function displayDataNarratives(metrics, shownSnippets) {
404
447
 
405
448
  function showSnippet(label, prompt) {
406
449
  if (!prompt) return;
407
- const s = cleanPrompt(prompt, 120);
450
+ const s = cleanPrompt(prompt);
408
451
  if (shownSnippets.has(s)) return;
409
452
  shownSnippets.add(s);
410
453
  displayLabeledSnippet(label, prompt);
@@ -458,6 +501,9 @@ function displaySignatures(insights) {
458
501
  for (const line of lines) {
459
502
  console.log(` ${dim(line)}`);
460
503
  }
504
+ if (sig.evidence) {
505
+ displayLabeledSnippet('Proof', sig.evidence);
506
+ }
461
507
  console.log();
462
508
  }
463
509
  }
@@ -477,6 +523,9 @@ function displayWatchPoints(insights) {
477
523
  for (const line of lines) {
478
524
  console.log(` ${dim(line)}`);
479
525
  }
526
+ if (wp.evidence) {
527
+ displayLabeledSnippet('Example', wp.evidence);
528
+ }
480
529
  console.log();
481
530
  }
482
531
  }
@@ -605,7 +654,32 @@ export function displayVerbose(metrics, sessions) {
605
654
  console.log(doubleRule());
606
655
  console.log(dim('\n DETAILED BREAKDOWN\n'));
607
656
 
608
- // Per-project stats
657
+ // Helper: show a metric row with value, benchmark comparison, and verdict
658
+ // lowerIsBetter: true for metrics where lower = better (e.g. turns to resolve)
659
+ function metricRow(label, value, benchmark, unit = '', lowerIsBetter = false) {
660
+ const valStr = typeof value === 'number' ? String(value) : value;
661
+ let verdict = '';
662
+ if (benchmark !== null && benchmark !== undefined && typeof value === 'number') {
663
+ const ratio = value / benchmark;
664
+ if (lowerIsBetter) {
665
+ if (ratio <= 0.5) verdict = green(' ++ faster than benchmark');
666
+ else if (ratio <= 0.8) verdict = cyan(' + faster than benchmark');
667
+ else if (ratio <= 1.1) verdict = dim(' ~ at benchmark');
668
+ else if (ratio <= 1.5) verdict = orange(' - slower than benchmark');
669
+ else verdict = red(' -- well above benchmark');
670
+ } else {
671
+ if (ratio >= 1.5) verdict = green(' ++ above benchmark');
672
+ else if (ratio >= 1.1) verdict = cyan(' + above benchmark');
673
+ else if (ratio >= 0.9) verdict = dim(' ~ at benchmark');
674
+ else if (ratio >= 0.6) verdict = orange(' - below benchmark');
675
+ else verdict = red(' -- well below benchmark');
676
+ }
677
+ }
678
+ const benchStr = benchmark !== null && benchmark !== undefined ? dim(` (benchmark: ${benchmark}${unit})`) : '';
679
+ console.log(` ${dim(pad(label, 28))} ${white(valStr + unit)}${benchStr}${verdict}`);
680
+ }
681
+
682
+ // ── Projects ──
609
683
  const projects = {};
610
684
  for (const s of sessions) {
611
685
  const p = s.project || 'unknown';
@@ -615,60 +689,71 @@ export function displayVerbose(metrics, sessions) {
615
689
  projects[p].minutes += s.durationMinutes || 0;
616
690
  }
617
691
 
618
- console.log(bold(' PROJECTS'));
692
+ console.log(` ${bold('PROJECTS')}`);
693
+ console.log(` ${dim('\u2500'.repeat(50))}`);
619
694
  for (const [name, data] of Object.entries(projects).sort((a, b) => b[1].exchanges - a[1].exchanges)) {
620
- const shortName = name.length > 30 ? '...' + name.slice(-27) : name;
621
- console.log(` ${dim(pad(shortName, 32))} ${dim(data.sessions + ' sessions')} ${dim(numberFormat(data.exchanges) + ' exchanges')}`);
695
+ const shortName = name.length > 28 ? '...' + name.slice(-25) : name;
696
+ console.log(` ${pad(white(shortName), 30)} ${dim(data.sessions + ' sessions')} ${dim(numberFormat(data.exchanges) + ' exchanges')}`);
622
697
  }
623
698
  console.log();
624
699
 
625
- // Decomposition
700
+ // ── Thinking / Decomposition ──
626
701
  const d = metrics.decomposition.details;
627
- console.log(bold(' DECOMPOSITION'));
628
- console.log(` ${dim(pad('Avg session depth', 30))} ${dim(String(d.avgExchangesPerSession))} ${dim(`(benchmark: ${BENCHMARKS.avgExchangesPerSession})`)}`);
629
- console.log(` ${dim(pad('Avg prompt length', 30))} ${dim(d.avgPromptLength + ' chars')} ${dim(`(benchmark: ${BENCHMARKS.avgPromptLength})`)}`);
630
- console.log(` ${dim(pad('Multi-step sessions', 30))} ${dim(String(d.multiStepSessions) + '/' + d.totalSessions)} ${dim(d.multiStepSessions > d.totalSessions * 0.5 ? '\u2014 strong' : '\u2014 room to grow')}`);
631
- console.log(` ${dim(pad('Single-shot sessions', 30))} ${dim(String(d.singleShotSessions))}`);
632
- console.log(` ${dim(pad('Contextual followups', 30))} ${dim(d.contextualFollowupRatio + '%')} ${dim(d.contextualFollowupRatio > 20 ? '\u2014 builds on context well' : '\u2014 could reference prior work more')}`);
702
+ console.log(` ${bold('\uD83E\uDDE0 THINKING')} ${dim('(weight: 25%)')}`);
703
+ console.log(` ${dim('\u2500'.repeat(50))}`);
704
+ metricRow('Session depth', d.avgExchangesPerSession, BENCHMARKS.avgExchangesPerSession, ' exchanges');
705
+ metricRow('Prompt length', d.avgPromptLength, BENCHMARKS.avgPromptLength, ' chars');
706
+ console.log(` ${dim(pad('Multi-step sessions', 28))} ${white(d.multiStepSessions + '/' + d.totalSessions)} ${dim('(' + Math.round(d.multiStepSessions / Math.max(1, d.totalSessions) * 100) + '%)')}`);
707
+ console.log(` ${dim(pad('Single-shot sessions', 28))} ${white(String(d.singleShotSessions))}`);
708
+ console.log(` ${dim(pad('Contextual followups', 28))} ${white(d.contextualFollowupRatio + '%')}${d.contextualFollowupRatio > 20 ? cyan(' builds on context well') : orange(' could reference prior work more')}`);
633
709
  console.log();
634
710
 
635
- // Debug
711
+ // ── Debugging ──
636
712
  const db = metrics.debugCycles.details;
637
- console.log(bold(' DEBUG CYCLES'));
638
- console.log(` ${dim(pad('Total sequences', 30))} ${dim(String(db.totalDebugSequences))}`);
639
- console.log(` ${dim(pad('Avg turns to resolve', 30))} ${dim(String(db.avgTurnsToResolve))} ${dim(`(benchmark: ${BENCHMARKS.avgTurnsToResolve})`)}`);
640
- console.log(` ${dim(pad('Quick fixes (\u22642 turns)', 30))} ${dim(String(db.quickFixes))}`);
641
- console.log(` ${dim(pad('Extended loops (>5 turns)', 30))} ${dim(String(db.longLoops))} ${dim(db.longLoops === 0 ? '\u2014 zero spirals' : '')}`);
642
- console.log(` ${dim(pad('Specific report ratio', 30))} ${dim(db.specificReportRatio + '%')} ${dim(`(benchmark: ${BENCHMARKS.specificReportRatio}%)`)}`);
643
- console.log(` ${dim(pad('Vague reports', 30))} ${dim(String(db.vagueReports))} ${dim(db.vagueReports === 0 ? '\u2014 never vague' : '')}`);
713
+ console.log(` ${bold('\u26A1 DEBUGGING')} ${dim('(weight: 25%)')}`);
714
+ console.log(` ${dim('\u2500'.repeat(50))}`);
715
+ console.log(` ${dim(pad('Debug sequences', 28))} ${white(String(db.totalDebugSequences))}`);
716
+ metricRow('Turns to resolve', db.avgTurnsToResolve, BENCHMARKS.avgTurnsToResolve, ' avg', true);
717
+ console.log(` ${dim(pad('Quick fixes (1-2 turns)', 28))} ${white(String(db.quickFixes))} ${dim('of ' + db.totalDebugSequences)}`);
718
+ console.log(` ${dim(pad('Extended loops (>5 turns)', 28))} ${db.longLoops === 0 ? green('0 -- zero spirals') : orange(String(db.longLoops))}`);
719
+ metricRow('Specific report ratio', db.specificReportRatio, BENCHMARKS.specificReportRatio, '%');
720
+ console.log(` ${dim(pad('Vague reports', 28))} ${db.vagueReports === 0 ? green('0 -- never vague') : orange(String(db.vagueReports))}`);
644
721
  console.log();
645
722
 
646
- // AI Leverage
723
+ // ── AI Leverage ──
647
724
  const ai = metrics.aiLeverage.details;
648
- console.log(bold(' AI LEVERAGE'));
649
- console.log(` ${dim(pad('Total prompts', 30))} ${dim(numberFormat(ai.totalPrompts))}`);
650
- console.log(` ${dim(pad('Architectural', 30))} ${dim(String(ai.architecturalPrompts))} ${dim(`(${Math.round(ai.architecturalPrompts / Math.max(1, ai.totalPrompts) * 100)}%)`)}`);
651
- console.log(` ${dim(pad('Planning', 30))} ${dim(String(ai.planningPrompts))} ${dim(`(${Math.round(ai.planningPrompts / Math.max(1, ai.totalPrompts) * 100)}%)`)}`);
652
- console.log(` ${dim(pad('Exploratory', 30))} ${dim(String(ai.exploratoryPrompts))} ${dim(`(${Math.round(ai.exploratoryPrompts / Math.max(1, ai.totalPrompts) * 100)}%)`)}`);
653
- console.log(` ${dim(pad('Boilerplate', 30))} ${dim(String(ai.boilerplatePrompts))} ${dim(`(${Math.round(ai.boilerplatePrompts / Math.max(1, ai.totalPrompts) * 100)}%)`)} ${dim(ai.boilerplatePrompts < ai.totalPrompts * 0.05 ? '\u2014 minimal' : '')}`);
654
- console.log(` ${dim(pad('Testing', 30))} ${dim(String(ai.testingPrompts))}`);
655
- console.log(` ${dim(pad('High-level ratio', 30))} ${dim(ai.highLevelRatio + '%')} ${dim(`(benchmark: ${BENCHMARKS.highLevelRatio}%)`)}`);
725
+ const total = Math.max(1, ai.totalPrompts);
726
+ console.log(` ${bold('\uD83D\uDD27 AI LEVERAGE')} ${dim('(weight: 30%)')}`);
727
+ console.log(` ${dim('\u2500'.repeat(50))}`);
728
+ console.log(` ${dim(pad('Total prompts analyzed', 28))} ${white(numberFormat(ai.totalPrompts))}`);
729
+ console.log();
730
+ console.log(` ${dim(' Prompt type breakdown:')}`);
731
+ console.log(` ${dim(pad(' Architectural', 28))} ${white(String(ai.architecturalPrompts))} ${dim('(' + Math.round(ai.architecturalPrompts / total * 100) + '%) design, schema, strategy')}`);
732
+ console.log(` ${dim(pad(' Planning', 28))} ${white(String(ai.planningPrompts))} ${dim('(' + Math.round(ai.planningPrompts / total * 100) + '%) how-should-I, trade-offs')}`);
733
+ console.log(` ${dim(pad(' Exploratory', 28))} ${white(String(ai.exploratoryPrompts))} ${dim('(' + Math.round(ai.exploratoryPrompts / total * 100) + '%) explain, investigate')}`);
734
+ console.log(` ${dim(pad(' Boilerplate', 28))} ${white(String(ai.boilerplatePrompts))} ${dim('(' + Math.round(ai.boilerplatePrompts / total * 100) + '%) CRUD, templates')}${ai.boilerplatePrompts < total * 0.05 ? green(' minimal') : ''}`);
735
+ console.log(` ${dim(pad(' Testing', 28))} ${white(String(ai.testingPrompts))} ${dim('(' + Math.round(ai.testingPrompts / total * 100) + '%)')}`);
736
+ console.log();
737
+ metricRow('High-level ratio', ai.highLevelRatio, BENCHMARKS.highLevelRatio, '%');
656
738
  console.log();
657
739
 
658
- // Session structure
740
+ // ── Session Structure ──
659
741
  const ss = metrics.sessionStructure.details;
660
- console.log(bold(' SESSION STRUCTURE'));
661
- console.log(` ${dim(pad('Context-setting', 30))} ${dim(ss.contextSetRatio + '%')} ${dim(`(benchmark: ${BENCHMARKS.contextSetRatio}%)`)}`);
662
- console.log(` ${dim(pad('Plan before code', 30))} ${dim(ss.planBeforeCodeRatio + '%')}`);
663
- console.log(` ${dim(pad('Review at end', 30))} ${dim(ss.reviewEndRatio + '%')} ${dim(`(benchmark: ${BENCHMARKS.reviewEndRatio}%)`)}`);
664
- console.log(` ${dim(pad('Refinement rate', 30))} ${dim(ss.refinementRatio + '%')} ${dim(`(benchmark: ${BENCHMARKS.refinementRatio}%)`)}`);
665
- console.log(` ${dim(pad('Avg first prompt', 30))} ${dim(ss.avgFirstPromptLength + ' chars')}`);
742
+ console.log(` ${bold('\uD83D\uDCD0 WORKFLOW')} ${dim('(weight: 20%)')}`);
743
+ console.log(` ${dim('\u2500'.repeat(50))}`);
744
+ metricRow('Context-setting', ss.contextSetRatio, BENCHMARKS.contextSetRatio, '%');
745
+ console.log(` ${dim(pad('Plan before code', 28))} ${white(ss.planBeforeCodeRatio + '%')}`);
746
+ metricRow('Review at end', ss.reviewEndRatio, BENCHMARKS.reviewEndRatio, '%');
747
+ metricRow('Refinement rate', ss.refinementRatio, BENCHMARKS.refinementRatio, '%');
748
+ console.log(` ${dim(pad('Avg first prompt length', 28))} ${white(ss.avgFirstPromptLength + ' chars')}`);
666
749
  if (ss.durationDistribution) {
667
750
  const dur = ss.durationDistribution;
668
- console.log(` ${dim(pad('Focused sessions (10-45m)', 30))} ${dim(String(dur.focused))}`);
669
- console.log(` ${dim(pad('Short (<5m)', 30))} ${dim(String(dur.short))}`);
670
- console.log(` ${dim(pad('Medium (5-60m)', 30))} ${dim(String(dur.medium))}`);
671
- console.log(` ${dim(pad('Long (>60m)', 30))} ${dim(String(dur.long))}`);
751
+ console.log();
752
+ console.log(` ${dim(' Session duration:')}`);
753
+ console.log(` ${dim(pad(' Focused (10-45m)', 28))} ${white(String(dur.focused))} ${dim('-- ideal range')}`);
754
+ console.log(` ${dim(pad(' Short (<5m)', 28))} ${white(String(dur.short))}`);
755
+ console.log(` ${dim(pad(' Medium (5-60m)', 28))} ${white(String(dur.medium))}`);
756
+ console.log(` ${dim(pad(' Long (>60m)', 28))} ${white(String(dur.long))}`);
672
757
  }
673
758
  console.log();
674
759
  }
package/src/insights.js CHANGED
@@ -45,6 +45,16 @@ const preflightPatterns = /^(before (we|you|i)|don'?t code|review (first|this|my
45
45
  const testFirstPatterns = /\b(write (the )?tests? (first|before)|test.?driven|TDD|spec first|start with (tests?|specs?))\b/i;
46
46
  const negativeConstraintPatterns = /\b(don'?t|do not|never|avoid|must not|shouldn'?t)\b.*\b(add|create|use|include|change|modify|touch|remove)\b/i;
47
47
 
48
+ // Evidence quality filter (same rules as metric parsers)
49
+ const noisePatterns = /^This session is being continued|^\[?[0-9T:.Z-]{20,}|^\S+@\S+.*[%$#>]|^\s*\$\s|^\s*>\s/;
50
+ function isGoodEvidence(prompt) {
51
+ if (!prompt || prompt.length < 40 || prompt.length > 600) return false;
52
+ if (noisePatterns.test(prompt)) return false;
53
+ const alpha = prompt.replace(/[^a-zA-Z]/g, '').length;
54
+ if (alpha / prompt.length < 0.4) return false;
55
+ return true;
56
+ }
57
+
48
58
  export function computeSignatures(allSessions, metrics) {
49
59
  const signatures = [];
50
60
  const d = metrics.decomposition.details;
@@ -59,6 +69,12 @@ export function computeSignatures(allSessions, metrics) {
59
69
  let modificationCount = 0;
60
70
  let acceptCount = 0;
61
71
 
72
+ // Capture evidence prompts for each signature type
73
+ let bestPreflightPrompt = null;
74
+ let bestConstraintPrompt = null;
75
+ let bestTestFirstPrompt = null;
76
+ let bestModifyPrompt = null;
77
+
62
78
  for (const session of allSessions) {
63
79
  const { exchanges } = session;
64
80
  if (exchanges.length === 0) continue;
@@ -67,6 +83,9 @@ export function computeSignatures(allSessions, metrics) {
67
83
  const firstPrompt = exchanges[0].userPrompt || '';
68
84
  if (preflightPatterns.test(firstPrompt)) {
69
85
  preflightSessions++;
86
+ if (isGoodEvidence(firstPrompt) && (!bestPreflightPrompt || firstPrompt.length > bestPreflightPrompt.length)) {
87
+ bestPreflightPrompt = firstPrompt;
88
+ }
70
89
  }
71
90
 
72
91
  let hasTestFirst = false;
@@ -76,15 +95,24 @@ export function computeSignatures(allSessions, metrics) {
76
95
 
77
96
  if (constraintPatterns.test(prompt) && negativeConstraintPatterns.test(prompt)) {
78
97
  constraintPrompts++;
98
+ if (isGoodEvidence(prompt) && (!bestConstraintPrompt || prompt.length > bestConstraintPrompt.length)) {
99
+ bestConstraintPrompt = prompt;
100
+ }
79
101
  }
80
102
 
81
103
  if (testFirstPatterns.test(prompt)) {
82
104
  hasTestFirst = true;
105
+ if (isGoodEvidence(prompt) && (!bestTestFirstPrompt || prompt.length > bestTestFirstPrompt.length)) {
106
+ bestTestFirstPrompt = prompt;
107
+ }
83
108
  }
84
109
 
85
110
  // Track modification vs acceptance
86
111
  if (i > 0 && /\b(actually|wait|instead|change|no,?|not quite|modify|tweak)\b/i.test(prompt)) {
87
112
  modificationCount++;
113
+ if (isGoodEvidence(prompt) && (!bestModifyPrompt || prompt.length > bestModifyPrompt.length)) {
114
+ bestModifyPrompt = prompt;
115
+ }
88
116
  } else if (i > 0) {
89
117
  acceptCount++;
90
118
  }
@@ -100,6 +128,7 @@ export function computeSignatures(allSessions, metrics) {
100
128
  signatures.push({
101
129
  name: 'Pre-flight reviews',
102
130
  detail: `You ask AI to review your plan before coding in ${Math.round(preflightRatio * 100)}% of sessions. Only 8% of engineers do this consistently. This correlates with fewer debug cycles.`,
131
+ evidence: bestPreflightPrompt,
103
132
  });
104
133
  }
105
134
 
@@ -109,6 +138,7 @@ export function computeSignatures(allSessions, metrics) {
109
138
  signatures.push({
110
139
  name: 'Constraint-first prompting',
111
140
  detail: `You specify what NOT to do in ${Math.round(constraintRatio * 100)}% of prompts. This is a hallmark of senior architectural thinking that prevents scope creep.`,
141
+ evidence: bestConstraintPrompt,
112
142
  });
113
143
  }
114
144
 
@@ -118,22 +148,25 @@ export function computeSignatures(allSessions, metrics) {
118
148
  signatures.push({
119
149
  name: 'Test-driven AI usage',
120
150
  detail: `You request tests before implementation in ${Math.round(testFirstRatio * 100)}% of sessions. Engineers who do this ship fewer bugs post-merge.`,
151
+ evidence: bestTestFirstPrompt,
121
152
  });
122
153
  }
123
154
 
124
- // Deep session marathons
155
+ // Deep session marathons — evidence is metric-derived, no single prompt
125
156
  if (d.avgExchangesPerSession > BENCHMARKS.avgExchangesPerSession * 2) {
126
157
  signatures.push({
127
158
  name: 'Marathon sessions',
128
159
  detail: `Avg session depth of ${d.avgExchangesPerSession} exchanges is ${Math.round(d.avgExchangesPerSession / BENCHMARKS.avgExchangesPerSession)}x the benchmark (${BENCHMARKS.avgExchangesPerSession}). You sustain deep, focused work.`,
160
+ evidence: null,
129
161
  });
130
162
  }
131
163
 
132
- // Zero vague debugging
164
+ // Zero vague debugging — evidence is the absence of something
133
165
  if (db.vagueReports === 0 && db.totalDebugSequences > 5) {
134
166
  signatures.push({
135
167
  name: 'Precision debugging',
136
168
  detail: `Zero vague error reports across ${db.totalDebugSequences} debug sequences. Every bug report includes specific context. This is rare.`,
169
+ evidence: metrics.debugCycles.examples?.[0]?.prompt || null,
137
170
  });
138
171
  }
139
172
 
@@ -142,6 +175,7 @@ export function computeSignatures(allSessions, metrics) {
142
175
  signatures.push({
143
176
  name: 'Strategic AI usage',
144
177
  detail: `${ai.highLevelRatio}% of prompts are architectural or planning-level (benchmark: ${BENCHMARKS.highLevelRatio}%). You use AI as a thinking partner, not just a code generator.`,
178
+ evidence: metrics.aiLeverage.examples?.[0]?.prompt || null,
145
179
  });
146
180
  }
147
181
 
@@ -152,6 +186,7 @@ export function computeSignatures(allSessions, metrics) {
152
186
  signatures.push({
153
187
  name: 'Critical reviewer',
154
188
  detail: `You modify or redirect AI output in ${Math.round(modRatio * 100)}% of follow-up prompts. This indicates active evaluation rather than passive acceptance.`,
189
+ evidence: bestModifyPrompt,
155
190
  });
156
191
  }
157
192
 
@@ -177,6 +212,7 @@ export function computeWatchPoints(allSessions, metrics) {
177
212
  projectSessions[p].push(s);
178
213
  }
179
214
  let contextRestarts = 0;
215
+ let bestContextRestartPrompt = null;
180
216
  let multiSessionProjects = 0;
181
217
  for (const [, sessions] of Object.entries(projectSessions)) {
182
218
  if (sessions.length < 2) continue;
@@ -186,6 +222,9 @@ export function computeWatchPoints(allSessions, metrics) {
186
222
  // If first prompt doesn't reference previous work, it's a context restart
187
223
  if (firstPrompt.length > 50 && !/\b(continuing|following up|as discussed|last time|previously|where we left|earlier)\b/i.test(firstPrompt)) {
188
224
  contextRestarts++;
225
+ if (isGoodEvidence(firstPrompt) && (!bestContextRestartPrompt || firstPrompt.length > bestContextRestartPrompt.length)) {
226
+ bestContextRestartPrompt = firstPrompt;
227
+ }
189
228
  }
190
229
  }
191
230
  }
@@ -194,6 +233,7 @@ export function computeWatchPoints(allSessions, metrics) {
194
233
  watchPoints.push({
195
234
  name: 'Context amnesia',
196
235
  detail: `You restart context from scratch in ${Math.round(contextRestarts / totalFollowupSessions * 100)}% of follow-up sessions on the same project. Engineers who maintain context across sessions are more efficient.`,
236
+ evidence: bestContextRestartPrompt,
197
237
  });
198
238
  }
199
239
 
@@ -214,14 +254,26 @@ export function computeWatchPoints(allSessions, metrics) {
214
254
  watchPoints.push({
215
255
  name: 'Acceptance without review',
216
256
  detail: `You accept AI output without modification in ${Math.round((1 - modRatio) * 100)}% of cases. Top engineers modify or redirect 30%+ of initial suggestions.`,
257
+ evidence: null, // Anti-pattern is the absence of modification
217
258
  });
218
259
  }
219
260
 
220
261
  // Monologue prompting — excessively long first prompts
221
262
  if (d.avgPromptLength > 2000) {
263
+ // Find a representative long prompt
264
+ let bestLongPrompt = null;
265
+ for (const s of allSessions) {
266
+ for (const ex of s.exchanges) {
267
+ const p = ex.userPrompt || '';
268
+ if (p.length > 1500 && p.length < 3000 && isGoodEvidence(p)) {
269
+ if (!bestLongPrompt || p.length > bestLongPrompt.length) bestLongPrompt = p;
270
+ }
271
+ }
272
+ }
222
273
  watchPoints.push({
223
274
  name: 'Monologue prompting',
224
275
  detail: `Avg prompt length of ${d.avgPromptLength} chars is ${Math.round(d.avgPromptLength / BENCHMARKS.avgPromptLength)}x the benchmark. Breaking complex requests into 2-3 shorter prompts typically yields better AI output.`,
276
+ evidence: bestLongPrompt,
225
277
  });
226
278
  }
227
279
 
@@ -230,6 +282,7 @@ export function computeWatchPoints(allSessions, metrics) {
230
282
  watchPoints.push({
231
283
  name: 'Missing context',
232
284
  detail: `Only ${ss.contextSetRatio}% of sessions start with context-setting (benchmark: ${BENCHMARKS.contextSetRatio}%). Upfront context leads to better first responses and fewer corrections.`,
285
+ evidence: null,
233
286
  });
234
287
  }
235
288
 
@@ -238,6 +291,7 @@ export function computeWatchPoints(allSessions, metrics) {
238
291
  watchPoints.push({
239
292
  name: 'Debug spirals',
240
293
  detail: `${db.longLoops} extended debug loops (>5 turns) detected. When stuck, try providing more specific error context or breaking the problem differently.`,
294
+ evidence: null,
241
295
  });
242
296
  }
243
297
 
@@ -417,11 +471,11 @@ export function generateAssessment(result, metrics, signatures, watchPoints) {
417
471
  const weakest = dims[dims.length - 1];
418
472
 
419
473
  // Build assessment parts
420
- let assessment = `This engineer demonstrates ${dimQualitative(strongest.score)} ${strongest.label}`;
474
+ let assessment = `This engineer demonstrates ${dimQualitative(strongest.score).toLowerCase()} ${strongest.label}`;
421
475
 
422
476
  // Add signature mention if available
423
477
  if (signatures.length > 0) {
424
- assessment += ` with a distinctive pattern of ${signatures[0].name.toLowerCase()}`;
478
+ assessment += ` with a distinctive pattern of ${formatSignatureName(signatures[0].name)}`;
425
479
  }
426
480
  assessment += '.';
427
481
 
@@ -466,6 +520,14 @@ function dimQualitative(score) {
466
520
  return 'Early-stage';
467
521
  }
468
522
 
523
+ // Lowercase a signature name for prose while preserving acronyms like "AI", "TDD"
524
+ function formatSignatureName(name) {
525
+ return name
526
+ .toLowerCase()
527
+ .replace(/\bai\b/g, 'AI')
528
+ .replace(/\btdd\b/g, 'TDD');
529
+ }
530
+
469
531
  // ══════════════════════════════════════════════
470
532
  // CONFIDENCE — Data volume indicator
471
533
  // ══════════════════════════════════════════════
@@ -11,6 +11,16 @@
11
11
  * - Diversity of tool usage (not just "write code" but also explore, analyze, test)
12
12
  */
13
13
 
14
+ // ── Evidence quality filter ──
15
+ const noisePatterns = /^This session is being continued|^\[?[0-9T:.Z-]{20,}|^\S+@\S+.*[%$#>]|^\s*\$\s|^\s*>\s/;
16
+ function isGoodEvidence(prompt) {
17
+ if (!prompt || prompt.length < 40 || prompt.length > 600) return false;
18
+ if (noisePatterns.test(prompt)) return false;
19
+ const alpha = prompt.replace(/[^a-zA-Z]/g, '').length;
20
+ if (alpha / prompt.length < 0.4) return false;
21
+ return true;
22
+ }
23
+
14
24
  const architecturalPatterns = /\b(architect|design|refactor|redesign|restructure|system design|data model|schema|api design|interface|abstract|pattern|trade-?off|scalab|approach|strategy|migration|infrastructure)\b/i;
15
25
  const planningPatterns = /\b(plan|breakdown|break down|think through|help me think|what('?s| is) the best (way|approach)|how should (i|we)|pros and cons|options for|compare|evaluate|review my|code review|audit)\b/i;
16
26
  const exploratoryPatterns = /\b(explain|understand|how does|what does|why does|walk me through|investigate|diagnose|analyze|explore|deep dive|look into)\b/i;
@@ -53,15 +63,15 @@ export function computeAILeverage(sessions) {
53
63
  // Categorize prompt type
54
64
  if (architecturalPatterns.test(prompt)) {
55
65
  architecturalPrompts++;
56
- if (prompt.length > bestArchLen) { bestArchLen = prompt.length; bestArchPrompt = prompt; }
66
+ if (isGoodEvidence(prompt) && prompt.length > bestArchLen) { bestArchLen = prompt.length; bestArchPrompt = prompt; }
57
67
  }
58
68
  if (planningPatterns.test(prompt)) {
59
69
  planningPrompts++;
60
- if (prompt.length > bestPlanLen) { bestPlanLen = prompt.length; bestPlanPrompt = prompt; }
70
+ if (isGoodEvidence(prompt) && prompt.length > bestPlanLen) { bestPlanLen = prompt.length; bestPlanPrompt = prompt; }
61
71
  }
62
72
  if (exploratoryPatterns.test(prompt)) {
63
73
  exploratoryPrompts++;
64
- if (prompt.length > bestExploreLen) { bestExploreLen = prompt.length; bestExplorePrompt = prompt; }
74
+ if (isGoodEvidence(prompt) && prompt.length > bestExploreLen) { bestExploreLen = prompt.length; bestExplorePrompt = prompt; }
65
75
  }
66
76
  if (boilerplatePatterns.test(prompt)) boilerplatePrompts++;
67
77
  if (testingPatterns.test(prompt)) testingPrompts++;
@@ -10,6 +10,19 @@
10
10
  * - "it's still broken" vs targeted debug prompts
11
11
  */
12
12
 
13
+ // ── Evidence quality filter ──
14
+ // Prompts used as evidence should be human-written, readable, and illustrative.
15
+ // Reject system-generated context, raw log pastes, and extreme lengths.
16
+ const noisePatterns = /^This session is being continued|^\[?[0-9T:.Z-]{20,}|^\S+@\S+.*[%$#>]|^\s*\$\s|^\s*>\s/;
17
+ function isGoodEvidence(prompt) {
18
+ if (!prompt || prompt.length < 40 || prompt.length > 600) return false;
19
+ if (noisePatterns.test(prompt)) return false;
20
+ // Reject if >40% of content is non-alpha (log lines, stack traces, JSON blobs)
21
+ const alpha = prompt.replace(/[^a-zA-Z]/g, '').length;
22
+ if (alpha / prompt.length < 0.4) return false;
23
+ return true;
24
+ }
25
+
13
26
  const errorPatterns = /\b(error|bug|broken|crash|fail|exception|traceback|stack trace|doesn'?t work|not working|issue|problem|wrong)\b/i;
14
27
  const vaguePhrases = /^(it'?s? (?:still )?(?:not working|broken|wrong|failing))|^(fix it|try again|still (?:the same|broken|failing|not working))|^(same (?:error|issue|problem|thing))/i;
15
28
  const specificDebugPatterns = /\b(line \d+|TypeError|SyntaxError|ImportError|ReferenceError|ValueError|KeyError|AttributeError|NoneType|undefined is not|cannot read prop|stack trace|traceback|\.py:\d+|\.ts:\d+|\.js:\d+|status (?:code )?\d{3}|HTTP \d{3}|ENOENT|EACCES|CORS|404|500|502|503)\b/i;
@@ -57,8 +70,9 @@ export function computeDebugCycles(sessions) {
57
70
  }
58
71
  if (specificDebugPatterns.test(prompt) || prompt.length > 200) {
59
72
  specificReports++;
60
- // Track best specific report
61
- if (prompt.length > bestSpecificLen) {
73
+ // Track best specific report — require actual debug pattern match
74
+ // and readable evidence quality
75
+ if (specificDebugPatterns.test(prompt) && isGoodEvidence(prompt) && prompt.length > bestSpecificLen) {
62
76
  bestSpecificLen = prompt.length;
63
77
  bestSpecificReport = prompt;
64
78
  }
@@ -69,7 +83,7 @@ export function computeDebugCycles(sessions) {
69
83
  totalTurnsToResolve += debugTurnCount;
70
84
  if (debugTurnCount <= 2) {
71
85
  quickFixes++;
72
- if (!bestQuickFix) bestQuickFix = debugStartPrompt;
86
+ if (!bestQuickFix && isGoodEvidence(debugStartPrompt)) bestQuickFix = debugStartPrompt;
73
87
  }
74
88
  if (debugTurnCount > 5) longLoops++;
75
89
  inDebugMode = false;
@@ -11,6 +11,16 @@
11
11
  * - Follow-up prompts that reference or build on previous context
12
12
  */
13
13
 
14
// ── Evidence quality filter ──
// Reject prompts that would make poor display evidence: system-generated
// continuation banners, timestamp/shell/log pastes, and extreme lengths.
const noisePatterns = /^This session is being continued|^\[?[0-9T:.Z-]{20,}|^\S+@\S+.*[%$#>]|^\s*\$\s|^\s*>\s/;

/**
 * Decide whether a prompt is quotable as evidence in the report.
 * @param {string} prompt - Raw prompt text (may be null/empty).
 * @returns {boolean} true only for readable, human-written prompts.
 */
function isGoodEvidence(prompt) {
  if (!prompt) return false;
  const withinLength = prompt.length >= 40 && prompt.length <= 600;
  const looksLikeNoise = noisePatterns.test(prompt);
  // Share of alphabetic characters; log lines and JSON blobs score low.
  const alphaRatio = prompt.replace(/[^a-zA-Z]/g, '').length / prompt.length;
  return withinLength && !looksLikeNoise && alphaRatio >= 0.4;
}
23
+
14
24
  export function computeDecomposition(sessions) {
15
25
  if (sessions.length === 0) return { score: 50, details: {} };
16
26
 
@@ -51,7 +61,7 @@ export function computeDecomposition(sessions) {
51
61
  if (len < 100) shortPromptCount++;
52
62
 
53
63
  // Track decomposition examples (multi-sentence prompts showing task breakdown)
54
- if (len > 150 && len < 2000) {
64
+ if (isGoodEvidence(prompt)) {
55
65
  decompCandidates.push(prompt);
56
66
  }
57
67
 
@@ -60,7 +70,7 @@ export function computeDecomposition(sessions) {
60
70
  if (followupPatterns.test(prompt) || refinementPatterns.test(prompt)) {
61
71
  contextualFollowups++;
62
72
  // Capture best followup example
63
- if (!bestFollowupPrompt || prompt.length > bestFollowupPrompt.length) {
73
+ if (isGoodEvidence(prompt) && (!bestFollowupPrompt || prompt.length > bestFollowupPrompt.length)) {
64
74
  bestFollowupPrompt = prompt;
65
75
  }
66
76
  }
@@ -11,6 +11,16 @@
11
11
  * - Modification rate of AI output (shows critical review)
12
12
  */
13
13
 
14
// ── Evidence quality filter ──
// Screens out prompts unfit for display: auto-generated session
// continuations, timestamp/log/shell pastes, and extreme lengths.
const noisePatterns = /^This session is being continued|^\[?[0-9T:.Z-]{20,}|^\S+@\S+.*[%$#>]|^\s*\$\s|^\s*>\s/;

/**
 * Is this prompt readable, human-written evidence worth quoting?
 * @param {string} prompt - Raw prompt text (may be null/empty).
 * @returns {boolean}
 */
function isGoodEvidence(prompt) {
  if (!prompt) return false;
  if (prompt.length < 40) return false; // too short to illustrate anything
  if (prompt.length > 600) return false; // too long to display cleanly
  if (noisePatterns.test(prompt)) return false;
  // Require mostly letters; traces, JSON, and log lines are digit/punct heavy.
  const lettersOnly = prompt.replace(/[^a-zA-Z]/g, '');
  if (lettersOnly.length / prompt.length < 0.4) return false;
  return true;
}
23
+
14
24
  const contextSettingPatterns = /^(i('?m| am) (working on|building|trying to|looking at)|we need to|the goal is|here'?s (the|what)|context:|background:|i have a|there'?s a|i want to|let me explain)/i;
15
25
  const planningStartPatterns = /^(let'?s (plan|think|figure|start by)|first,? (let'?s|we should)|before we (start|begin|code)|the plan is|step 1|here'?s (my|the) plan)/i;
16
26
  const reviewPatterns = /\b(looks good|ship it|deploy|push it|commit|merge|let'?s go|lgtm|approved|test it|run (the )?tests|build it|does this look|review this|check this)\b/i;
@@ -52,7 +62,7 @@ export function computeSessionStructure(sessions) {
52
62
  if (contextSettingPatterns.test(firstPrompt) || firstPrompt.length > 200) {
53
63
  contextSetSessions++;
54
64
  // Track best context-setting prompt
55
- if (firstPrompt.length > bestContextLen) {
65
+ if (isGoodEvidence(firstPrompt) && firstPrompt.length > bestContextLen) {
56
66
  bestContextLen = firstPrompt.length;
57
67
  bestContextPrompt = firstPrompt;
58
68
  }
@@ -77,7 +87,7 @@ export function computeSessionStructure(sessions) {
77
87
  if (refinementPatterns.test(prompt)) {
78
88
  refinementCount++;
79
89
  // Track best refinement example
80
- if (!bestRefinementPrompt || prompt.length > bestRefinementPrompt.length) {
90
+ if (isGoodEvidence(prompt) && (!bestRefinementPrompt || prompt.length > bestRefinementPrompt.length)) {
81
91
  bestRefinementPrompt = prompt;
82
92
  }
83
93
  }