chekk 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/chekk.js CHANGED
@@ -4,7 +4,7 @@ import { execSync, spawn } from 'child_process';
4
4
  import { Command } from 'commander';
5
5
  import { run } from '../src/index.js';
6
6
 
7
- const LOCAL_VERSION = '0.4.1';
7
+ const LOCAL_VERSION = '0.4.3';
8
8
 
9
9
  // ── Auto-update check ──
10
10
  // If running from a cached npx install, check if there's a newer version
@@ -48,7 +48,7 @@ if (!handedOff) {
48
48
 
49
49
  program
50
50
  .name('chekk')
51
- .description('The engineering capability score. See how you prompt.')
51
+ .description('The prompt engineering capability score. See how you prompt.')
52
52
  .version(LOCAL_VERSION)
53
53
  .option('--offline', 'Skip AI prose generation, show data-driven output')
54
54
  .option('--verbose', 'Show detailed per-project and per-metric breakdowns')
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "chekk",
3
- "version": "0.4.1",
3
+ "version": "0.4.3",
4
4
  "description": "See how you prompt. Chekk analyzes your AI coding workflow and tells you what kind of engineer you are.",
5
5
  "bin": {
6
6
  "chekk": "./bin/chekk.js"
package/src/display.js CHANGED
@@ -41,9 +41,28 @@ function numberFormat(n) {
41
41
  return String(n);
42
42
  }
43
43
 
44
// Inclusive code point ranges that occupy no terminal column of their own:
// combining diacritics, zero-width space/joiners, and variation selectors
// (a variation selector only changes how the preceding character is drawn).
const ZERO_WIDTH_RANGES = [
  [0x0300, 0x036F],
  [0x200B, 0x200D],
  [0xFE00, 0xFE0F],
  [0xE0100, 0xE01EF],
];

// Inclusive code point ranges counted as two terminal columns.
const DOUBLE_WIDTH_RANGES = [
  [0x1100, 0x115F], // Hangul Jamo
  [0x2600, 0x27BF], // misc symbols and dingbats
  [0x2B50, 0x2B55], // stars
  [0x2E80, 0x303E], // CJK radicals, punctuation
  [0x3041, 0x33FF], // kana, CJK compatibility
  [0x3400, 0x4DBF], // CJK extension A
  [0x4E00, 0x9FFF], // CJK unified ideographs
  [0xA000, 0xA4CF], // Yi
  [0xAC00, 0xD7A3], // Hangul syllables
  [0xF900, 0xFAFF], // CJK compatibility ideographs
  [0xFE30, 0xFE4F], // CJK compatibility forms
  [0xFF00, 0xFF60], // fullwidth forms
  [0xFFE0, 0xFFE6], // fullwidth signs
];

// True when `code` falls inside any [lo, hi] pair of `ranges`.
function codeInRanges(code, ranges) {
  return ranges.some(([lo, hi]) => code >= lo && code <= hi);
}

/**
 * Measure how many terminal columns a string occupies.
 *
 * ANSI SGR colour sequences are stripped first. Each remaining code point
 * counts as 0 columns (zero-width marks/selectors), 2 columns (emoji, the
 * symbol blocks above, CJK) or 1 column (everything else).
 *
 * @param {string} str - Text that may contain ANSI colour codes.
 * @returns {number} Width in terminal columns.
 */
function visibleWidth(str) {
  const stripped = str.replace(/\u001b\[[0-9;]*m/g, '');
  let width = 0;
  // for...of walks code points, so surrogate pairs are measured once.
  for (const ch of stripped) {
    const code = ch.codePointAt(0);
    // Must be checked before the wide test: U+E0100.. would otherwise
    // match the "> 0x1F000" emoji rule.
    if (codeInRanges(code, ZERO_WIDTH_RANGES)) continue;
    // Everything above U+1F000 (emoji planes, CJK extension B+) is wide.
    const isWide = code > 0x1F000 || codeInRanges(code, DOUBLE_WIDTH_RANGES);
    width += isWide ? 2 : 1;
  }
  return width;
}
63
+
44
64
  function pad(str, len) {
45
- const visible = str.replace(/\u001b\[[0-9;]*m/g, '');
46
- return str + ' '.repeat(Math.max(0, len - visible.length));
65
+ return str + ' '.repeat(Math.max(0, len - visibleWidth(str)));
47
66
  }
48
67
 
49
68
  // ── Qualitative tier labels for dimensions ──
@@ -65,19 +84,22 @@ function dimTierColor(score) {
65
84
 
66
85
  // ── Snippet helpers ──
67
86
 
68
/**
 * Normalise a prompt for display: collapse every run of whitespace
 * (including newlines) into one space and trim both ends. The text is
 * never truncated.
 *
 * @param {*} prompt - Raw prompt; anything that is not a string is treated
 *   as "no prompt" instead of throwing on `.replace`.
 * @returns {string|null} The cleaned text, or null when there is nothing
 *   to show (missing, non-string, empty, or whitespace-only input).
 */
function cleanPrompt(prompt) {
  if (typeof prompt !== 'string') return null;
  const collapsed = prompt.replace(/\s+/g, ' ').trim();
  // Whitespace-only input collapses to '', which is reported the same way
  // as an empty prompt so callers see a single "nothing to show" value.
  return collapsed === '' ? null : collapsed;
}
76
91
 
77
/**
 * Print a prompt excerpt under a label, wrapped in curly quotes.
 * The whole prompt is shown: it is wrapped with wrapText(…, 53) rather
 * than truncated. Nothing is printed when cleanPrompt() yields no text.
 *
 * @param {string} label - Caption placed before the quote, followed by ':'.
 * @param {string} prompt - Raw prompt text to display.
 */
function displayLabeledSnippet(label, prompt) {
  const snippet = cleanPrompt(prompt);
  if (!snippet) return;

  const arrow = dim('\u21B3');
  const caption = dim(label + ':');
  const [firstLine, ...continuation] = wrapText('\u201C' + snippet + '\u201D', 53);

  // The first wrapped line shares a row with the arrow and caption.
  console.log(` ${arrow} ${caption} ${dim.italic(firstLine)}`);
  // Remaining lines are printed on their own rows.
  for (const line of continuation) {
    console.log(` ${dim.italic(line)}`);
  }
}
82
104
 
83
105
  // Cross-dimension filters: reject prompts that clearly belong to another dimension
@@ -114,8 +136,7 @@ function box(lines, width = 47) {
114
136
  const out = [];
115
137
  out.push(dim(' \u250C' + '\u2500'.repeat(width) + '\u2510'));
116
138
  for (const line of lines) {
117
- const visible = line.replace(/\u001b\[[0-9;]*m/g, '');
118
- const padding = Math.max(0, width - visible.length);
139
+ const padding = Math.max(0, width - visibleWidth(line));
119
140
  out.push(dim(' \u2502') + line + ' '.repeat(padding) + dim('\u2502'));
120
141
  }
121
142
  out.push(dim(' \u2514' + '\u2500'.repeat(width) + '\u2518'));
@@ -150,8 +171,8 @@ export function displayHeader() {
150
171
  console.log();
151
172
  const lines = [
152
173
  '',
153
- ` ${bold.white('chekk')}${dim(' v0.4.1')}`,
154
- ` ${dim('engineering capability profile')}`,
174
+ ` ${bold.white('chekk')}${dim(' v0.4.3')}`,
175
+ ` ${dim('prompt engineering capability profile')}`,
155
176
  '',
156
177
  ];
157
178
  for (const l of box(lines, 45)) console.log(l);
@@ -205,10 +226,10 @@ function displayProfileHeader(result, extra = {}) {
205
226
 
206
227
  console.log(doubleRule());
207
228
  console.log();
208
- console.log(` ${bold.white('ENGINEERING CAPABILITY PROFILE')}`);
229
+ console.log(` ${bold.white('PROMPT ENGINEERING CAPABILITY PROFILE')}`);
209
230
  console.log();
210
231
  if (sessionStats) {
211
- console.log(` ${dim(`Generated ${dateStr} | chekk v0.4.1`)}`);
232
+ console.log(` ${dim(`Generated ${dateStr} | chekk v0.4.3`)}`);
212
233
  console.log(` ${dim(`Analysis: ${sessionStats.totalSessions} sessions \u00B7 ${sessionStats.tools.length} tool${sessionStats.tools.length > 1 ? 's' : ''} \u00B7 ${numberFormat(sessionStats.totalExchanges)} exchanges`)}`);
213
234
  if (sessionStats.dateRangeShort) {
214
235
  console.log(` ${dim(`Period: ${sessionStats.dateRangeShort}`)}`);
@@ -388,7 +409,7 @@ export function displayNarratives(metrics, prose) {
388
409
  const shownSnippets = new Set();
389
410
  function showLabeledSnippet(label, prompt) {
390
411
  if (!prompt) return;
391
- const s = cleanPrompt(prompt, 120);
412
+ const s = cleanPrompt(prompt);
392
413
  if (shownSnippets.has(s)) return;
393
414
  shownSnippets.add(s);
394
415
  displayLabeledSnippet(label, prompt);
@@ -426,7 +447,7 @@ function displayDataNarratives(metrics, shownSnippets) {
426
447
 
427
448
  function showSnippet(label, prompt) {
428
449
  if (!prompt) return;
429
- const s = cleanPrompt(prompt, 120);
450
+ const s = cleanPrompt(prompt);
430
451
  if (shownSnippets.has(s)) return;
431
452
  shownSnippets.add(s);
432
453
  displayLabeledSnippet(label, prompt);
@@ -480,6 +501,9 @@ function displaySignatures(insights) {
480
501
  for (const line of lines) {
481
502
  console.log(` ${dim(line)}`);
482
503
  }
504
+ if (sig.evidence) {
505
+ displayLabeledSnippet('Proof', sig.evidence);
506
+ }
483
507
  console.log();
484
508
  }
485
509
  }
@@ -499,6 +523,9 @@ function displayWatchPoints(insights) {
499
523
  for (const line of lines) {
500
524
  console.log(` ${dim(line)}`);
501
525
  }
526
+ if (wp.evidence) {
527
+ displayLabeledSnippet('Example', wp.evidence);
528
+ }
502
529
  console.log();
503
530
  }
504
531
  }
@@ -615,7 +642,7 @@ function displayNextSteps(result) {
615
642
  console.log(` ${dim('\u2191 Copy this to share')}`);
616
643
  console.log();
617
644
  console.log(doubleRule());
618
- console.log(` ${dim('chekk.dev \u2014 engineering capability profiles')}`);
645
+ console.log(` ${dim('chekk.dev \u2014 prompt engineering capability profiles')}`);
619
646
  console.log();
620
647
  }
621
648
 
@@ -627,7 +654,32 @@ export function displayVerbose(metrics, sessions) {
627
654
  console.log(doubleRule());
628
655
  console.log(dim('\n DETAILED BREAKDOWN\n'));
629
656
 
630
- // Per-project stats
657
// Helper: print one metric row — label, value, optional benchmark, verdict.
//   label         text for the left column (padded to 28 columns)
//   value         number or preformatted string; `unit` is appended to it
//   benchmark     reference number, or null/undefined to omit the comparison
//   unit          suffix for both value and benchmark (default '')
//   lowerIsBetter true for metrics where lower = better (e.g. turns to resolve)
function metricRow(label, value, benchmark, unit = '', lowerIsBetter = false) {
  const valStr = typeof value === 'number' ? String(value) : value;
  const hasBenchmark = benchmark !== null && benchmark !== undefined;
  const bench = hasBenchmark ? Number(benchmark) : NaN;

  // A ratio only means something for a finite value against a finite,
  // positive benchmark. Dividing by 0 or comparing NaN would otherwise fall
  // through every threshold and print the harshest verdict.
  const comparable =
    typeof value === 'number' &&
    Number.isFinite(value) &&
    Number.isFinite(bench) &&
    bench > 0;

  let verdict = '';
  if (comparable) {
    const ratio = value / bench;
    if (lowerIsBetter) {
      if (ratio <= 0.5) verdict = green(' ++ faster than benchmark');
      else if (ratio <= 0.8) verdict = cyan(' + faster than benchmark');
      else if (ratio <= 1.1) verdict = dim(' ~ at benchmark');
      else if (ratio <= 1.5) verdict = orange(' - slower than benchmark');
      else verdict = red(' -- well above benchmark');
    } else {
      if (ratio >= 1.5) verdict = green(' ++ above benchmark');
      else if (ratio >= 1.1) verdict = cyan(' + above benchmark');
      else if (ratio >= 0.9) verdict = dim(' ~ at benchmark');
      else if (ratio >= 0.6) verdict = orange(' - below benchmark');
      else verdict = red(' -- well below benchmark');
    }
  }

  // The benchmark is still shown even when no verdict could be computed.
  const benchStr = hasBenchmark ? dim(` (benchmark: ${benchmark}${unit})`) : '';
  console.log(` ${dim(pad(label, 28))} ${white(valStr + unit)}${benchStr}${verdict}`);
}
681
+
682
+ // ── Projects ──
631
683
  const projects = {};
632
684
  for (const s of sessions) {
633
685
  const p = s.project || 'unknown';
@@ -637,60 +689,71 @@ export function displayVerbose(metrics, sessions) {
637
689
  projects[p].minutes += s.durationMinutes || 0;
638
690
  }
639
691
 
640
- console.log(bold(' PROJECTS'));
692
+ console.log(` ${bold('PROJECTS')}`);
693
+ console.log(` ${dim('\u2500'.repeat(50))}`);
641
694
  for (const [name, data] of Object.entries(projects).sort((a, b) => b[1].exchanges - a[1].exchanges)) {
642
- const shortName = name.length > 30 ? '...' + name.slice(-27) : name;
643
- console.log(` ${dim(pad(shortName, 32))} ${dim(data.sessions + ' sessions')} ${dim(numberFormat(data.exchanges) + ' exchanges')}`);
695
+ const shortName = name.length > 28 ? '...' + name.slice(-25) : name;
696
+ console.log(` ${pad(white(shortName), 30)} ${dim(data.sessions + ' sessions')} ${dim(numberFormat(data.exchanges) + ' exchanges')}`);
644
697
  }
645
698
  console.log();
646
699
 
647
- // Decomposition
700
+ // ── Thinking / Decomposition ──
648
701
  const d = metrics.decomposition.details;
649
- console.log(bold(' DECOMPOSITION'));
650
- console.log(` ${dim(pad('Avg session depth', 30))} ${dim(String(d.avgExchangesPerSession))} ${dim(`(benchmark: ${BENCHMARKS.avgExchangesPerSession})`)}`);
651
- console.log(` ${dim(pad('Avg prompt length', 30))} ${dim(d.avgPromptLength + ' chars')} ${dim(`(benchmark: ${BENCHMARKS.avgPromptLength})`)}`);
652
- console.log(` ${dim(pad('Multi-step sessions', 30))} ${dim(String(d.multiStepSessions) + '/' + d.totalSessions)} ${dim(d.multiStepSessions > d.totalSessions * 0.5 ? '\u2014 strong' : '\u2014 room to grow')}`);
653
- console.log(` ${dim(pad('Single-shot sessions', 30))} ${dim(String(d.singleShotSessions))}`);
654
- console.log(` ${dim(pad('Contextual followups', 30))} ${dim(d.contextualFollowupRatio + '%')} ${dim(d.contextualFollowupRatio > 20 ? '\u2014 builds on context well' : '\u2014 could reference prior work more')}`);
702
+ console.log(` ${bold('\uD83E\uDDE0 THINKING')} ${dim('(weight: 25%)')}`);
703
+ console.log(` ${dim('\u2500'.repeat(50))}`);
704
+ metricRow('Session depth', d.avgExchangesPerSession, BENCHMARKS.avgExchangesPerSession, ' exchanges');
705
+ metricRow('Prompt length', d.avgPromptLength, BENCHMARKS.avgPromptLength, ' chars');
706
+ console.log(` ${dim(pad('Multi-step sessions', 28))} ${white(d.multiStepSessions + '/' + d.totalSessions)} ${dim('(' + Math.round(d.multiStepSessions / Math.max(1, d.totalSessions) * 100) + '%)')}`);
707
+ console.log(` ${dim(pad('Single-shot sessions', 28))} ${white(String(d.singleShotSessions))}`);
708
+ console.log(` ${dim(pad('Contextual followups', 28))} ${white(d.contextualFollowupRatio + '%')}${d.contextualFollowupRatio > 20 ? cyan(' builds on context well') : orange(' could reference prior work more')}`);
655
709
  console.log();
656
710
 
657
- // Debug
711
+ // ── Debugging ──
658
712
  const db = metrics.debugCycles.details;
659
- console.log(bold(' DEBUG CYCLES'));
660
- console.log(` ${dim(pad('Total sequences', 30))} ${dim(String(db.totalDebugSequences))}`);
661
- console.log(` ${dim(pad('Avg turns to resolve', 30))} ${dim(String(db.avgTurnsToResolve))} ${dim(`(benchmark: ${BENCHMARKS.avgTurnsToResolve})`)}`);
662
- console.log(` ${dim(pad('Quick fixes (\u22642 turns)', 30))} ${dim(String(db.quickFixes))}`);
663
- console.log(` ${dim(pad('Extended loops (>5 turns)', 30))} ${dim(String(db.longLoops))} ${dim(db.longLoops === 0 ? '\u2014 zero spirals' : '')}`);
664
- console.log(` ${dim(pad('Specific report ratio', 30))} ${dim(db.specificReportRatio + '%')} ${dim(`(benchmark: ${BENCHMARKS.specificReportRatio}%)`)}`);
665
- console.log(` ${dim(pad('Vague reports', 30))} ${dim(String(db.vagueReports))} ${dim(db.vagueReports === 0 ? '\u2014 never vague' : '')}`);
713
+ console.log(` ${bold('\u26A1 DEBUGGING')} ${dim('(weight: 25%)')}`);
714
+ console.log(` ${dim('\u2500'.repeat(50))}`);
715
+ console.log(` ${dim(pad('Debug sequences', 28))} ${white(String(db.totalDebugSequences))}`);
716
+ metricRow('Turns to resolve', db.avgTurnsToResolve, BENCHMARKS.avgTurnsToResolve, ' avg', true);
717
+ console.log(` ${dim(pad('Quick fixes (1-2 turns)', 28))} ${white(String(db.quickFixes))} ${dim('of ' + db.totalDebugSequences)}`);
718
+ console.log(` ${dim(pad('Extended loops (>5 turns)', 28))} ${db.longLoops === 0 ? green('0 -- zero spirals') : orange(String(db.longLoops))}`);
719
+ metricRow('Specific report ratio', db.specificReportRatio, BENCHMARKS.specificReportRatio, '%');
720
+ console.log(` ${dim(pad('Vague reports', 28))} ${db.vagueReports === 0 ? green('0 -- never vague') : orange(String(db.vagueReports))}`);
666
721
  console.log();
667
722
 
668
- // AI Leverage
723
+ // ── AI Leverage ──
669
724
  const ai = metrics.aiLeverage.details;
670
- console.log(bold(' AI LEVERAGE'));
671
- console.log(` ${dim(pad('Total prompts', 30))} ${dim(numberFormat(ai.totalPrompts))}`);
672
- console.log(` ${dim(pad('Architectural', 30))} ${dim(String(ai.architecturalPrompts))} ${dim(`(${Math.round(ai.architecturalPrompts / Math.max(1, ai.totalPrompts) * 100)}%)`)}`);
673
- console.log(` ${dim(pad('Planning', 30))} ${dim(String(ai.planningPrompts))} ${dim(`(${Math.round(ai.planningPrompts / Math.max(1, ai.totalPrompts) * 100)}%)`)}`);
674
- console.log(` ${dim(pad('Exploratory', 30))} ${dim(String(ai.exploratoryPrompts))} ${dim(`(${Math.round(ai.exploratoryPrompts / Math.max(1, ai.totalPrompts) * 100)}%)`)}`);
675
- console.log(` ${dim(pad('Boilerplate', 30))} ${dim(String(ai.boilerplatePrompts))} ${dim(`(${Math.round(ai.boilerplatePrompts / Math.max(1, ai.totalPrompts) * 100)}%)`)} ${dim(ai.boilerplatePrompts < ai.totalPrompts * 0.05 ? '\u2014 minimal' : '')}`);
676
- console.log(` ${dim(pad('Testing', 30))} ${dim(String(ai.testingPrompts))}`);
677
- console.log(` ${dim(pad('High-level ratio', 30))} ${dim(ai.highLevelRatio + '%')} ${dim(`(benchmark: ${BENCHMARKS.highLevelRatio}%)`)}`);
725
+ const total = Math.max(1, ai.totalPrompts);
726
+ console.log(` ${bold('\uD83D\uDD27 AI LEVERAGE')} ${dim('(weight: 30%)')}`);
727
+ console.log(` ${dim('\u2500'.repeat(50))}`);
728
+ console.log(` ${dim(pad('Total prompts analyzed', 28))} ${white(numberFormat(ai.totalPrompts))}`);
729
+ console.log();
730
+ console.log(` ${dim(' Prompt type breakdown:')}`);
731
+ console.log(` ${dim(pad(' Architectural', 28))} ${white(String(ai.architecturalPrompts))} ${dim('(' + Math.round(ai.architecturalPrompts / total * 100) + '%) design, schema, strategy')}`);
732
+ console.log(` ${dim(pad(' Planning', 28))} ${white(String(ai.planningPrompts))} ${dim('(' + Math.round(ai.planningPrompts / total * 100) + '%) how-should-I, trade-offs')}`);
733
+ console.log(` ${dim(pad(' Exploratory', 28))} ${white(String(ai.exploratoryPrompts))} ${dim('(' + Math.round(ai.exploratoryPrompts / total * 100) + '%) explain, investigate')}`);
734
+ console.log(` ${dim(pad(' Boilerplate', 28))} ${white(String(ai.boilerplatePrompts))} ${dim('(' + Math.round(ai.boilerplatePrompts / total * 100) + '%) CRUD, templates')}${ai.boilerplatePrompts < total * 0.05 ? green(' minimal') : ''}`);
735
+ console.log(` ${dim(pad(' Testing', 28))} ${white(String(ai.testingPrompts))} ${dim('(' + Math.round(ai.testingPrompts / total * 100) + '%)')}`);
736
+ console.log();
737
+ metricRow('High-level ratio', ai.highLevelRatio, BENCHMARKS.highLevelRatio, '%');
678
738
  console.log();
679
739
 
680
- // Session structure
740
+ // ── Session Structure ──
681
741
  const ss = metrics.sessionStructure.details;
682
- console.log(bold(' SESSION STRUCTURE'));
683
- console.log(` ${dim(pad('Context-setting', 30))} ${dim(ss.contextSetRatio + '%')} ${dim(`(benchmark: ${BENCHMARKS.contextSetRatio}%)`)}`);
684
- console.log(` ${dim(pad('Plan before code', 30))} ${dim(ss.planBeforeCodeRatio + '%')}`);
685
- console.log(` ${dim(pad('Review at end', 30))} ${dim(ss.reviewEndRatio + '%')} ${dim(`(benchmark: ${BENCHMARKS.reviewEndRatio}%)`)}`);
686
- console.log(` ${dim(pad('Refinement rate', 30))} ${dim(ss.refinementRatio + '%')} ${dim(`(benchmark: ${BENCHMARKS.refinementRatio}%)`)}`);
687
- console.log(` ${dim(pad('Avg first prompt', 30))} ${dim(ss.avgFirstPromptLength + ' chars')}`);
742
+ console.log(` ${bold('\uD83D\uDCD0 WORKFLOW')} ${dim('(weight: 20%)')}`);
743
+ console.log(` ${dim('\u2500'.repeat(50))}`);
744
+ metricRow('Context-setting', ss.contextSetRatio, BENCHMARKS.contextSetRatio, '%');
745
+ console.log(` ${dim(pad('Plan before code', 28))} ${white(ss.planBeforeCodeRatio + '%')}`);
746
+ metricRow('Review at end', ss.reviewEndRatio, BENCHMARKS.reviewEndRatio, '%');
747
+ metricRow('Refinement rate', ss.refinementRatio, BENCHMARKS.refinementRatio, '%');
748
+ console.log(` ${dim(pad('Avg first prompt length', 28))} ${white(ss.avgFirstPromptLength + ' chars')}`);
688
749
  if (ss.durationDistribution) {
689
750
  const dur = ss.durationDistribution;
690
- console.log(` ${dim(pad('Focused sessions (10-45m)', 30))} ${dim(String(dur.focused))}`);
691
- console.log(` ${dim(pad('Short (<5m)', 30))} ${dim(String(dur.short))}`);
692
- console.log(` ${dim(pad('Medium (5-60m)', 30))} ${dim(String(dur.medium))}`);
693
- console.log(` ${dim(pad('Long (>60m)', 30))} ${dim(String(dur.long))}`);
751
+ console.log();
752
+ console.log(` ${dim(' Session duration:')}`);
753
+ console.log(` ${dim(pad(' Focused (10-45m)', 28))} ${white(String(dur.focused))} ${dim('-- ideal range')}`);
754
+ console.log(` ${dim(pad(' Short (<5m)', 28))} ${white(String(dur.short))}`);
755
+ console.log(` ${dim(pad(' Medium (5-60m)', 28))} ${white(String(dur.medium))}`);
756
+ console.log(` ${dim(pad(' Long (>60m)', 28))} ${white(String(dur.long))}`);
694
757
  }
695
758
  console.log();
696
759
  }
package/src/insights.js CHANGED
@@ -45,6 +45,16 @@ const preflightPatterns = /^(before (we|you|i)|don'?t code|review (first|this|my
45
45
  const testFirstPatterns = /\b(write (the )?tests? (first|before)|test.?driven|TDD|spec first|start with (tests?|specs?))\b/i;
46
46
  const negativeConstraintPatterns = /\b(don'?t|do not|never|avoid|must not|shouldn'?t)\b.*\b(add|create|use|include|change|modify|touch|remove)\b/i;
47
47
 
48
// Evidence quality filter.
// A prompt is treated as noise when it starts with any of:
//   - the "This session is being continued" boilerplate
//   - a timestamp-like run (optional '[' then 20+ of digits, T : . Z -)
//   - a shell prompt such as user@host … followed by % $ # or >
//   - a "$ " command line or a "> " quoted/continuation line
const noisePatterns = /^This session is being continued|^\[?[0-9T:.Z-]{20,}|^\S+@\S+.*[%$#>]|^\s*\$\s|^\s*>\s/;

/**
 * Decide whether a prompt is suitable to quote back to the user as evidence.
 *
 * @param {*} prompt - Candidate prompt; non-strings are rejected.
 * @param {object} [bounds] - Optional length limits.
 * @param {number} [bounds.minLength=40] - Shortest acceptable prompt.
 * @param {number} [bounds.maxLength=600] - Longest acceptable prompt.
 * @returns {boolean} True when the prompt is within bounds, is not noise,
 *   and at least 40% of its characters are letters.
 */
function isGoodEvidence(prompt, { minLength = 40, maxLength = 600 } = {}) {
  if (typeof prompt !== 'string') return false;
  if (prompt.length < minLength || prompt.length > maxLength) return false;
  if (noisePatterns.test(prompt)) return false;
  // Count letters in any script (\p{L}), not just ASCII, so that prompts
  // written in e.g. Cyrillic or CJK are not discarded as "non-text".
  const letters = prompt.match(/\p{L}/gu);
  const letterCount = letters === null ? 0 : letters.length;
  return letterCount / prompt.length >= 0.4;
}
57
+
48
58
  export function computeSignatures(allSessions, metrics) {
49
59
  const signatures = [];
50
60
  const d = metrics.decomposition.details;
@@ -59,6 +69,12 @@ export function computeSignatures(allSessions, metrics) {
59
69
  let modificationCount = 0;
60
70
  let acceptCount = 0;
61
71
 
72
+ // Capture evidence prompts for each signature type
73
+ let bestPreflightPrompt = null;
74
+ let bestConstraintPrompt = null;
75
+ let bestTestFirstPrompt = null;
76
+ let bestModifyPrompt = null;
77
+
62
78
  for (const session of allSessions) {
63
79
  const { exchanges } = session;
64
80
  if (exchanges.length === 0) continue;
@@ -67,6 +83,9 @@ export function computeSignatures(allSessions, metrics) {
67
83
  const firstPrompt = exchanges[0].userPrompt || '';
68
84
  if (preflightPatterns.test(firstPrompt)) {
69
85
  preflightSessions++;
86
+ if (isGoodEvidence(firstPrompt) && (!bestPreflightPrompt || firstPrompt.length > bestPreflightPrompt.length)) {
87
+ bestPreflightPrompt = firstPrompt;
88
+ }
70
89
  }
71
90
 
72
91
  let hasTestFirst = false;
@@ -76,15 +95,24 @@ export function computeSignatures(allSessions, metrics) {
76
95
 
77
96
  if (constraintPatterns.test(prompt) && negativeConstraintPatterns.test(prompt)) {
78
97
  constraintPrompts++;
98
+ if (isGoodEvidence(prompt) && (!bestConstraintPrompt || prompt.length > bestConstraintPrompt.length)) {
99
+ bestConstraintPrompt = prompt;
100
+ }
79
101
  }
80
102
 
81
103
  if (testFirstPatterns.test(prompt)) {
82
104
  hasTestFirst = true;
105
+ if (isGoodEvidence(prompt) && (!bestTestFirstPrompt || prompt.length > bestTestFirstPrompt.length)) {
106
+ bestTestFirstPrompt = prompt;
107
+ }
83
108
  }
84
109
 
85
110
  // Track modification vs acceptance
86
111
  if (i > 0 && /\b(actually|wait|instead|change|no,?|not quite|modify|tweak)\b/i.test(prompt)) {
87
112
  modificationCount++;
113
+ if (isGoodEvidence(prompt) && (!bestModifyPrompt || prompt.length > bestModifyPrompt.length)) {
114
+ bestModifyPrompt = prompt;
115
+ }
88
116
  } else if (i > 0) {
89
117
  acceptCount++;
90
118
  }
@@ -100,6 +128,7 @@ export function computeSignatures(allSessions, metrics) {
100
128
  signatures.push({
101
129
  name: 'Pre-flight reviews',
102
130
  detail: `You ask AI to review your plan before coding in ${Math.round(preflightRatio * 100)}% of sessions. Only 8% of engineers do this consistently. This correlates with fewer debug cycles.`,
131
+ evidence: bestPreflightPrompt,
103
132
  });
104
133
  }
105
134
 
@@ -109,6 +138,7 @@ export function computeSignatures(allSessions, metrics) {
109
138
  signatures.push({
110
139
  name: 'Constraint-first prompting',
111
140
  detail: `You specify what NOT to do in ${Math.round(constraintRatio * 100)}% of prompts. This is a hallmark of senior architectural thinking that prevents scope creep.`,
141
+ evidence: bestConstraintPrompt,
112
142
  });
113
143
  }
114
144
 
@@ -118,22 +148,25 @@ export function computeSignatures(allSessions, metrics) {
118
148
  signatures.push({
119
149
  name: 'Test-driven AI usage',
120
150
  detail: `You request tests before implementation in ${Math.round(testFirstRatio * 100)}% of sessions. Engineers who do this ship fewer bugs post-merge.`,
151
+ evidence: bestTestFirstPrompt,
121
152
  });
122
153
  }
123
154
 
124
- // Deep session marathons
155
+ // Deep session marathons — evidence is metric-derived, no single prompt
125
156
  if (d.avgExchangesPerSession > BENCHMARKS.avgExchangesPerSession * 2) {
126
157
  signatures.push({
127
158
  name: 'Marathon sessions',
128
159
  detail: `Avg session depth of ${d.avgExchangesPerSession} exchanges is ${Math.round(d.avgExchangesPerSession / BENCHMARKS.avgExchangesPerSession)}x the benchmark (${BENCHMARKS.avgExchangesPerSession}). You sustain deep, focused work.`,
160
+ evidence: null,
129
161
  });
130
162
  }
131
163
 
132
- // Zero vague debugging
164
+ // Zero vague debugging — evidence is the absence of something
133
165
  if (db.vagueReports === 0 && db.totalDebugSequences > 5) {
134
166
  signatures.push({
135
167
  name: 'Precision debugging',
136
168
  detail: `Zero vague error reports across ${db.totalDebugSequences} debug sequences. Every bug report includes specific context. This is rare.`,
169
+ evidence: metrics.debugCycles.examples?.[0]?.prompt || null,
137
170
  });
138
171
  }
139
172
 
@@ -142,6 +175,7 @@ export function computeSignatures(allSessions, metrics) {
142
175
  signatures.push({
143
176
  name: 'Strategic AI usage',
144
177
  detail: `${ai.highLevelRatio}% of prompts are architectural or planning-level (benchmark: ${BENCHMARKS.highLevelRatio}%). You use AI as a thinking partner, not just a code generator.`,
178
+ evidence: metrics.aiLeverage.examples?.[0]?.prompt || null,
145
179
  });
146
180
  }
147
181
 
@@ -152,6 +186,7 @@ export function computeSignatures(allSessions, metrics) {
152
186
  signatures.push({
153
187
  name: 'Critical reviewer',
154
188
  detail: `You modify or redirect AI output in ${Math.round(modRatio * 100)}% of follow-up prompts. This indicates active evaluation rather than passive acceptance.`,
189
+ evidence: bestModifyPrompt,
155
190
  });
156
191
  }
157
192
 
@@ -177,6 +212,7 @@ export function computeWatchPoints(allSessions, metrics) {
177
212
  projectSessions[p].push(s);
178
213
  }
179
214
  let contextRestarts = 0;
215
+ let bestContextRestartPrompt = null;
180
216
  let multiSessionProjects = 0;
181
217
  for (const [, sessions] of Object.entries(projectSessions)) {
182
218
  if (sessions.length < 2) continue;
@@ -186,6 +222,9 @@ export function computeWatchPoints(allSessions, metrics) {
186
222
  // If first prompt doesn't reference previous work, it's a context restart
187
223
  if (firstPrompt.length > 50 && !/\b(continuing|following up|as discussed|last time|previously|where we left|earlier)\b/i.test(firstPrompt)) {
188
224
  contextRestarts++;
225
+ if (isGoodEvidence(firstPrompt) && (!bestContextRestartPrompt || firstPrompt.length > bestContextRestartPrompt.length)) {
226
+ bestContextRestartPrompt = firstPrompt;
227
+ }
189
228
  }
190
229
  }
191
230
  }
@@ -194,6 +233,7 @@ export function computeWatchPoints(allSessions, metrics) {
194
233
  watchPoints.push({
195
234
  name: 'Context amnesia',
196
235
  detail: `You restart context from scratch in ${Math.round(contextRestarts / totalFollowupSessions * 100)}% of follow-up sessions on the same project. Engineers who maintain context across sessions are more efficient.`,
236
+ evidence: bestContextRestartPrompt,
197
237
  });
198
238
  }
199
239
 
@@ -214,14 +254,26 @@ export function computeWatchPoints(allSessions, metrics) {
214
254
  watchPoints.push({
215
255
  name: 'Acceptance without review',
216
256
  detail: `You accept AI output without modification in ${Math.round((1 - modRatio) * 100)}% of cases. Top engineers modify or redirect 30%+ of initial suggestions.`,
257
+ evidence: null, // Anti-pattern is the absence of modification
217
258
  });
218
259
  }
219
260
 
220
261
  // Monologue prompting — excessively long first prompts
221
262
  if (d.avgPromptLength > 2000) {
263
+ // Find a representative long prompt
264
+ let bestLongPrompt = null;
265
+ for (const s of allSessions) {
266
+ for (const ex of s.exchanges) {
267
+ const p = ex.userPrompt || '';
268
+ if (p.length > 1500 && p.length < 3000 && isGoodEvidence(p)) {
269
+ if (!bestLongPrompt || p.length > bestLongPrompt.length) bestLongPrompt = p;
270
+ }
271
+ }
272
+ }
222
273
  watchPoints.push({
223
274
  name: 'Monologue prompting',
224
275
  detail: `Avg prompt length of ${d.avgPromptLength} chars is ${Math.round(d.avgPromptLength / BENCHMARKS.avgPromptLength)}x the benchmark. Breaking complex requests into 2-3 shorter prompts typically yields better AI output.`,
276
+ evidence: bestLongPrompt,
225
277
  });
226
278
  }
227
279
 
@@ -230,6 +282,7 @@ export function computeWatchPoints(allSessions, metrics) {
230
282
  watchPoints.push({
231
283
  name: 'Missing context',
232
284
  detail: `Only ${ss.contextSetRatio}% of sessions start with context-setting (benchmark: ${BENCHMARKS.contextSetRatio}%). Upfront context leads to better first responses and fewer corrections.`,
285
+ evidence: null,
233
286
  });
234
287
  }
235
288
 
@@ -238,6 +291,7 @@ export function computeWatchPoints(allSessions, metrics) {
238
291
  watchPoints.push({
239
292
  name: 'Debug spirals',
240
293
  detail: `${db.longLoops} extended debug loops (>5 turns) detected. When stuck, try providing more specific error context or breaking the problem differently.`,
294
+ evidence: null,
241
295
  });
242
296
  }
243
297