chekk 0.4.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/chekk.js +2 -2
- package/package.json +1 -1
- package/src/display.js +122 -59
- package/src/insights.js +56 -2
package/bin/chekk.js
CHANGED
|
@@ -4,7 +4,7 @@ import { execSync, spawn } from 'child_process';
|
|
|
4
4
|
import { Command } from 'commander';
|
|
5
5
|
import { run } from '../src/index.js';
|
|
6
6
|
|
|
7
|
-
const LOCAL_VERSION = '0.4.
|
|
7
|
+
const LOCAL_VERSION = '0.4.3';
|
|
8
8
|
|
|
9
9
|
// ── Auto-update check ──
|
|
10
10
|
// If running from a cached npx install, check if there's a newer version
|
|
@@ -48,7 +48,7 @@ if (!handedOff) {
|
|
|
48
48
|
|
|
49
49
|
program
|
|
50
50
|
.name('chekk')
|
|
51
|
-
.description('The engineering capability score. See how you prompt.')
|
|
51
|
+
.description('The prompt engineering capability score. See how you prompt.')
|
|
52
52
|
.version(LOCAL_VERSION)
|
|
53
53
|
.option('--offline', 'Skip AI prose generation, show data-driven output')
|
|
54
54
|
.option('--verbose', 'Show detailed per-project and per-metric breakdowns')
|
package/package.json
CHANGED
package/src/display.js
CHANGED
|
@@ -41,9 +41,28 @@ function numberFormat(n) {
|
|
|
41
41
|
return String(n);
|
|
42
42
|
}
|
|
43
43
|
|
|
44
|
+
// Code point ranges counted as two terminal columns. The symbol/emoji entries
// are the heuristics this helper has always used; the CJK entries are the
// commonly used East Asian Wide/Fullwidth ranges, so the helper actually
// covers CJK text as its description promises.
const WIDE_CODE_POINT_RANGES = [
  [0x1100, 0x115F], // Hangul Jamo leading consonants
  [0x2600, 0x27BF], // misc symbols and dingbats
  [0x2B50, 0x2B55], // stars
  [0x2E80, 0x303E], // CJK radicals, symbols and punctuation
  [0x3041, 0xA4CF], // Hiragana through Yi
  [0xAC00, 0xD7A3], // Hangul syllables
  [0xF900, 0xFAFF], // CJK compatibility ideographs
  [0xFE30, 0xFE6F], // CJK compatibility and small forms
  [0xFF00, 0xFF60], // fullwidth forms
  [0xFFE0, 0xFFE6], // fullwidth signs
  [0x1F001, Infinity], // emoji and everything above U+1F000
];

/**
 * Measure the visible display width of a string in terminal columns.
 *
 * ANSI SGR colour sequences (ESC [ ... m) are stripped first. The remaining
 * text is walked by code point: variation selectors add no width, code points
 * in WIDE_CODE_POINT_RANGES add two columns, everything else adds one.
 *
 * @param {string} str - Text that may contain ANSI colour codes.
 * @returns {number} Estimated number of terminal columns the text occupies.
 */
function visibleWidth(str) {
  const stripped = str.replace(/\u001b\[[0-9;]*m/g, '');
  let width = 0;
  for (const ch of stripped) {
    const code = ch.codePointAt(0);
    // Variation selectors only change how the preceding character is
    // presented; they occupy no column of their own.
    if (code >= 0xFE00 && code <= 0xFE0F) continue;
    const isWide = WIDE_CODE_POINT_RANGES.some(([lo, hi]) => code >= lo && code <= hi);
    width += isWide ? 2 : 1;
  }
  return width;
}
|
|
63
|
+
|
|
44
64
|
function pad(str, len) {
|
|
45
|
-
|
|
46
|
-
return str + ' '.repeat(Math.max(0, len - visible.length));
|
|
65
|
+
return str + ' '.repeat(Math.max(0, len - visibleWidth(str)));
|
|
47
66
|
}
|
|
48
67
|
|
|
49
68
|
// ── Qualitative tier labels for dimensions ──
|
|
@@ -65,19 +84,22 @@ function dimTierColor(score) {
|
|
|
65
84
|
|
|
66
85
|
// ── Snippet helpers ──
|
|
67
86
|
|
|
68
|
-
function cleanPrompt(prompt
|
|
87
|
+
function cleanPrompt(prompt) {
|
|
69
88
|
if (!prompt) return null;
|
|
70
|
-
|
|
71
|
-
if (clean.length > maxLen) {
|
|
72
|
-
clean = clean.slice(0, maxLen - 1) + '\u2026';
|
|
73
|
-
}
|
|
74
|
-
return clean;
|
|
89
|
+
return prompt.replace(/\s+/g, ' ').trim();
|
|
75
90
|
}
|
|
76
91
|
|
|
77
|
-
function displayLabeledSnippet(label, prompt
|
|
78
|
-
const s = cleanPrompt(prompt
|
|
92
|
+
function displayLabeledSnippet(label, prompt) {
|
|
93
|
+
const s = cleanPrompt(prompt);
|
|
79
94
|
if (!s) return;
|
|
80
|
-
|
|
95
|
+
// Wrap the full prompt across multiple lines instead of truncating
|
|
96
|
+
const prefix = `${dim('\u21B3')} ${dim(label + ':')} `;
|
|
97
|
+
const quoted = `\u201C${s}\u201D`;
|
|
98
|
+
const lines = wrapText(quoted, 53);
|
|
99
|
+
console.log(` ${prefix}${dim.italic(lines[0])}`);
|
|
100
|
+
for (let i = 1; i < lines.length; i++) {
|
|
101
|
+
console.log(` ${dim.italic(lines[i])}`);
|
|
102
|
+
}
|
|
81
103
|
}
|
|
82
104
|
|
|
83
105
|
// Cross-dimension filters: reject prompts that clearly belong to another dimension
|
|
@@ -114,8 +136,7 @@ function box(lines, width = 47) {
|
|
|
114
136
|
const out = [];
|
|
115
137
|
out.push(dim(' \u250C' + '\u2500'.repeat(width) + '\u2510'));
|
|
116
138
|
for (const line of lines) {
|
|
117
|
-
const
|
|
118
|
-
const padding = Math.max(0, width - visible.length);
|
|
139
|
+
const padding = Math.max(0, width - visibleWidth(line));
|
|
119
140
|
out.push(dim(' \u2502') + line + ' '.repeat(padding) + dim('\u2502'));
|
|
120
141
|
}
|
|
121
142
|
out.push(dim(' \u2514' + '\u2500'.repeat(width) + '\u2518'));
|
|
@@ -150,8 +171,8 @@ export function displayHeader() {
|
|
|
150
171
|
console.log();
|
|
151
172
|
const lines = [
|
|
152
173
|
'',
|
|
153
|
-
` ${bold.white('chekk')}${dim(' v0.4.
|
|
154
|
-
` ${dim('engineering capability profile')}`,
|
|
174
|
+
` ${bold.white('chekk')}${dim(' v0.4.3')}`,
|
|
175
|
+
` ${dim('prompt engineering capability profile')}`,
|
|
155
176
|
'',
|
|
156
177
|
];
|
|
157
178
|
for (const l of box(lines, 45)) console.log(l);
|
|
@@ -205,10 +226,10 @@ function displayProfileHeader(result, extra = {}) {
|
|
|
205
226
|
|
|
206
227
|
console.log(doubleRule());
|
|
207
228
|
console.log();
|
|
208
|
-
console.log(` ${bold.white('ENGINEERING CAPABILITY PROFILE')}`);
|
|
229
|
+
console.log(` ${bold.white('PROMPT ENGINEERING CAPABILITY PROFILE')}`);
|
|
209
230
|
console.log();
|
|
210
231
|
if (sessionStats) {
|
|
211
|
-
console.log(` ${dim(`Generated ${dateStr} | chekk v0.4.
|
|
232
|
+
console.log(` ${dim(`Generated ${dateStr} | chekk v0.4.3`)}`);
|
|
212
233
|
console.log(` ${dim(`Analysis: ${sessionStats.totalSessions} sessions \u00B7 ${sessionStats.tools.length} tool${sessionStats.tools.length > 1 ? 's' : ''} \u00B7 ${numberFormat(sessionStats.totalExchanges)} exchanges`)}`);
|
|
213
234
|
if (sessionStats.dateRangeShort) {
|
|
214
235
|
console.log(` ${dim(`Period: ${sessionStats.dateRangeShort}`)}`);
|
|
@@ -388,7 +409,7 @@ export function displayNarratives(metrics, prose) {
|
|
|
388
409
|
const shownSnippets = new Set();
|
|
389
410
|
function showLabeledSnippet(label, prompt) {
|
|
390
411
|
if (!prompt) return;
|
|
391
|
-
const s = cleanPrompt(prompt
|
|
412
|
+
const s = cleanPrompt(prompt);
|
|
392
413
|
if (shownSnippets.has(s)) return;
|
|
393
414
|
shownSnippets.add(s);
|
|
394
415
|
displayLabeledSnippet(label, prompt);
|
|
@@ -426,7 +447,7 @@ function displayDataNarratives(metrics, shownSnippets) {
|
|
|
426
447
|
|
|
427
448
|
function showSnippet(label, prompt) {
|
|
428
449
|
if (!prompt) return;
|
|
429
|
-
const s = cleanPrompt(prompt
|
|
450
|
+
const s = cleanPrompt(prompt);
|
|
430
451
|
if (shownSnippets.has(s)) return;
|
|
431
452
|
shownSnippets.add(s);
|
|
432
453
|
displayLabeledSnippet(label, prompt);
|
|
@@ -480,6 +501,9 @@ function displaySignatures(insights) {
|
|
|
480
501
|
for (const line of lines) {
|
|
481
502
|
console.log(` ${dim(line)}`);
|
|
482
503
|
}
|
|
504
|
+
if (sig.evidence) {
|
|
505
|
+
displayLabeledSnippet('Proof', sig.evidence);
|
|
506
|
+
}
|
|
483
507
|
console.log();
|
|
484
508
|
}
|
|
485
509
|
}
|
|
@@ -499,6 +523,9 @@ function displayWatchPoints(insights) {
|
|
|
499
523
|
for (const line of lines) {
|
|
500
524
|
console.log(` ${dim(line)}`);
|
|
501
525
|
}
|
|
526
|
+
if (wp.evidence) {
|
|
527
|
+
displayLabeledSnippet('Example', wp.evidence);
|
|
528
|
+
}
|
|
502
529
|
console.log();
|
|
503
530
|
}
|
|
504
531
|
}
|
|
@@ -615,7 +642,7 @@ function displayNextSteps(result) {
|
|
|
615
642
|
console.log(` ${dim('\u2191 Copy this to share')}`);
|
|
616
643
|
console.log();
|
|
617
644
|
console.log(doubleRule());
|
|
618
|
-
console.log(` ${dim('chekk.dev \u2014 engineering capability profiles')}`);
|
|
645
|
+
console.log(` ${dim('chekk.dev \u2014 prompt engineering capability profiles')}`);
|
|
619
646
|
console.log();
|
|
620
647
|
}
|
|
621
648
|
|
|
@@ -627,7 +654,32 @@ export function displayVerbose(metrics, sessions) {
|
|
|
627
654
|
console.log(doubleRule());
|
|
628
655
|
console.log(dim('\n DETAILED BREAKDOWN\n'));
|
|
629
656
|
|
|
630
|
-
//
|
|
657
|
+
// Helper: show a metric row with value, benchmark comparison, and verdict
// lowerIsBetter: true for metrics where lower = better (e.g. turns to resolve)
//
// label      - row label, padded to 28 columns
// value      - metric value; only numeric values are compared to the benchmark
// benchmark  - reference value, or null/undefined to print no comparison
// unit       - suffix appended to both the value and the benchmark
//
// The verdict is derived from value / benchmark. It is only printed when that
// ratio is a finite number, so a zero benchmark or a NaN value yields a plain
// row instead of a misleading "well below/above benchmark" label.
function metricRow(label, value, benchmark, unit = '', lowerIsBetter = false) {
  const valStr = typeof value === 'number' ? String(value) : value;
  const hasBenchmark = benchmark !== null && benchmark !== undefined;
  const ratio = hasBenchmark && typeof value === 'number' ? value / benchmark : NaN;

  let verdict = '';
  if (Number.isFinite(ratio)) {
    if (lowerIsBetter) {
      if (ratio <= 0.5) verdict = green(' ++ faster than benchmark');
      else if (ratio <= 0.8) verdict = cyan(' + faster than benchmark');
      else if (ratio <= 1.1) verdict = dim(' ~ at benchmark');
      else if (ratio <= 1.5) verdict = orange(' - slower than benchmark');
      else verdict = red(' -- well above benchmark');
    } else {
      if (ratio >= 1.5) verdict = green(' ++ above benchmark');
      else if (ratio >= 1.1) verdict = cyan(' + above benchmark');
      else if (ratio >= 0.9) verdict = dim(' ~ at benchmark');
      else if (ratio >= 0.6) verdict = orange(' - below benchmark');
      else verdict = red(' -- well below benchmark');
    }
  }

  const benchStr = hasBenchmark ? dim(` (benchmark: ${benchmark}${unit})`) : '';
  console.log(` ${dim(pad(label, 28))} ${white(valStr + unit)}${benchStr}${verdict}`);
}
|
|
681
|
+
|
|
682
|
+
// ── Projects ──
|
|
631
683
|
const projects = {};
|
|
632
684
|
for (const s of sessions) {
|
|
633
685
|
const p = s.project || 'unknown';
|
|
@@ -637,60 +689,71 @@ export function displayVerbose(metrics, sessions) {
|
|
|
637
689
|
projects[p].minutes += s.durationMinutes || 0;
|
|
638
690
|
}
|
|
639
691
|
|
|
640
|
-
console.log(bold('
|
|
692
|
+
console.log(` ${bold('PROJECTS')}`);
|
|
693
|
+
console.log(` ${dim('\u2500'.repeat(50))}`);
|
|
641
694
|
for (const [name, data] of Object.entries(projects).sort((a, b) => b[1].exchanges - a[1].exchanges)) {
|
|
642
|
-
const shortName = name.length >
|
|
643
|
-
console.log(` ${
|
|
695
|
+
const shortName = name.length > 28 ? '...' + name.slice(-25) : name;
|
|
696
|
+
console.log(` ${pad(white(shortName), 30)} ${dim(data.sessions + ' sessions')} ${dim(numberFormat(data.exchanges) + ' exchanges')}`);
|
|
644
697
|
}
|
|
645
698
|
console.log();
|
|
646
699
|
|
|
647
|
-
// Decomposition
|
|
700
|
+
// ── Thinking / Decomposition ──
|
|
648
701
|
const d = metrics.decomposition.details;
|
|
649
|
-
console.log(bold('
|
|
650
|
-
console.log(` ${dim(
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
console.log(` ${dim(pad('
|
|
654
|
-
console.log(` ${dim(pad('
|
|
702
|
+
console.log(` ${bold('\uD83E\uDDE0 THINKING')} ${dim('(weight: 25%)')}`);
|
|
703
|
+
console.log(` ${dim('\u2500'.repeat(50))}`);
|
|
704
|
+
metricRow('Session depth', d.avgExchangesPerSession, BENCHMARKS.avgExchangesPerSession, ' exchanges');
|
|
705
|
+
metricRow('Prompt length', d.avgPromptLength, BENCHMARKS.avgPromptLength, ' chars');
|
|
706
|
+
console.log(` ${dim(pad('Multi-step sessions', 28))} ${white(d.multiStepSessions + '/' + d.totalSessions)} ${dim('(' + Math.round(d.multiStepSessions / Math.max(1, d.totalSessions) * 100) + '%)')}`);
|
|
707
|
+
console.log(` ${dim(pad('Single-shot sessions', 28))} ${white(String(d.singleShotSessions))}`);
|
|
708
|
+
console.log(` ${dim(pad('Contextual followups', 28))} ${white(d.contextualFollowupRatio + '%')}${d.contextualFollowupRatio > 20 ? cyan(' builds on context well') : orange(' could reference prior work more')}`);
|
|
655
709
|
console.log();
|
|
656
710
|
|
|
657
|
-
//
|
|
711
|
+
// ── Debugging ──
|
|
658
712
|
const db = metrics.debugCycles.details;
|
|
659
|
-
console.log(bold('
|
|
660
|
-
console.log(` ${dim(
|
|
661
|
-
console.log(` ${dim(pad('
|
|
662
|
-
|
|
663
|
-
console.log(` ${dim(pad('
|
|
664
|
-
console.log(` ${dim(pad('
|
|
665
|
-
|
|
713
|
+
console.log(` ${bold('\u26A1 DEBUGGING')} ${dim('(weight: 25%)')}`);
|
|
714
|
+
console.log(` ${dim('\u2500'.repeat(50))}`);
|
|
715
|
+
console.log(` ${dim(pad('Debug sequences', 28))} ${white(String(db.totalDebugSequences))}`);
|
|
716
|
+
metricRow('Turns to resolve', db.avgTurnsToResolve, BENCHMARKS.avgTurnsToResolve, ' avg', true);
|
|
717
|
+
console.log(` ${dim(pad('Quick fixes (1-2 turns)', 28))} ${white(String(db.quickFixes))} ${dim('of ' + db.totalDebugSequences)}`);
|
|
718
|
+
console.log(` ${dim(pad('Extended loops (>5 turns)', 28))} ${db.longLoops === 0 ? green('0 -- zero spirals') : orange(String(db.longLoops))}`);
|
|
719
|
+
metricRow('Specific report ratio', db.specificReportRatio, BENCHMARKS.specificReportRatio, '%');
|
|
720
|
+
console.log(` ${dim(pad('Vague reports', 28))} ${db.vagueReports === 0 ? green('0 -- never vague') : orange(String(db.vagueReports))}`);
|
|
666
721
|
console.log();
|
|
667
722
|
|
|
668
|
-
// AI Leverage
|
|
723
|
+
// ── AI Leverage ──
|
|
669
724
|
const ai = metrics.aiLeverage.details;
|
|
670
|
-
|
|
671
|
-
console.log(` ${
|
|
672
|
-
console.log(` ${dim(
|
|
673
|
-
console.log(` ${dim(pad('
|
|
674
|
-
console.log(
|
|
675
|
-
console.log(` ${dim(
|
|
676
|
-
console.log(` ${dim(pad('
|
|
677
|
-
console.log(` ${dim(pad('
|
|
725
|
+
const total = Math.max(1, ai.totalPrompts);
|
|
726
|
+
console.log(` ${bold('\uD83D\uDD27 AI LEVERAGE')} ${dim('(weight: 30%)')}`);
|
|
727
|
+
console.log(` ${dim('\u2500'.repeat(50))}`);
|
|
728
|
+
console.log(` ${dim(pad('Total prompts analyzed', 28))} ${white(numberFormat(ai.totalPrompts))}`);
|
|
729
|
+
console.log();
|
|
730
|
+
console.log(` ${dim(' Prompt type breakdown:')}`);
|
|
731
|
+
console.log(` ${dim(pad(' Architectural', 28))} ${white(String(ai.architecturalPrompts))} ${dim('(' + Math.round(ai.architecturalPrompts / total * 100) + '%) design, schema, strategy')}`);
|
|
732
|
+
console.log(` ${dim(pad(' Planning', 28))} ${white(String(ai.planningPrompts))} ${dim('(' + Math.round(ai.planningPrompts / total * 100) + '%) how-should-I, trade-offs')}`);
|
|
733
|
+
console.log(` ${dim(pad(' Exploratory', 28))} ${white(String(ai.exploratoryPrompts))} ${dim('(' + Math.round(ai.exploratoryPrompts / total * 100) + '%) explain, investigate')}`);
|
|
734
|
+
console.log(` ${dim(pad(' Boilerplate', 28))} ${white(String(ai.boilerplatePrompts))} ${dim('(' + Math.round(ai.boilerplatePrompts / total * 100) + '%) CRUD, templates')}${ai.boilerplatePrompts < total * 0.05 ? green(' minimal') : ''}`);
|
|
735
|
+
console.log(` ${dim(pad(' Testing', 28))} ${white(String(ai.testingPrompts))} ${dim('(' + Math.round(ai.testingPrompts / total * 100) + '%)')}`);
|
|
736
|
+
console.log();
|
|
737
|
+
metricRow('High-level ratio', ai.highLevelRatio, BENCHMARKS.highLevelRatio, '%');
|
|
678
738
|
console.log();
|
|
679
739
|
|
|
680
|
-
// Session
|
|
740
|
+
// ── Session Structure ──
|
|
681
741
|
const ss = metrics.sessionStructure.details;
|
|
682
|
-
console.log(bold('
|
|
683
|
-
console.log(` ${dim(
|
|
684
|
-
|
|
685
|
-
console.log(` ${dim(pad('
|
|
686
|
-
|
|
687
|
-
|
|
742
|
+
console.log(` ${bold('\uD83D\uDCD0 WORKFLOW')} ${dim('(weight: 20%)')}`);
|
|
743
|
+
console.log(` ${dim('\u2500'.repeat(50))}`);
|
|
744
|
+
metricRow('Context-setting', ss.contextSetRatio, BENCHMARKS.contextSetRatio, '%');
|
|
745
|
+
console.log(` ${dim(pad('Plan before code', 28))} ${white(ss.planBeforeCodeRatio + '%')}`);
|
|
746
|
+
metricRow('Review at end', ss.reviewEndRatio, BENCHMARKS.reviewEndRatio, '%');
|
|
747
|
+
metricRow('Refinement rate', ss.refinementRatio, BENCHMARKS.refinementRatio, '%');
|
|
748
|
+
console.log(` ${dim(pad('Avg first prompt length', 28))} ${white(ss.avgFirstPromptLength + ' chars')}`);
|
|
688
749
|
if (ss.durationDistribution) {
|
|
689
750
|
const dur = ss.durationDistribution;
|
|
690
|
-
console.log(
|
|
691
|
-
console.log(` ${dim(
|
|
692
|
-
console.log(` ${dim(pad('
|
|
693
|
-
console.log(` ${dim(pad('
|
|
751
|
+
console.log();
|
|
752
|
+
console.log(` ${dim(' Session duration:')}`);
|
|
753
|
+
console.log(` ${dim(pad(' Focused (10-45m)', 28))} ${white(String(dur.focused))} ${dim('-- ideal range')}`);
|
|
754
|
+
console.log(` ${dim(pad(' Short (<5m)', 28))} ${white(String(dur.short))}`);
|
|
755
|
+
console.log(` ${dim(pad(' Medium (5-60m)', 28))} ${white(String(dur.medium))}`);
|
|
756
|
+
console.log(` ${dim(pad(' Long (>60m)', 28))} ${white(String(dur.long))}`);
|
|
694
757
|
}
|
|
695
758
|
console.log();
|
|
696
759
|
}
|
package/src/insights.js
CHANGED
|
@@ -45,6 +45,16 @@ const preflightPatterns = /^(before (we|you|i)|don'?t code|review (first|this|my
|
|
|
45
45
|
const testFirstPatterns = /\b(write (the )?tests? (first|before)|test.?driven|TDD|spec first|start with (tests?|specs?))\b/i;
|
|
46
46
|
const negativeConstraintPatterns = /\b(don'?t|do not|never|avoid|must not|shouldn'?t)\b.*\b(add|create|use|include|change|modify|touch|remove)\b/i;
|
|
47
47
|
|
|
48
|
+
// Evidence quality filter (same rules as metric parsers)
// Matches prompts whose first characters look like pasted machine output
// rather than something the user wrote: the "This session is being continued"
// banner, a long run of timestamp-like characters, a user@host shell prompt,
// a "$ " command line, or a "> " quoted line.
const noisePatterns = /^This session is being continued|^\[?[0-9T:.Z-]{20,}|^\S+@\S+.*[%$#>]|^\s*\$\s|^\s*>\s/;

/**
 * Decide whether a prompt is suitable to show to the user as evidence.
 *
 * A prompt qualifies when it is a non-empty string within the length bounds,
 * does not start with one of the noise patterns, and at least 40% of its
 * characters are ASCII letters.
 *
 * The bounds default to the original 40..600 characters. They are
 * parameters because a caller that looks for long prompts (for example
 * prompts over 1500 characters) can never get a match under the 600 cap and
 * needs to raise `maxLength`.
 *
 * @param {string} prompt - Raw user prompt text.
 * @param {{minLength?: number, maxLength?: number}} [options] - Length bounds.
 * @returns {boolean} True when the prompt can be displayed as evidence.
 */
function isGoodEvidence(prompt, { minLength = 40, maxLength = 600 } = {}) {
  if (typeof prompt !== 'string' || prompt.length === 0) return false;
  if (prompt.length < minLength || prompt.length > maxLength) return false;
  if (noisePatterns.test(prompt)) return false;
  const alpha = prompt.replace(/[^a-zA-Z]/g, '').length;
  return alpha / prompt.length >= 0.4;
}
|
|
57
|
+
|
|
48
58
|
export function computeSignatures(allSessions, metrics) {
|
|
49
59
|
const signatures = [];
|
|
50
60
|
const d = metrics.decomposition.details;
|
|
@@ -59,6 +69,12 @@ export function computeSignatures(allSessions, metrics) {
|
|
|
59
69
|
let modificationCount = 0;
|
|
60
70
|
let acceptCount = 0;
|
|
61
71
|
|
|
72
|
+
// Capture evidence prompts for each signature type
|
|
73
|
+
let bestPreflightPrompt = null;
|
|
74
|
+
let bestConstraintPrompt = null;
|
|
75
|
+
let bestTestFirstPrompt = null;
|
|
76
|
+
let bestModifyPrompt = null;
|
|
77
|
+
|
|
62
78
|
for (const session of allSessions) {
|
|
63
79
|
const { exchanges } = session;
|
|
64
80
|
if (exchanges.length === 0) continue;
|
|
@@ -67,6 +83,9 @@ export function computeSignatures(allSessions, metrics) {
|
|
|
67
83
|
const firstPrompt = exchanges[0].userPrompt || '';
|
|
68
84
|
if (preflightPatterns.test(firstPrompt)) {
|
|
69
85
|
preflightSessions++;
|
|
86
|
+
if (isGoodEvidence(firstPrompt) && (!bestPreflightPrompt || firstPrompt.length > bestPreflightPrompt.length)) {
|
|
87
|
+
bestPreflightPrompt = firstPrompt;
|
|
88
|
+
}
|
|
70
89
|
}
|
|
71
90
|
|
|
72
91
|
let hasTestFirst = false;
|
|
@@ -76,15 +95,24 @@ export function computeSignatures(allSessions, metrics) {
|
|
|
76
95
|
|
|
77
96
|
if (constraintPatterns.test(prompt) && negativeConstraintPatterns.test(prompt)) {
|
|
78
97
|
constraintPrompts++;
|
|
98
|
+
if (isGoodEvidence(prompt) && (!bestConstraintPrompt || prompt.length > bestConstraintPrompt.length)) {
|
|
99
|
+
bestConstraintPrompt = prompt;
|
|
100
|
+
}
|
|
79
101
|
}
|
|
80
102
|
|
|
81
103
|
if (testFirstPatterns.test(prompt)) {
|
|
82
104
|
hasTestFirst = true;
|
|
105
|
+
if (isGoodEvidence(prompt) && (!bestTestFirstPrompt || prompt.length > bestTestFirstPrompt.length)) {
|
|
106
|
+
bestTestFirstPrompt = prompt;
|
|
107
|
+
}
|
|
83
108
|
}
|
|
84
109
|
|
|
85
110
|
// Track modification vs acceptance
|
|
86
111
|
if (i > 0 && /\b(actually|wait|instead|change|no,?|not quite|modify|tweak)\b/i.test(prompt)) {
|
|
87
112
|
modificationCount++;
|
|
113
|
+
if (isGoodEvidence(prompt) && (!bestModifyPrompt || prompt.length > bestModifyPrompt.length)) {
|
|
114
|
+
bestModifyPrompt = prompt;
|
|
115
|
+
}
|
|
88
116
|
} else if (i > 0) {
|
|
89
117
|
acceptCount++;
|
|
90
118
|
}
|
|
@@ -100,6 +128,7 @@ export function computeSignatures(allSessions, metrics) {
|
|
|
100
128
|
signatures.push({
|
|
101
129
|
name: 'Pre-flight reviews',
|
|
102
130
|
detail: `You ask AI to review your plan before coding in ${Math.round(preflightRatio * 100)}% of sessions. Only 8% of engineers do this consistently. This correlates with fewer debug cycles.`,
|
|
131
|
+
evidence: bestPreflightPrompt,
|
|
103
132
|
});
|
|
104
133
|
}
|
|
105
134
|
|
|
@@ -109,6 +138,7 @@ export function computeSignatures(allSessions, metrics) {
|
|
|
109
138
|
signatures.push({
|
|
110
139
|
name: 'Constraint-first prompting',
|
|
111
140
|
detail: `You specify what NOT to do in ${Math.round(constraintRatio * 100)}% of prompts. This is a hallmark of senior architectural thinking that prevents scope creep.`,
|
|
141
|
+
evidence: bestConstraintPrompt,
|
|
112
142
|
});
|
|
113
143
|
}
|
|
114
144
|
|
|
@@ -118,22 +148,25 @@ export function computeSignatures(allSessions, metrics) {
|
|
|
118
148
|
signatures.push({
|
|
119
149
|
name: 'Test-driven AI usage',
|
|
120
150
|
detail: `You request tests before implementation in ${Math.round(testFirstRatio * 100)}% of sessions. Engineers who do this ship fewer bugs post-merge.`,
|
|
151
|
+
evidence: bestTestFirstPrompt,
|
|
121
152
|
});
|
|
122
153
|
}
|
|
123
154
|
|
|
124
|
-
// Deep session marathons
|
|
155
|
+
// Deep session marathons — evidence is metric-derived, no single prompt
|
|
125
156
|
if (d.avgExchangesPerSession > BENCHMARKS.avgExchangesPerSession * 2) {
|
|
126
157
|
signatures.push({
|
|
127
158
|
name: 'Marathon sessions',
|
|
128
159
|
detail: `Avg session depth of ${d.avgExchangesPerSession} exchanges is ${Math.round(d.avgExchangesPerSession / BENCHMARKS.avgExchangesPerSession)}x the benchmark (${BENCHMARKS.avgExchangesPerSession}). You sustain deep, focused work.`,
|
|
160
|
+
evidence: null,
|
|
129
161
|
});
|
|
130
162
|
}
|
|
131
163
|
|
|
132
|
-
// Zero vague debugging
|
|
164
|
+
// Zero vague debugging — evidence is the absence of something
|
|
133
165
|
if (db.vagueReports === 0 && db.totalDebugSequences > 5) {
|
|
134
166
|
signatures.push({
|
|
135
167
|
name: 'Precision debugging',
|
|
136
168
|
detail: `Zero vague error reports across ${db.totalDebugSequences} debug sequences. Every bug report includes specific context. This is rare.`,
|
|
169
|
+
evidence: metrics.debugCycles.examples?.[0]?.prompt || null,
|
|
137
170
|
});
|
|
138
171
|
}
|
|
139
172
|
|
|
@@ -142,6 +175,7 @@ export function computeSignatures(allSessions, metrics) {
|
|
|
142
175
|
signatures.push({
|
|
143
176
|
name: 'Strategic AI usage',
|
|
144
177
|
detail: `${ai.highLevelRatio}% of prompts are architectural or planning-level (benchmark: ${BENCHMARKS.highLevelRatio}%). You use AI as a thinking partner, not just a code generator.`,
|
|
178
|
+
evidence: metrics.aiLeverage.examples?.[0]?.prompt || null,
|
|
145
179
|
});
|
|
146
180
|
}
|
|
147
181
|
|
|
@@ -152,6 +186,7 @@ export function computeSignatures(allSessions, metrics) {
|
|
|
152
186
|
signatures.push({
|
|
153
187
|
name: 'Critical reviewer',
|
|
154
188
|
detail: `You modify or redirect AI output in ${Math.round(modRatio * 100)}% of follow-up prompts. This indicates active evaluation rather than passive acceptance.`,
|
|
189
|
+
evidence: bestModifyPrompt,
|
|
155
190
|
});
|
|
156
191
|
}
|
|
157
192
|
|
|
@@ -177,6 +212,7 @@ export function computeWatchPoints(allSessions, metrics) {
|
|
|
177
212
|
projectSessions[p].push(s);
|
|
178
213
|
}
|
|
179
214
|
let contextRestarts = 0;
|
|
215
|
+
let bestContextRestartPrompt = null;
|
|
180
216
|
let multiSessionProjects = 0;
|
|
181
217
|
for (const [, sessions] of Object.entries(projectSessions)) {
|
|
182
218
|
if (sessions.length < 2) continue;
|
|
@@ -186,6 +222,9 @@ export function computeWatchPoints(allSessions, metrics) {
|
|
|
186
222
|
// If first prompt doesn't reference previous work, it's a context restart
|
|
187
223
|
if (firstPrompt.length > 50 && !/\b(continuing|following up|as discussed|last time|previously|where we left|earlier)\b/i.test(firstPrompt)) {
|
|
188
224
|
contextRestarts++;
|
|
225
|
+
if (isGoodEvidence(firstPrompt) && (!bestContextRestartPrompt || firstPrompt.length > bestContextRestartPrompt.length)) {
|
|
226
|
+
bestContextRestartPrompt = firstPrompt;
|
|
227
|
+
}
|
|
189
228
|
}
|
|
190
229
|
}
|
|
191
230
|
}
|
|
@@ -194,6 +233,7 @@ export function computeWatchPoints(allSessions, metrics) {
|
|
|
194
233
|
watchPoints.push({
|
|
195
234
|
name: 'Context amnesia',
|
|
196
235
|
detail: `You restart context from scratch in ${Math.round(contextRestarts / totalFollowupSessions * 100)}% of follow-up sessions on the same project. Engineers who maintain context across sessions are more efficient.`,
|
|
236
|
+
evidence: bestContextRestartPrompt,
|
|
197
237
|
});
|
|
198
238
|
}
|
|
199
239
|
|
|
@@ -214,14 +254,26 @@ export function computeWatchPoints(allSessions, metrics) {
|
|
|
214
254
|
watchPoints.push({
|
|
215
255
|
name: 'Acceptance without review',
|
|
216
256
|
detail: `You accept AI output without modification in ${Math.round((1 - modRatio) * 100)}% of cases. Top engineers modify or redirect 30%+ of initial suggestions.`,
|
|
257
|
+
evidence: null, // Anti-pattern is the absence of modification
|
|
217
258
|
});
|
|
218
259
|
}
|
|
219
260
|
|
|
220
261
|
// Monologue prompting — excessively long first prompts
|
|
221
262
|
if (d.avgPromptLength > 2000) {
|
|
263
|
+
// Find a representative long prompt
|
|
264
|
+
let bestLongPrompt = null;
|
|
265
|
+
for (const s of allSessions) {
|
|
266
|
+
for (const ex of s.exchanges) {
|
|
267
|
+
const p = ex.userPrompt || '';
|
|
268
|
+
if (p.length > 1500 && p.length < 3000 && isGoodEvidence(p)) {
|
|
269
|
+
if (!bestLongPrompt || p.length > bestLongPrompt.length) bestLongPrompt = p;
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
}
|
|
222
273
|
watchPoints.push({
|
|
223
274
|
name: 'Monologue prompting',
|
|
224
275
|
detail: `Avg prompt length of ${d.avgPromptLength} chars is ${Math.round(d.avgPromptLength / BENCHMARKS.avgPromptLength)}x the benchmark. Breaking complex requests into 2-3 shorter prompts typically yields better AI output.`,
|
|
276
|
+
evidence: bestLongPrompt,
|
|
225
277
|
});
|
|
226
278
|
}
|
|
227
279
|
|
|
@@ -230,6 +282,7 @@ export function computeWatchPoints(allSessions, metrics) {
|
|
|
230
282
|
watchPoints.push({
|
|
231
283
|
name: 'Missing context',
|
|
232
284
|
detail: `Only ${ss.contextSetRatio}% of sessions start with context-setting (benchmark: ${BENCHMARKS.contextSetRatio}%). Upfront context leads to better first responses and fewer corrections.`,
|
|
285
|
+
evidence: null,
|
|
233
286
|
});
|
|
234
287
|
}
|
|
235
288
|
|
|
@@ -238,6 +291,7 @@ export function computeWatchPoints(allSessions, metrics) {
|
|
|
238
291
|
watchPoints.push({
|
|
239
292
|
name: 'Debug spirals',
|
|
240
293
|
detail: `${db.longLoops} extended debug loops (>5 turns) detected. When stuck, try providing more specific error context or breaking the problem differently.`,
|
|
294
|
+
evidence: null,
|
|
241
295
|
});
|
|
242
296
|
}
|
|
243
297
|
|