chekk 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/chekk.js +1 -1
- package/package.json +1 -1
- package/src/display.js +26 -4
- package/src/insights.js +10 -2
- package/src/metrics/ai-leverage.js +13 -3
- package/src/metrics/debug-cycles.js +17 -3
- package/src/metrics/decomposition.js +12 -2
- package/src/metrics/session-structure.js +12 -2
package/bin/chekk.js
CHANGED
|
@@ -4,7 +4,7 @@ import { execSync, spawn } from 'child_process';
|
|
|
4
4
|
import { Command } from 'commander';
|
|
5
5
|
import { run } from '../src/index.js';
|
|
6
6
|
|
|
7
|
-
const LOCAL_VERSION = '0.4.0';
|
|
7
|
+
const LOCAL_VERSION = '0.4.1';
|
|
8
8
|
|
|
9
9
|
// ── Auto-update check ──
|
|
10
10
|
// If running from a cached npx install, check if there's a newer version
|
package/package.json
CHANGED
package/src/display.js
CHANGED
|
@@ -80,10 +80,32 @@ function displayLabeledSnippet(label, prompt, maxLen = 120) {
|
|
|
80
80
|
console.log(` ${dim('\u21B3')} ${dim(label + ':')} ${dim.italic('\u201C' + s + '\u201D')}`);
|
|
81
81
|
}
|
|
82
82
|
|
|
83
|
+
// Cross-dimension filters: reject prompts that clearly belong to another dimension
|
|
84
|
+
const architecturalRe = /\b(architect|design|refactor|redesign|restructure|system design|data model|schema|api design|infrastructure|migration|strategy)\b/i;
|
|
85
|
+
const debugRe = /\b(error|bug|broken|crash|fail|exception|traceback|stack trace|doesn'?t work|not working|TypeError|SyntaxError|ImportError|ReferenceError|500|502|503|404|CORS)\b/i;
|
|
86
|
+
const planningRe = /\b(plan|breakdown|break down|think through|help me think|pros and cons|how should|code review|audit)\b/i;
|
|
87
|
+
|
|
88
|
+
// For each dimension, prompts matching these patterns are *excluded* as evidence
|
|
89
|
+
const dimensionExclusions = {
|
|
90
|
+
'specific_report': [architecturalRe, planningRe],
|
|
91
|
+
'quick_fix': [architecturalRe, planningRe],
|
|
92
|
+
'architectural': [debugRe],
|
|
93
|
+
'planning': [debugRe],
|
|
94
|
+
'exploratory': [debugRe],
|
|
95
|
+
'decomposition': [],
|
|
96
|
+
'followup': [],
|
|
97
|
+
'context_setting': [],
|
|
98
|
+
'refinement': [],
|
|
99
|
+
};
|
|
100
|
+
|
|
83
101
|
function pickExample(examples, type) {
|
|
84
102
|
if (!examples || !examples.length) return null;
|
|
85
|
-
const
|
|
86
|
-
|
|
103
|
+
const exclusions = dimensionExclusions[type] || [];
|
|
104
|
+
// Prefer a match that doesn't trigger exclusion patterns
|
|
105
|
+
const candidates = examples.filter(e => e.type === type);
|
|
106
|
+
if (candidates.length === 0) return null;
|
|
107
|
+
const clean = candidates.find(e => !exclusions.some(re => re.test(e.prompt)));
|
|
108
|
+
return (clean || candidates[0]).prompt;
|
|
87
109
|
}
|
|
88
110
|
|
|
89
111
|
// ── Box drawing ──
|
|
@@ -128,7 +150,7 @@ export function displayHeader() {
|
|
|
128
150
|
console.log();
|
|
129
151
|
const lines = [
|
|
130
152
|
'',
|
|
131
|
-
` ${bold.white('chekk')}${dim(' v0.4.0')}`,
|
|
153
|
+
` ${bold.white('chekk')}${dim(' v0.4.1')}`,
|
|
132
154
|
` ${dim('engineering capability profile')}`,
|
|
133
155
|
'',
|
|
134
156
|
];
|
|
@@ -186,7 +208,7 @@ function displayProfileHeader(result, extra = {}) {
|
|
|
186
208
|
console.log(` ${bold.white('ENGINEERING CAPABILITY PROFILE')}`);
|
|
187
209
|
console.log();
|
|
188
210
|
if (sessionStats) {
|
|
189
|
-
console.log(` ${dim(`Generated ${dateStr} | chekk v0.4.0`)}`);
|
|
211
|
+
console.log(` ${dim(`Generated ${dateStr} | chekk v0.4.1`)}`);
|
|
190
212
|
console.log(` ${dim(`Analysis: ${sessionStats.totalSessions} sessions \u00B7 ${sessionStats.tools.length} tool${sessionStats.tools.length > 1 ? 's' : ''} \u00B7 ${numberFormat(sessionStats.totalExchanges)} exchanges`)}`);
|
|
191
213
|
if (sessionStats.dateRangeShort) {
|
|
192
214
|
console.log(` ${dim(`Period: ${sessionStats.dateRangeShort}`)}`);
|
package/src/insights.js
CHANGED
|
@@ -417,11 +417,11 @@ export function generateAssessment(result, metrics, signatures, watchPoints) {
|
|
|
417
417
|
const weakest = dims[dims.length - 1];
|
|
418
418
|
|
|
419
419
|
// Build assessment parts
|
|
420
|
-
let assessment = `This engineer demonstrates ${dimQualitative(strongest.score)} ${strongest.label}`;
|
|
420
|
+
let assessment = `This engineer demonstrates ${dimQualitative(strongest.score).toLowerCase()} ${strongest.label}`;
|
|
421
421
|
|
|
422
422
|
// Add signature mention if available
|
|
423
423
|
if (signatures.length > 0) {
|
|
424
|
-
assessment += ` with a distinctive pattern of ${signatures[0].name
|
|
424
|
+
assessment += ` with a distinctive pattern of ${formatSignatureName(signatures[0].name)}`;
|
|
425
425
|
}
|
|
426
426
|
assessment += '.';
|
|
427
427
|
|
|
@@ -466,6 +466,14 @@ function dimQualitative(score) {
|
|
|
466
466
|
return 'Early-stage';
|
|
467
467
|
}
|
|
468
468
|
|
|
469
|
+
// Lowercase a signature name for prose while preserving acronyms like "AI", "TDD"
|
|
470
|
+
function formatSignatureName(name) {
|
|
471
|
+
return name
|
|
472
|
+
.toLowerCase()
|
|
473
|
+
.replace(/\bai\b/g, 'AI')
|
|
474
|
+
.replace(/\btdd\b/g, 'TDD');
|
|
475
|
+
}
|
|
476
|
+
|
|
469
477
|
// ══════════════════════════════════════════════
|
|
470
478
|
// CONFIDENCE — Data volume indicator
|
|
471
479
|
// ══════════════════════════════════════════════
|
package/src/metrics/ai-leverage.js
CHANGED
|
@@ -11,6 +11,16 @@
|
|
|
11
11
|
* - Diversity of tool usage (not just "write code" but also explore, analyze, test)
|
|
12
12
|
*/
|
|
13
13
|
|
|
14
|
+
// ── Evidence quality filter ──
|
|
15
|
+
const noisePatterns = /^This session is being continued|^\[?[0-9T:.Z-]{20,}|^\S+@\S+.*[%$#>]|^\s*\$\s|^\s*>\s/;
|
|
16
|
+
function isGoodEvidence(prompt) {
|
|
17
|
+
if (!prompt || prompt.length < 40 || prompt.length > 600) return false;
|
|
18
|
+
if (noisePatterns.test(prompt)) return false;
|
|
19
|
+
const alpha = prompt.replace(/[^a-zA-Z]/g, '').length;
|
|
20
|
+
if (alpha / prompt.length < 0.4) return false;
|
|
21
|
+
return true;
|
|
22
|
+
}
|
|
23
|
+
|
|
14
24
|
const architecturalPatterns = /\b(architect|design|refactor|redesign|restructure|system design|data model|schema|api design|interface|abstract|pattern|trade-?off|scalab|approach|strategy|migration|infrastructure)\b/i;
|
|
15
25
|
const planningPatterns = /\b(plan|breakdown|break down|think through|help me think|what('?s| is) the best (way|approach)|how should (i|we)|pros and cons|options for|compare|evaluate|review my|code review|audit)\b/i;
|
|
16
26
|
const exploratoryPatterns = /\b(explain|understand|how does|what does|why does|walk me through|investigate|diagnose|analyze|explore|deep dive|look into)\b/i;
|
|
@@ -53,15 +63,15 @@ export function computeAILeverage(sessions) {
|
|
|
53
63
|
// Categorize prompt type
|
|
54
64
|
if (architecturalPatterns.test(prompt)) {
|
|
55
65
|
architecturalPrompts++;
|
|
56
|
-
if (prompt.length > bestArchLen) { bestArchLen = prompt.length; bestArchPrompt = prompt; }
|
|
66
|
+
if (isGoodEvidence(prompt) && prompt.length > bestArchLen) { bestArchLen = prompt.length; bestArchPrompt = prompt; }
|
|
57
67
|
}
|
|
58
68
|
if (planningPatterns.test(prompt)) {
|
|
59
69
|
planningPrompts++;
|
|
60
|
-
if (prompt.length > bestPlanLen) { bestPlanLen = prompt.length; bestPlanPrompt = prompt; }
|
|
70
|
+
if (isGoodEvidence(prompt) && prompt.length > bestPlanLen) { bestPlanLen = prompt.length; bestPlanPrompt = prompt; }
|
|
61
71
|
}
|
|
62
72
|
if (exploratoryPatterns.test(prompt)) {
|
|
63
73
|
exploratoryPrompts++;
|
|
64
|
-
if (prompt.length > bestExploreLen) { bestExploreLen = prompt.length; bestExplorePrompt = prompt; }
|
|
74
|
+
if (isGoodEvidence(prompt) && prompt.length > bestExploreLen) { bestExploreLen = prompt.length; bestExplorePrompt = prompt; }
|
|
65
75
|
}
|
|
66
76
|
if (boilerplatePatterns.test(prompt)) boilerplatePrompts++;
|
|
67
77
|
if (testingPatterns.test(prompt)) testingPrompts++;
|
package/src/metrics/debug-cycles.js
CHANGED
|
@@ -10,6 +10,19 @@
|
|
|
10
10
|
* - "it's still broken" vs targeted debug prompts
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
|
+
// ── Evidence quality filter ──
|
|
14
|
+
// Prompts used as evidence should be human-written, readable, and illustrative.
|
|
15
|
+
// Reject system-generated context, raw log pastes, and extreme lengths.
|
|
16
|
+
const noisePatterns = /^This session is being continued|^\[?[0-9T:.Z-]{20,}|^\S+@\S+.*[%$#>]|^\s*\$\s|^\s*>\s/;
|
|
17
|
+
function isGoodEvidence(prompt) {
|
|
18
|
+
if (!prompt || prompt.length < 40 || prompt.length > 600) return false;
|
|
19
|
+
if (noisePatterns.test(prompt)) return false;
|
|
20
|
+
// Reject if >40% of content is non-alpha (log lines, stack traces, JSON blobs)
|
|
21
|
+
const alpha = prompt.replace(/[^a-zA-Z]/g, '').length;
|
|
22
|
+
if (alpha / prompt.length < 0.4) return false;
|
|
23
|
+
return true;
|
|
24
|
+
}
|
|
25
|
+
|
|
13
26
|
const errorPatterns = /\b(error|bug|broken|crash|fail|exception|traceback|stack trace|doesn'?t work|not working|issue|problem|wrong)\b/i;
|
|
14
27
|
const vaguePhrases = /^(it'?s? (?:still )?(?:not working|broken|wrong|failing))|^(fix it|try again|still (?:the same|broken|failing|not working))|^(same (?:error|issue|problem|thing))/i;
|
|
15
28
|
const specificDebugPatterns = /\b(line \d+|TypeError|SyntaxError|ImportError|ReferenceError|ValueError|KeyError|AttributeError|NoneType|undefined is not|cannot read prop|stack trace|traceback|\.py:\d+|\.ts:\d+|\.js:\d+|status (?:code )?\d{3}|HTTP \d{3}|ENOENT|EACCES|CORS|404|500|502|503)\b/i;
|
|
@@ -57,8 +70,9 @@ export function computeDebugCycles(sessions) {
|
|
|
57
70
|
}
|
|
58
71
|
if (specificDebugPatterns.test(prompt) || prompt.length > 200) {
|
|
59
72
|
specificReports++;
|
|
60
|
-
// Track best specific report
|
|
61
|
-
|
|
73
|
+
// Track best specific report — require actual debug pattern match
|
|
74
|
+
// and readable evidence quality
|
|
75
|
+
if (specificDebugPatterns.test(prompt) && isGoodEvidence(prompt) && prompt.length > bestSpecificLen) {
|
|
62
76
|
bestSpecificLen = prompt.length;
|
|
63
77
|
bestSpecificReport = prompt;
|
|
64
78
|
}
|
|
@@ -69,7 +83,7 @@ export function computeDebugCycles(sessions) {
|
|
|
69
83
|
totalTurnsToResolve += debugTurnCount;
|
|
70
84
|
if (debugTurnCount <= 2) {
|
|
71
85
|
quickFixes++;
|
|
72
|
-
if (!bestQuickFix) bestQuickFix = debugStartPrompt;
|
|
86
|
+
if (!bestQuickFix && isGoodEvidence(debugStartPrompt)) bestQuickFix = debugStartPrompt;
|
|
73
87
|
}
|
|
74
88
|
if (debugTurnCount > 5) longLoops++;
|
|
75
89
|
inDebugMode = false;
|
package/src/metrics/decomposition.js
CHANGED
|
@@ -11,6 +11,16 @@
|
|
|
11
11
|
* - Follow-up prompts that reference or build on previous context
|
|
12
12
|
*/
|
|
13
13
|
|
|
14
|
+
// ── Evidence quality filter ──
|
|
15
|
+
const noisePatterns = /^This session is being continued|^\[?[0-9T:.Z-]{20,}|^\S+@\S+.*[%$#>]|^\s*\$\s|^\s*>\s/;
|
|
16
|
+
function isGoodEvidence(prompt) {
|
|
17
|
+
if (!prompt || prompt.length < 40 || prompt.length > 600) return false;
|
|
18
|
+
if (noisePatterns.test(prompt)) return false;
|
|
19
|
+
const alpha = prompt.replace(/[^a-zA-Z]/g, '').length;
|
|
20
|
+
if (alpha / prompt.length < 0.4) return false;
|
|
21
|
+
return true;
|
|
22
|
+
}
|
|
23
|
+
|
|
14
24
|
export function computeDecomposition(sessions) {
|
|
15
25
|
if (sessions.length === 0) return { score: 50, details: {} };
|
|
16
26
|
|
|
@@ -51,7 +61,7 @@ export function computeDecomposition(sessions) {
|
|
|
51
61
|
if (len < 100) shortPromptCount++;
|
|
52
62
|
|
|
53
63
|
// Track decomposition examples (multi-sentence prompts showing task breakdown)
|
|
54
|
-
if (
|
|
64
|
+
if (isGoodEvidence(prompt)) {
|
|
55
65
|
decompCandidates.push(prompt);
|
|
56
66
|
}
|
|
57
67
|
|
|
@@ -60,7 +70,7 @@ export function computeDecomposition(sessions) {
|
|
|
60
70
|
if (followupPatterns.test(prompt) || refinementPatterns.test(prompt)) {
|
|
61
71
|
contextualFollowups++;
|
|
62
72
|
// Capture best followup example
|
|
63
|
-
if (!bestFollowupPrompt || prompt.length > bestFollowupPrompt.length) {
|
|
73
|
+
if (isGoodEvidence(prompt) && (!bestFollowupPrompt || prompt.length > bestFollowupPrompt.length)) {
|
|
64
74
|
bestFollowupPrompt = prompt;
|
|
65
75
|
}
|
|
66
76
|
}
|
package/src/metrics/session-structure.js
CHANGED
|
@@ -11,6 +11,16 @@
|
|
|
11
11
|
* - Modification rate of AI output (shows critical review)
|
|
12
12
|
*/
|
|
13
13
|
|
|
14
|
+
// ── Evidence quality filter ──
|
|
15
|
+
const noisePatterns = /^This session is being continued|^\[?[0-9T:.Z-]{20,}|^\S+@\S+.*[%$#>]|^\s*\$\s|^\s*>\s/;
|
|
16
|
+
function isGoodEvidence(prompt) {
|
|
17
|
+
if (!prompt || prompt.length < 40 || prompt.length > 600) return false;
|
|
18
|
+
if (noisePatterns.test(prompt)) return false;
|
|
19
|
+
const alpha = prompt.replace(/[^a-zA-Z]/g, '').length;
|
|
20
|
+
if (alpha / prompt.length < 0.4) return false;
|
|
21
|
+
return true;
|
|
22
|
+
}
|
|
23
|
+
|
|
14
24
|
const contextSettingPatterns = /^(i('?m| am) (working on|building|trying to|looking at)|we need to|the goal is|here'?s (the|what)|context:|background:|i have a|there'?s a|i want to|let me explain)/i;
|
|
15
25
|
const planningStartPatterns = /^(let'?s (plan|think|figure|start by)|first,? (let'?s|we should)|before we (start|begin|code)|the plan is|step 1|here'?s (my|the) plan)/i;
|
|
16
26
|
const reviewPatterns = /\b(looks good|ship it|deploy|push it|commit|merge|let'?s go|lgtm|approved|test it|run (the )?tests|build it|does this look|review this|check this)\b/i;
|
|
@@ -52,7 +62,7 @@ export function computeSessionStructure(sessions) {
|
|
|
52
62
|
if (contextSettingPatterns.test(firstPrompt) || firstPrompt.length > 200) {
|
|
53
63
|
contextSetSessions++;
|
|
54
64
|
// Track best context-setting prompt
|
|
55
|
-
if (firstPrompt.length > bestContextLen) {
|
|
65
|
+
if (isGoodEvidence(firstPrompt) && firstPrompt.length > bestContextLen) {
|
|
56
66
|
bestContextLen = firstPrompt.length;
|
|
57
67
|
bestContextPrompt = firstPrompt;
|
|
58
68
|
}
|
|
@@ -77,7 +87,7 @@ export function computeSessionStructure(sessions) {
|
|
|
77
87
|
if (refinementPatterns.test(prompt)) {
|
|
78
88
|
refinementCount++;
|
|
79
89
|
// Track best refinement example
|
|
80
|
-
if (!bestRefinementPrompt || prompt.length > bestRefinementPrompt.length) {
|
|
90
|
+
if (isGoodEvidence(prompt) && (!bestRefinementPrompt || prompt.length > bestRefinementPrompt.length)) {
|
|
81
91
|
bestRefinementPrompt = prompt;
|
|
82
92
|
}
|
|
83
93
|
}
|