chekk 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/chekk.js +1 -1
- package/package.json +1 -1
- package/src/display.js +183 -28
- package/src/index.js +2 -2
- package/src/insights.js +49 -6
package/bin/chekk.js
CHANGED
|
@@ -4,7 +4,7 @@ import { execSync, spawn } from 'child_process';
|
|
|
4
4
|
import { Command } from 'commander';
|
|
5
5
|
import { run } from '../src/index.js';
|
|
6
6
|
|
|
7
|
-
const LOCAL_VERSION = '0.5.0';
|
|
7
|
+
const LOCAL_VERSION = '0.5.1';
|
|
8
8
|
|
|
9
9
|
// ── Auto-update check ──
|
|
10
10
|
// If running from a cached npx install, check if there's a newer version
|
package/package.json
CHANGED
package/src/display.js
CHANGED
|
@@ -173,7 +173,7 @@ export function displayHeader() {
|
|
|
173
173
|
console.log();
|
|
174
174
|
const lines = [
|
|
175
175
|
'',
|
|
176
|
-
` ${bold.white('chekk')}${dim(' v0.5.0')}`,
|
|
176
|
+
` ${bold.white('chekk')}${dim(' v0.5.1')}`,
|
|
177
177
|
` ${dim('prompt engineering capability profile')}`,
|
|
178
178
|
'',
|
|
179
179
|
];
|
|
@@ -231,7 +231,7 @@ function displayProfileHeader(result, extra = {}) {
|
|
|
231
231
|
console.log(` ${bold.white('PROMPT ENGINEERING CAPABILITY PROFILE')}`);
|
|
232
232
|
console.log();
|
|
233
233
|
if (sessionStats) {
|
|
234
|
-
console.log(` ${dim(`Generated ${dateStr} | chekk v0.5.0`)}`);
|
|
234
|
+
console.log(` ${dim(`Generated ${dateStr} | chekk v0.5.1`)}`);
|
|
235
235
|
console.log(` ${dim(`Analysis: ${sessionStats.totalSessions} sessions \u00B7 ${sessionStats.tools.length} tool${sessionStats.tools.length > 1 ? 's' : ''} \u00B7 ${numberFormat(sessionStats.totalExchanges)} exchanges`)}`);
|
|
236
236
|
if (sessionStats.dateRangeShort) {
|
|
237
237
|
console.log(` ${dim(`Period: ${sessionStats.dateRangeShort}`)}`);
|
|
@@ -412,6 +412,7 @@ export function displayTokenEfficiency(tokenEfficiency, metrics) {
|
|
|
412
412
|
|
|
413
413
|
// Format percentages with appropriate precision
|
|
414
414
|
function fmtPct(pct) {
|
|
415
|
+
if (pct >= 99.5) return pct.toFixed(1) + '%';
|
|
415
416
|
if (pct >= 10) return Math.round(pct) + '%';
|
|
416
417
|
if (pct >= 1) return pct.toFixed(1) + '%';
|
|
417
418
|
if (pct > 0) return pct.toFixed(2) + '%';
|
|
@@ -470,20 +471,93 @@ export function displayTokenEfficiency(tokenEfficiency, metrics) {
|
|
|
470
471
|
}
|
|
471
472
|
|
|
472
473
|
// ── Token cost evidence from metrics ──
|
|
473
|
-
displayTokenEvidence(metrics);
|
|
474
|
+
displayTokenEvidence(metrics, tokenEfficiency);
|
|
474
475
|
}
|
|
475
476
|
|
|
476
|
-
|
|
477
|
+
// ── Per-dimension cost annotation ──
|
|
478
|
+
// Shows a single cost insight line under each scored dimension
|
|
479
|
+
|
|
480
|
+
function dollarFormat(amount) {
|
|
481
|
+
if (amount >= 1000) return '$' + (amount / 1000).toFixed(1).replace(/\.0$/, '') + 'k';
|
|
482
|
+
if (amount >= 100) return '$' + Math.round(amount);
|
|
483
|
+
if (amount >= 1) return '$' + amount.toFixed(2);
|
|
484
|
+
return '$' + amount.toFixed(2);
|
|
485
|
+
}
|
|
486
|
+
|
|
487
|
+
function displayDimensionCostLine(dimensionName, metrics, tokenEfficiency) {
|
|
488
|
+
if (!tokenEfficiency || !tokenEfficiency.hasData) return;
|
|
489
|
+
|
|
490
|
+
const pricing = tokenEfficiency.estimatedCostTotal / Math.max(1, tokenEfficiency.grandTotal);
|
|
491
|
+
|
|
492
|
+
if (dimensionName === 'thinking') {
|
|
493
|
+
const de = metrics.decomposition.details.tokenEvidence;
|
|
494
|
+
if (de && de.avgTokensPerExchangeSingleShot && de.avgTokensPerExchangeMultiStep) {
|
|
495
|
+
const ssCost = de.avgTokensPerExchangeSingleShot * pricing;
|
|
496
|
+
const msCost = de.avgTokensPerExchangeMultiStep * pricing;
|
|
497
|
+
if (ssCost > msCost * 1.1) {
|
|
498
|
+
console.log(` ${dim('\u21B3 Cost:')} ${dim('single-shot')} ${orange(dollarFormat(ssCost) + '/exchange')} ${dim('vs multi-step')} ${green(dollarFormat(msCost) + '/exchange')}`);
|
|
499
|
+
}
|
|
500
|
+
}
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
if (dimensionName === 'debugging') {
|
|
504
|
+
const dbe = metrics.debugCycles.details.tokenEvidence;
|
|
505
|
+
if (dbe && dbe.avgTokensQuickFix && dbe.avgTokensLongLoop) {
|
|
506
|
+
const qfCost = dbe.avgTokensQuickFix * pricing;
|
|
507
|
+
const llCost = dbe.avgTokensLongLoop * pricing;
|
|
508
|
+
console.log(` ${dim('\u21B3 Cost:')} ${dim('quick fix')} ${green(dollarFormat(qfCost))} ${dim('vs debug spiral')} ${orange(dollarFormat(llCost))}`);
|
|
509
|
+
} else if (dbe && dbe.avgTokensSpecificDebug) {
|
|
510
|
+
const spCost = dbe.avgTokensSpecificDebug * pricing;
|
|
511
|
+
console.log(` ${dim('\u21B3 Cost:')} ${dim('avg debug cycle')} ${dim(dollarFormat(spCost))}`);
|
|
512
|
+
}
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
if (dimensionName === 'ai leverage') {
|
|
516
|
+
const aie = metrics.aiLeverage.details.tokenEvidence;
|
|
517
|
+
if (aie && aie.avgTokensArchitectural && aie.avgTokensBoilerplate) {
|
|
518
|
+
const archCost = aie.avgTokensArchitectural * pricing;
|
|
519
|
+
const boilCost = aie.avgTokensBoilerplate * pricing;
|
|
520
|
+
console.log(` ${dim('\u21B3 Cost:')} ${dim('architectural prompt')} ${dim(dollarFormat(archCost))} ${dim('vs boilerplate')} ${dim(dollarFormat(boilCost))}`);
|
|
521
|
+
} else if (aie && aie.avgTokensTrivialPrompt && aie.avgTokensComplexPrompt) {
|
|
522
|
+
const trivCost = aie.avgTokensTrivialPrompt * pricing;
|
|
523
|
+
const compCost = aie.avgTokensComplexPrompt * pricing;
|
|
524
|
+
console.log(` ${dim('\u21B3 Cost:')} ${dim('trivial prompt')} ${dim(dollarFormat(trivCost))} ${dim('vs detailed')} ${dim(dollarFormat(compCost))}`);
|
|
525
|
+
}
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
if (dimensionName === 'workflow') {
|
|
529
|
+
const sse = metrics.sessionStructure.details.tokenEvidence;
|
|
530
|
+
if (sse && sse.avgTokensPerExchangeMarathon && sse.avgTokensPerExchangeFocused) {
|
|
531
|
+
const marCost = sse.avgTokensPerExchangeMarathon * pricing;
|
|
532
|
+
const focCost = sse.avgTokensPerExchangeFocused * pricing;
|
|
533
|
+
if (marCost > focCost * 1.1) {
|
|
534
|
+
console.log(` ${dim('\u21B3 Cost:')} ${dim('marathon')} ${orange(dollarFormat(marCost) + '/exchange')} ${dim('vs focused')} ${green(dollarFormat(focCost) + '/exchange')}`);
|
|
535
|
+
}
|
|
536
|
+
}
|
|
537
|
+
}
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
function displayTokenEvidence(metrics, tokenEfficiency) {
|
|
477
541
|
const evidenceLines = [];
|
|
542
|
+
const pricing = tokenEfficiency && tokenEfficiency.hasData
|
|
543
|
+
? tokenEfficiency.estimatedCostTotal / Math.max(1, tokenEfficiency.grandTotal) : 0;
|
|
478
544
|
|
|
479
545
|
// Decomposition: single-shot vs multi-step cost
|
|
480
546
|
const de = metrics.decomposition.details.tokenEvidence;
|
|
481
547
|
if (de && de.avgTokensPerExchangeSingleShot && de.avgTokensPerExchangeMultiStep) {
|
|
482
548
|
const ratio = (de.avgTokensPerExchangeSingleShot / de.avgTokensPerExchangeMultiStep).toFixed(1);
|
|
483
549
|
if (parseFloat(ratio) > 1.2) {
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
550
|
+
if (pricing > 0) {
|
|
551
|
+
const ssCost = dollarFormat(de.avgTokensPerExchangeSingleShot * pricing);
|
|
552
|
+
const msCost = dollarFormat(de.avgTokensPerExchangeMultiStep * pricing);
|
|
553
|
+
evidenceLines.push(
|
|
554
|
+
` ${dim('\u2022 Single-shot prompts cost')} ${orange(ssCost + '/exchange')} ${dim('vs')} ${green(msCost)} ${dim('in multi-step sessions')} ${dim('(' + ratio + 'x)')}`
|
|
555
|
+
);
|
|
556
|
+
} else {
|
|
557
|
+
evidenceLines.push(
|
|
558
|
+
` ${dim('\u2022 Single-shot prompts cost')} ${orange(ratio + 'x')} ${dim('more tokens per exchange than multi-step sessions')}`
|
|
559
|
+
);
|
|
560
|
+
}
|
|
487
561
|
}
|
|
488
562
|
}
|
|
489
563
|
|
|
@@ -492,32 +566,62 @@ function displayTokenEvidence(metrics) {
|
|
|
492
566
|
if (dbe && dbe.avgTokensVagueDebug && dbe.avgTokensSpecificDebug) {
|
|
493
567
|
const ratio = (dbe.avgTokensVagueDebug / dbe.avgTokensSpecificDebug).toFixed(1);
|
|
494
568
|
if (parseFloat(ratio) > 1.2) {
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
569
|
+
if (pricing > 0) {
|
|
570
|
+
const vCost = dollarFormat(dbe.avgTokensVagueDebug * pricing);
|
|
571
|
+
const sCost = dollarFormat(dbe.avgTokensSpecificDebug * pricing);
|
|
572
|
+
evidenceLines.push(
|
|
573
|
+
` ${dim('\u2022 Vague debug prompts cost')} ${orange(vCost)} ${dim('vs')} ${green(sCost)} ${dim('for specific reports')} ${dim('(' + ratio + 'x)')}`
|
|
574
|
+
);
|
|
575
|
+
} else {
|
|
576
|
+
evidenceLines.push(
|
|
577
|
+
` ${dim('\u2022 Vague debug prompts cost')} ${orange(ratio + 'x')} ${dim('more than specific error reports')}`
|
|
578
|
+
);
|
|
579
|
+
}
|
|
498
580
|
}
|
|
499
581
|
}
|
|
500
582
|
|
|
501
583
|
// AI Leverage: trivial prompts vs detailed ones
|
|
502
584
|
const aie = metrics.aiLeverage.details.tokenEvidence;
|
|
503
585
|
if (aie && aie.avgTokensTrivialPrompt && aie.avgTokensComplexPrompt) {
|
|
504
|
-
// Trivial prompts often cost nearly as much because Claude re-reads everything anyway
|
|
505
586
|
const savingsPct = Math.round((1 - aie.avgTokensTrivialPrompt / aie.avgTokensComplexPrompt) * 100);
|
|
506
|
-
if (savingsPct < 40) {
|
|
587
|
+
if (pricing > 0) {
|
|
588
|
+
const trivCost = dollarFormat(aie.avgTokensTrivialPrompt * pricing);
|
|
589
|
+
const compCost = dollarFormat(aie.avgTokensComplexPrompt * pricing);
|
|
590
|
+
evidenceLines.push(
|
|
591
|
+
` ${dim('\u2022 Short vague prompts (<50 chars) cost')} ${dim(trivCost)} ${dim('— only ' + savingsPct + '% less than detailed ones at')} ${dim(compCost)}`
|
|
592
|
+
);
|
|
593
|
+
} else if (savingsPct < 40) {
|
|
507
594
|
evidenceLines.push(
|
|
508
595
|
` ${dim('\u2022 Short vague prompts (<50 chars) cost')} ${dim(numberFormat(aie.avgTokensTrivialPrompt) + ' tokens')} ${dim('— only ' + savingsPct + '% less than detailed ones')}`
|
|
509
596
|
);
|
|
510
597
|
}
|
|
511
598
|
}
|
|
512
599
|
|
|
600
|
+
// AI Leverage: architectural vs boilerplate cost
|
|
601
|
+
if (aie && aie.avgTokensArchitectural && aie.avgTokensBoilerplate && pricing > 0) {
|
|
602
|
+
const archCost = dollarFormat(aie.avgTokensArchitectural * pricing);
|
|
603
|
+
const boilCost = dollarFormat(aie.avgTokensBoilerplate * pricing);
|
|
604
|
+
evidenceLines.push(
|
|
605
|
+
` ${dim('\u2022 Architectural prompts cost')} ${dim(archCost + '/exchange')} ${dim('vs boilerplate at')} ${dim(boilCost + '/exchange')}`
|
|
606
|
+
);
|
|
607
|
+
}
|
|
608
|
+
|
|
513
609
|
// Session structure: marathon vs focused cost
|
|
514
610
|
const sse = metrics.sessionStructure.details.tokenEvidence;
|
|
515
611
|
if (sse && sse.avgTokensPerExchangeMarathon && sse.avgTokensPerExchangeFocused) {
|
|
516
612
|
const ratio = (sse.avgTokensPerExchangeMarathon / sse.avgTokensPerExchangeFocused).toFixed(1);
|
|
517
613
|
if (parseFloat(ratio) > 1.1) {
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
614
|
+
if (pricing > 0) {
|
|
615
|
+
const marCost = dollarFormat(sse.avgTokensPerExchangeMarathon * pricing);
|
|
616
|
+
const focCost = dollarFormat(sse.avgTokensPerExchangeFocused * pricing);
|
|
617
|
+
evidenceLines.push(
|
|
618
|
+
` ${dim('\u2022 Marathon sessions (>60m) cost')} ${orange(marCost + '/exchange')} ${dim('vs focused at')} ${green(focCost)} ${dim('(' + ratio + 'x)')}`
|
|
619
|
+
);
|
|
620
|
+
} else {
|
|
621
|
+
evidenceLines.push(
|
|
622
|
+
` ${dim('\u2022 Marathon sessions (>60m) cost')} ${orange(ratio + 'x')} ${dim('more per exchange than focused ones (10-45m)')}`
|
|
623
|
+
);
|
|
624
|
+
}
|
|
521
625
|
}
|
|
522
626
|
}
|
|
523
627
|
|
|
@@ -525,9 +629,32 @@ function displayTokenEvidence(metrics) {
|
|
|
525
629
|
if (sse && sse.avgTokensPerExchangeNoContext && sse.avgTokensPerExchangeWithContext) {
|
|
526
630
|
const ratio = (sse.avgTokensPerExchangeNoContext / sse.avgTokensPerExchangeWithContext).toFixed(1);
|
|
527
631
|
if (parseFloat(ratio) > 1.1) {
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
632
|
+
if (pricing > 0) {
|
|
633
|
+
const noCost = dollarFormat(sse.avgTokensPerExchangeNoContext * pricing);
|
|
634
|
+
const wCost = dollarFormat(sse.avgTokensPerExchangeWithContext * pricing);
|
|
635
|
+
evidenceLines.push(
|
|
636
|
+
` ${dim('\u2022 Sessions without upfront context cost')} ${orange(noCost + '/exchange')} ${dim('vs')} ${green(wCost)} ${dim('with context')} ${dim('(' + ratio + 'x)')}`
|
|
637
|
+
);
|
|
638
|
+
} else {
|
|
639
|
+
evidenceLines.push(
|
|
640
|
+
` ${dim('\u2022 Sessions without upfront context cost')} ${orange(ratio + 'x')} ${dim('more per exchange')}`
|
|
641
|
+
);
|
|
642
|
+
}
|
|
643
|
+
}
|
|
644
|
+
}
|
|
645
|
+
|
|
646
|
+
// Session length cost curve from token efficiency
|
|
647
|
+
if (tokenEfficiency && tokenEfficiency.hasData && tokenEfficiency.sessionLengthAnalysis) {
|
|
648
|
+
const sla = tokenEfficiency.sessionLengthAnalysis;
|
|
649
|
+
if (sla.length >= 2 && pricing > 0) {
|
|
650
|
+
const sorted = [...sla].sort((a, b) => a.avgTokensPerExchange - b.avgTokensPerExchange);
|
|
651
|
+
const cheapest = sorted[0];
|
|
652
|
+
const costliest = sorted[sorted.length - 1];
|
|
653
|
+
if (costliest.avgTokensPerExchange > cheapest.avgTokensPerExchange * 1.5) {
|
|
654
|
+
evidenceLines.push(
|
|
655
|
+
` ${dim('\u2022 Cost curve:')} ${dim(cheapest.label)} ${green(dollarFormat(cheapest.avgTokensPerExchange * pricing) + '/exchange')} ${dim('\u2192')} ${dim(costliest.label)} ${orange(dollarFormat(costliest.avgTokensPerExchange * pricing) + '/exchange')}`
|
|
656
|
+
);
|
|
657
|
+
}
|
|
531
658
|
}
|
|
532
659
|
}
|
|
533
660
|
|
|
@@ -572,10 +699,18 @@ function displayCrossPlatform(perToolScores) {
|
|
|
572
699
|
// PROJECT ANALYSIS
|
|
573
700
|
// ══════════════════════════════════════════════
|
|
574
701
|
|
|
575
|
-
function displayProjects(insights) {
|
|
702
|
+
function displayProjects(insights, tokenEfficiency = null) {
|
|
576
703
|
const projects = insights?.projectComplexity;
|
|
577
704
|
if (!projects || projects.length === 0) return;
|
|
578
705
|
|
|
706
|
+
// Build a lookup from token efficiency per-project data
|
|
707
|
+
const projectCostMap = {};
|
|
708
|
+
if (tokenEfficiency && tokenEfficiency.hasData && tokenEfficiency.perProject) {
|
|
709
|
+
for (const pp of tokenEfficiency.perProject) {
|
|
710
|
+
projectCostMap[pp.fullName] = pp;
|
|
711
|
+
}
|
|
712
|
+
}
|
|
713
|
+
|
|
579
714
|
console.log(dim(' PROJECT ANALYSIS\n'));
|
|
580
715
|
for (const p of projects) {
|
|
581
716
|
const compColor = p.complexity === 'HIGH' ? green : p.complexity === 'MEDIUM' ? yellow : dim;
|
|
@@ -584,7 +719,17 @@ function displayProjects(insights) {
|
|
|
584
719
|
if (signals) {
|
|
585
720
|
console.log(` ${dim(signals)}`);
|
|
586
721
|
}
|
|
587
|
-
|
|
722
|
+
// Look up cost data — match by full project name or short name
|
|
723
|
+
const costData = Object.values(projectCostMap).find(pp =>
|
|
724
|
+
p.name === pp.name || pp.fullName?.endsWith(p.name.replace('...', ''))
|
|
725
|
+
);
|
|
726
|
+
if (costData && costData.exchanges > 0) {
|
|
727
|
+
const costPerExchange = costData.estimatedCost / costData.exchanges;
|
|
728
|
+
const totalCost = costData.estimatedCost;
|
|
729
|
+
console.log(` ${dim(`${p.sessions} sessions \u00B7 ${numberFormat(p.exchanges)} exchanges \u00B7 ${p.daysActive} days active`)} ${dim('|')} ${dim('$' + totalCost.toFixed(0) + ' total \u00B7 $' + costPerExchange.toFixed(2) + '/exchange')}`);
|
|
730
|
+
} else {
|
|
731
|
+
console.log(` ${dim(`${p.sessions} sessions \u00B7 ${numberFormat(p.exchanges)} exchanges \u00B7 ${p.daysActive} days active`)}`);
|
|
732
|
+
}
|
|
588
733
|
console.log();
|
|
589
734
|
}
|
|
590
735
|
}
|
|
@@ -593,7 +738,7 @@ function displayProjects(insights) {
|
|
|
593
738
|
// DIMENSION NARRATIVES — Clinical, benchmarked
|
|
594
739
|
// ══════════════════════════════════════════════
|
|
595
740
|
|
|
596
|
-
export function displayNarratives(metrics, prose) {
|
|
741
|
+
export function displayNarratives(metrics, prose, tokenEfficiency = null) {
|
|
597
742
|
const shownSnippets = new Set();
|
|
598
743
|
function showLabeledSnippet(label, prompt) {
|
|
599
744
|
if (!prompt) return;
|
|
@@ -617,17 +762,19 @@ export function displayNarratives(metrics, prose) {
|
|
|
617
762
|
for (const line of lines) {
|
|
618
763
|
console.log(` ${dim(line.trim())}`);
|
|
619
764
|
}
|
|
765
|
+
// Add token cost line for this dimension
|
|
620
766
|
const titleLower = section.title.toLowerCase();
|
|
767
|
+
displayDimensionCostLine(titleLower, metrics, tokenEfficiency);
|
|
621
768
|
const matched = sectionSnippetMap[titleLower];
|
|
622
769
|
if (matched && matched.prompt) showLabeledSnippet(matched.label, matched.prompt);
|
|
623
770
|
console.log();
|
|
624
771
|
}
|
|
625
772
|
} else {
|
|
626
|
-
displayDataNarratives(metrics, shownSnippets);
|
|
773
|
+
displayDataNarratives(metrics, shownSnippets, tokenEfficiency);
|
|
627
774
|
}
|
|
628
775
|
}
|
|
629
776
|
|
|
630
|
-
function displayDataNarratives(metrics, shownSnippets) {
|
|
777
|
+
function displayDataNarratives(metrics, shownSnippets, tokenEfficiency = null) {
|
|
631
778
|
const d = metrics.decomposition.details;
|
|
632
779
|
const db = metrics.debugCycles.details;
|
|
633
780
|
const ai = metrics.aiLeverage.details;
|
|
@@ -646,6 +793,7 @@ function displayDataNarratives(metrics, shownSnippets) {
|
|
|
646
793
|
console.log(` ${dim(`Avg session depth: ${d.avgExchangesPerSession} exchanges (benchmark: ${BENCHMARKS.avgExchangesPerSession})`)}`);
|
|
647
794
|
console.log(` ${dim(`Avg prompt length: ${numberFormat(d.avgPromptLength)} chars (benchmark: ${BENCHMARKS.avgPromptLength})`)}`);
|
|
648
795
|
console.log(` ${dim(`Multi-step ratio: ${d.multiStepSessions}/${d.totalSessions} sessions (${Math.round(d.multiStepSessions / Math.max(1, d.totalSessions) * 100)}%)`)}`);
|
|
796
|
+
displayDimensionCostLine('thinking', metrics, tokenEfficiency);
|
|
649
797
|
showSnippet('Evidence', pickExample(metrics.decomposition.examples, 'decomposition'));
|
|
650
798
|
console.log();
|
|
651
799
|
|
|
@@ -654,6 +802,7 @@ function displayDataNarratives(metrics, shownSnippets) {
|
|
|
654
802
|
console.log(` ${dim(`Avg resolution: ${db.avgTurnsToResolve} turns (benchmark: ${BENCHMARKS.avgTurnsToResolve})`)}`);
|
|
655
803
|
console.log(` ${dim(`Specific reports: ${db.specificReportRatio}% (benchmark: ${BENCHMARKS.specificReportRatio}%)`)}`);
|
|
656
804
|
console.log(` ${dim(`Extended loops: ${db.longLoops} | Quick fixes: ${db.quickFixes}/${db.totalDebugSequences}`)}`);
|
|
805
|
+
displayDimensionCostLine('debugging', metrics, tokenEfficiency);
|
|
657
806
|
showSnippet('Evidence', pickExample(metrics.debugCycles.examples, 'specific_report') || pickExample(metrics.debugCycles.examples, 'quick_fix'));
|
|
658
807
|
console.log();
|
|
659
808
|
|
|
@@ -662,6 +811,7 @@ function displayDataNarratives(metrics, shownSnippets) {
|
|
|
662
811
|
console.log(` ${dim(`High-level ratio: ${ai.highLevelRatio}% (benchmark: ${BENCHMARKS.highLevelRatio}%)`)}`);
|
|
663
812
|
console.log(` ${dim(`Breakdown: ${ai.architecturalPrompts} architectural \u00B7 ${ai.planningPrompts} planning \u00B7 ${ai.exploratoryPrompts} exploratory`)}`);
|
|
664
813
|
console.log(` ${dim(`Boilerplate: ${ai.boilerplatePrompts} (${Math.round(ai.boilerplatePrompts / Math.max(1, ai.totalPrompts) * 100)}%) \u00B7 Testing: ${ai.testingPrompts}`)}`);
|
|
814
|
+
displayDimensionCostLine('ai leverage', metrics, tokenEfficiency);
|
|
665
815
|
showSnippet('Evidence', pickExample(metrics.aiLeverage.examples, 'architectural') || pickExample(metrics.aiLeverage.examples, 'planning'));
|
|
666
816
|
console.log();
|
|
667
817
|
|
|
@@ -670,6 +820,7 @@ function displayDataNarratives(metrics, shownSnippets) {
|
|
|
670
820
|
console.log(` ${dim(`Context-setting: ${ss.contextSetRatio}% (benchmark: ${BENCHMARKS.contextSetRatio}%)`)}`);
|
|
671
821
|
console.log(` ${dim(`Review at end: ${ss.reviewEndRatio}% (benchmark: ${BENCHMARKS.reviewEndRatio}%)`)}`);
|
|
672
822
|
console.log(` ${dim(`Refinement rate: ${ss.refinementRatio}% (benchmark: ${BENCHMARKS.refinementRatio}%)`)}`);
|
|
823
|
+
displayDimensionCostLine('workflow', metrics, tokenEfficiency);
|
|
673
824
|
showSnippet('Evidence', pickExample(metrics.sessionStructure.examples, 'context_setting') || pickExample(metrics.sessionStructure.examples, 'refinement'));
|
|
674
825
|
console.log();
|
|
675
826
|
}
|
|
@@ -838,7 +989,7 @@ function displayNextSteps(result) {
|
|
|
838
989
|
// VERBOSE: DETAILED BREAKDOWN
|
|
839
990
|
// ══════════════════════════════════════════════
|
|
840
991
|
|
|
841
|
-
export function displayVerbose(metrics, sessions) {
|
|
992
|
+
export function displayVerbose(metrics, sessions, tokenEfficiency = null) {
|
|
842
993
|
console.log(doubleRule());
|
|
843
994
|
console.log(dim('\n DETAILED BREAKDOWN\n'));
|
|
844
995
|
|
|
@@ -894,6 +1045,7 @@ export function displayVerbose(metrics, sessions) {
|
|
|
894
1045
|
console.log(` ${dim(pad('Multi-step sessions', 28))} ${white(d.multiStepSessions + '/' + d.totalSessions)} ${dim('(' + Math.round(d.multiStepSessions / Math.max(1, d.totalSessions) * 100) + '%)')}`);
|
|
895
1046
|
console.log(` ${dim(pad('Single-shot sessions', 28))} ${white(String(d.singleShotSessions))}`);
|
|
896
1047
|
console.log(` ${dim(pad('Contextual followups', 28))} ${white(d.contextualFollowupRatio + '%')}${d.contextualFollowupRatio > 20 ? cyan(' builds on context well') : orange(' could reference prior work more')}`);
|
|
1048
|
+
displayDimensionCostLine('thinking', metrics, tokenEfficiency);
|
|
897
1049
|
console.log();
|
|
898
1050
|
|
|
899
1051
|
// ── Debugging ──
|
|
@@ -906,6 +1058,7 @@ export function displayVerbose(metrics, sessions) {
|
|
|
906
1058
|
console.log(` ${dim(pad('Extended loops (>5 turns)', 28))} ${db.longLoops === 0 ? green('0 -- zero spirals') : orange(String(db.longLoops))}`);
|
|
907
1059
|
metricRow('Specific report ratio', db.specificReportRatio, BENCHMARKS.specificReportRatio, '%');
|
|
908
1060
|
console.log(` ${dim(pad('Vague reports', 28))} ${db.vagueReports === 0 ? green('0 -- never vague') : orange(String(db.vagueReports))}`);
|
|
1061
|
+
displayDimensionCostLine('debugging', metrics, tokenEfficiency);
|
|
909
1062
|
console.log();
|
|
910
1063
|
|
|
911
1064
|
// ── AI Leverage ──
|
|
@@ -923,6 +1076,7 @@ export function displayVerbose(metrics, sessions) {
|
|
|
923
1076
|
console.log(` ${dim(pad(' Testing', 28))} ${white(String(ai.testingPrompts))} ${dim('(' + Math.round(ai.testingPrompts / total * 100) + '%)')}`);
|
|
924
1077
|
console.log();
|
|
925
1078
|
metricRow('High-level ratio', ai.highLevelRatio, BENCHMARKS.highLevelRatio, '%');
|
|
1079
|
+
displayDimensionCostLine('ai leverage', metrics, tokenEfficiency);
|
|
926
1080
|
console.log();
|
|
927
1081
|
|
|
928
1082
|
// ── Session Structure ──
|
|
@@ -943,6 +1097,7 @@ export function displayVerbose(metrics, sessions) {
|
|
|
943
1097
|
console.log(` ${dim(pad(' Medium (5-60m)', 28))} ${white(String(dur.medium))}`);
|
|
944
1098
|
console.log(` ${dim(pad(' Long (>60m)', 28))} ${white(String(dur.long))}`);
|
|
945
1099
|
}
|
|
1100
|
+
displayDimensionCostLine('workflow', metrics, tokenEfficiency);
|
|
946
1101
|
console.log();
|
|
947
1102
|
}
|
|
948
1103
|
|
|
@@ -957,8 +1112,8 @@ export function displayOffline(result, metrics, extra = {}) {
|
|
|
957
1112
|
displayDimensions(result);
|
|
958
1113
|
displayTokenEfficiency(extra.tokenEfficiency, metrics);
|
|
959
1114
|
displayCrossPlatform(extra.perToolScores);
|
|
960
|
-
displayDataNarratives(metrics, new Set());
|
|
961
|
-
displayProjects(extra.insights);
|
|
1115
|
+
displayDataNarratives(metrics, new Set(), extra.tokenEfficiency);
|
|
1116
|
+
displayProjects(extra.insights, extra.tokenEfficiency);
|
|
962
1117
|
displaySignatures(extra.insights);
|
|
963
1118
|
displayWatchPoints(extra.insights);
|
|
964
1119
|
displayTrajectory(extra.insights);
|
|
@@ -980,8 +1135,8 @@ export function displayFull(result, metrics, prose, extra = {}) {
|
|
|
980
1135
|
displayDimensions(result);
|
|
981
1136
|
displayTokenEfficiency(extra.tokenEfficiency, metrics);
|
|
982
1137
|
displayCrossPlatform(extra.perToolScores);
|
|
983
|
-
displayNarratives(metrics, prose);
|
|
984
|
-
displayProjects(extra.insights);
|
|
1138
|
+
displayNarratives(metrics, prose, extra.tokenEfficiency);
|
|
1139
|
+
displayProjects(extra.insights, extra.tokenEfficiency);
|
|
985
1140
|
displaySignatures(extra.insights);
|
|
986
1141
|
displayWatchPoints(extra.insights);
|
|
987
1142
|
displayTrajectory(extra.insights);
|
package/src/index.js
CHANGED
|
@@ -213,13 +213,13 @@ export async function run(options = {}) {
|
|
|
213
213
|
|
|
214
214
|
// ── Step 6: Verbose prompt (interactive) ──
|
|
215
215
|
if (options.verbose) {
|
|
216
|
-
displayVerbose(metrics, allSessions);
|
|
216
|
+
displayVerbose(metrics, allSessions, tokenEfficiency);
|
|
217
217
|
} else {
|
|
218
218
|
try {
|
|
219
219
|
const wantsVerbose = await askVerbose();
|
|
220
220
|
if (wantsVerbose) {
|
|
221
221
|
console.log();
|
|
222
|
-
displayVerbose(metrics, allSessions);
|
|
222
|
+
displayVerbose(metrics, allSessions, tokenEfficiency);
|
|
223
223
|
} else {
|
|
224
224
|
console.log();
|
|
225
225
|
}
|
package/src/insights.js
CHANGED
|
@@ -250,9 +250,21 @@ export function computeWatchPoints(allSessions, metrics, tokenEfficiency = null)
|
|
|
250
250
|
}
|
|
251
251
|
const totalFollowupSessions = Object.values(projectSessions).reduce((sum, s) => sum + Math.max(0, s.length - 1), 0);
|
|
252
252
|
if (totalFollowupSessions > 3 && contextRestarts / totalFollowupSessions > 0.5) {
|
|
253
|
+
// Estimate cost of context restarts — each restart forces full context re-read
|
|
254
|
+
let costNote = '';
|
|
255
|
+
if (tokenEfficiency && tokenEfficiency.hasData) {
|
|
256
|
+
const avgCostPerSession = tokenEfficiency.estimatedCostTotal / Math.max(1, tokenEfficiency.sessionsAnalyzed);
|
|
257
|
+
// The re-read cost of each restart scales with context size — estimate as % of avg session cost
|
|
258
|
+
const rereadPct = tokenEfficiency.contextRereadRatio || 0.94;
|
|
259
|
+
const wastedPerRestart = avgCostPerSession * rereadPct * 0.3; // ~30% of re-read is avoidable with context continuity
|
|
260
|
+
const totalWasted = wastedPerRestart * contextRestarts;
|
|
261
|
+
if (totalWasted > 1) {
|
|
262
|
+
costNote = ` Estimated wasted re-read cost: ~$${totalWasted.toFixed(0)}.`;
|
|
263
|
+
}
|
|
264
|
+
}
|
|
253
265
|
watchPoints.push({
|
|
254
266
|
name: 'Context amnesia',
|
|
255
|
-
detail: `You restart context from scratch in ${Math.round(contextRestarts / totalFollowupSessions * 100)}% of follow-up sessions on the same project. Engineers who maintain context across sessions are more efficient
|
|
267
|
+
detail: `You restart context from scratch in ${Math.round(contextRestarts / totalFollowupSessions * 100)}% of follow-up sessions on the same project. Engineers who maintain context across sessions are more efficient.${costNote}`,
|
|
256
268
|
evidence: bestContextRestartPrompt,
|
|
257
269
|
});
|
|
258
270
|
}
|
|
@@ -290,27 +302,58 @@ export function computeWatchPoints(allSessions, metrics, tokenEfficiency = null)
|
|
|
290
302
|
}
|
|
291
303
|
}
|
|
292
304
|
}
|
|
305
|
+
// Dollar annotation: long prompts trigger large context re-reads each time
|
|
306
|
+
let monologueCostNote = '';
|
|
307
|
+
if (tokenEfficiency && tokenEfficiency.hasData) {
|
|
308
|
+
// Compare avg tokens for long vs short prompts from the prompt length analysis
|
|
309
|
+
const longBucket = tokenEfficiency.promptLengthAnalysis.find(b => b.label === '500+ chars');
|
|
310
|
+
const shortBucket = tokenEfficiency.promptLengthAnalysis.find(b => b.label === '20-100 chars');
|
|
311
|
+
if (longBucket && shortBucket && longBucket.avgCost && shortBucket.avgCost) {
|
|
312
|
+
const ratio = (longBucket.avgCost / shortBucket.avgCost).toFixed(1);
|
|
313
|
+
monologueCostNote = ` Long prompts cost ${ratio}x more per exchange ($${longBucket.avgCost.toFixed(2)} vs $${shortBucket.avgCost.toFixed(2)}).`;
|
|
314
|
+
}
|
|
315
|
+
}
|
|
293
316
|
watchPoints.push({
|
|
294
317
|
name: 'Monologue prompting',
|
|
295
|
-
detail: `Avg prompt length of ${d.avgPromptLength} chars is ${Math.round(d.avgPromptLength / BENCHMARKS.avgPromptLength)}x the benchmark. Breaking complex requests into 2-3 shorter prompts typically yields better AI output
|
|
318
|
+
detail: `Avg prompt length of ${d.avgPromptLength} chars is ${Math.round(d.avgPromptLength / BENCHMARKS.avgPromptLength)}x the benchmark. Breaking complex requests into 2-3 shorter prompts typically yields better AI output.${monologueCostNote}`,
|
|
296
319
|
evidence: bestLongPrompt,
|
|
297
320
|
});
|
|
298
321
|
}
|
|
299
322
|
|
|
300
323
|
// Low context-setting
|
|
301
324
|
if (ss.contextSetRatio < 20) {
|
|
325
|
+
let contextCostNote = '';
|
|
326
|
+
if (ss.tokenEvidence?.avgTokensPerExchangeWithContext && ss.tokenEvidence?.avgTokensPerExchangeNoContext) {
|
|
327
|
+
const pricing = tokenEfficiency && tokenEfficiency.hasData
|
|
328
|
+
? tokenEfficiency.estimatedCostTotal / Math.max(1, tokenEfficiency.grandTotal) : 0;
|
|
329
|
+
if (pricing > 0) {
|
|
330
|
+
const withCtx = ss.tokenEvidence.avgTokensPerExchangeWithContext * pricing;
|
|
331
|
+
const noCtx = ss.tokenEvidence.avgTokensPerExchangeNoContext * pricing;
|
|
332
|
+
if (noCtx > withCtx * 1.1) {
|
|
333
|
+
contextCostNote = ` Sessions without context cost $${noCtx.toFixed(2)}/exchange vs $${withCtx.toFixed(2)} with context.`;
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
}
|
|
302
337
|
watchPoints.push({
|
|
303
338
|
name: 'Missing context',
|
|
304
|
-
detail: `Only ${ss.contextSetRatio}% of sessions start with context-setting (benchmark: ${BENCHMARKS.contextSetRatio}%). Upfront context leads to better first responses and fewer corrections
|
|
339
|
+
detail: `Only ${ss.contextSetRatio}% of sessions start with context-setting (benchmark: ${BENCHMARKS.contextSetRatio}%). Upfront context leads to better first responses and fewer corrections.${contextCostNote}`,
|
|
305
340
|
evidence: null,
|
|
306
341
|
});
|
|
307
342
|
}
|
|
308
343
|
|
|
309
344
|
// Extended debug spirals
|
|
310
345
|
if (db.longLoops > 2) {
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
346
|
+
let loopCostStr = '';
|
|
347
|
+
if (db.tokenEvidence?.avgTokensLongLoop) {
|
|
348
|
+
const pricing = tokenEfficiency && tokenEfficiency.hasData
|
|
349
|
+
? tokenEfficiency.estimatedCostTotal / Math.max(1, tokenEfficiency.grandTotal) : 0;
|
|
350
|
+
if (pricing > 0) {
|
|
351
|
+
const costPerLoop = db.tokenEvidence.avgTokensLongLoop * pricing;
|
|
352
|
+
loopCostStr = ` Each spiral costs ~$${costPerLoop.toFixed(2)}.`;
|
|
353
|
+
} else {
|
|
354
|
+
loopCostStr = ` Each spiral averages ${numberFormatInsight(db.tokenEvidence.avgTokensLongLoop)} tokens.`;
|
|
355
|
+
}
|
|
356
|
+
}
|
|
314
357
|
watchPoints.push({
|
|
315
358
|
name: 'Debug spirals',
|
|
316
359
|
detail: `${db.longLoops} extended debug loops (>5 turns) detected.${loopCostStr} When stuck, try providing more specific error context or breaking the problem differently.`,
|