chekk 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/chekk.js +1 -1
- package/package.json +1 -1
- package/src/display.js +187 -29
- package/src/index.js +2 -2
- package/src/insights.js +49 -6
package/bin/chekk.js
CHANGED
|
@@ -4,7 +4,7 @@ import { execSync, spawn } from 'child_process';
|
|
|
4
4
|
import { Command } from 'commander';
|
|
5
5
|
import { run } from '../src/index.js';
|
|
6
6
|
|
|
7
|
-
const LOCAL_VERSION = '0.5.0';
|
|
7
|
+
const LOCAL_VERSION = '0.5.2';
|
|
8
8
|
|
|
9
9
|
// ── Auto-update check ──
|
|
10
10
|
// If running from a cached npx install, check if there's a newer version
|
package/package.json
CHANGED
package/src/display.js
CHANGED
|
@@ -173,7 +173,7 @@ export function displayHeader() {
|
|
|
173
173
|
console.log();
|
|
174
174
|
const lines = [
|
|
175
175
|
'',
|
|
176
|
-
` ${bold.white('chekk')}${dim(' v0.5.0')}`,
|
|
176
|
+
` ${bold.white('chekk')}${dim(' v0.5.2')}`,
|
|
177
177
|
` ${dim('prompt engineering capability profile')}`,
|
|
178
178
|
'',
|
|
179
179
|
];
|
|
@@ -213,7 +213,10 @@ export async function displayProgressBar(durationMs = 2000) {
|
|
|
213
213
|
console.log();
|
|
214
214
|
console.log();
|
|
215
215
|
console.log(` ${green('\u2713')} ${dim('Profile generated.')}`);
|
|
216
|
-
|
|
216
|
+
// Brief pause so user sees "Profile generated" before clearing
|
|
217
|
+
await sleep(600);
|
|
218
|
+
// Clear screen so results start at the top
|
|
219
|
+
process.stdout.write('\x1b[2J\x1b[H');
|
|
217
220
|
}
|
|
218
221
|
|
|
219
222
|
// ══════════════════════════════════════════════
|
|
@@ -231,7 +234,7 @@ function displayProfileHeader(result, extra = {}) {
|
|
|
231
234
|
console.log(` ${bold.white('PROMPT ENGINEERING CAPABILITY PROFILE')}`);
|
|
232
235
|
console.log();
|
|
233
236
|
if (sessionStats) {
|
|
234
|
-
console.log(` ${dim(`Generated ${dateStr} | chekk v0.5.0`)}`);
|
|
237
|
+
console.log(` ${dim(`Generated ${dateStr} | chekk v0.5.2`)}`);
|
|
235
238
|
console.log(` ${dim(`Analysis: ${sessionStats.totalSessions} sessions \u00B7 ${sessionStats.tools.length} tool${sessionStats.tools.length > 1 ? 's' : ''} \u00B7 ${numberFormat(sessionStats.totalExchanges)} exchanges`)}`);
|
|
236
239
|
if (sessionStats.dateRangeShort) {
|
|
237
240
|
console.log(` ${dim(`Period: ${sessionStats.dateRangeShort}`)}`);
|
|
@@ -412,6 +415,7 @@ export function displayTokenEfficiency(tokenEfficiency, metrics) {
|
|
|
412
415
|
|
|
413
416
|
// Format percentages with appropriate precision
|
|
414
417
|
function fmtPct(pct) {
|
|
418
|
+
if (pct >= 99.5) return pct.toFixed(1) + '%';
|
|
415
419
|
if (pct >= 10) return Math.round(pct) + '%';
|
|
416
420
|
if (pct >= 1) return pct.toFixed(1) + '%';
|
|
417
421
|
if (pct > 0) return pct.toFixed(2) + '%';
|
|
@@ -470,20 +474,93 @@ export function displayTokenEfficiency(tokenEfficiency, metrics) {
|
|
|
470
474
|
}
|
|
471
475
|
|
|
472
476
|
// ── Token cost evidence from metrics ──
|
|
473
|
-
displayTokenEvidence(metrics);
|
|
477
|
+
displayTokenEvidence(metrics, tokenEfficiency);
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
// ── Per-dimension cost annotation ──
|
|
481
|
+
// Shows a single cost insight line under each scored dimension
|
|
482
|
+
|
|
483
|
+
/**
 * Format a dollar amount for compact terminal display.
 *
 * Precision scales with magnitude so that both large totals and tiny
 * per-exchange costs stay readable:
 *   - >= $999.50      -> thousands with at most one decimal, e.g. "$1.5k"
 *   - >= $100         -> whole dollars, e.g. "$250"
 *   - >= $0.01 (or 0) -> cents, e.g. "$12.50"
 *   - < $0.01         -> four decimals, e.g. "$0.0040", so sub-cent
 *                        comparisons do not collapse into "$0.00 vs $0.00"
 *   - < $0.0001       -> "<$0.0001"
 *
 * @param {number} amount - Dollar amount to format.
 * @returns {string} The formatted amount, prefixed with "$".
 */
function dollarFormat(amount) {
  // NaN/Infinity (e.g. from a missing cost total) would otherwise print "$NaN".
  if (!Number.isFinite(amount)) return '$0.00';

  // 999.5 is the point where whole-dollar rounding would print "$1000";
  // switch to the "k" form there so the output never jumps from "$1000" to "$1k".
  if (amount >= 999.5) {
    return '$' + (amount / 1000).toFixed(1).replace(/\.0$/, '') + 'k';
  }
  if (amount >= 100) return '$' + Math.round(amount);

  // Sub-cent values: keep enough digits to remain distinguishable.
  if (amount > 0 && amount < 0.01) {
    return amount < 0.0001 ? '<$0.0001' : '$' + amount.toFixed(4);
  }

  return '$' + amount.toFixed(2);
}
|
|
475
489
|
|
|
476
|
-
function displayTokenEvidence(metrics) {
|
|
490
|
+
/**
 * Print a single "Cost:" annotation line beneath a scored dimension.
 *
 * Token averages from each dimension's `details.tokenEvidence` are converted
 * to dollars using a per-token rate derived from the overall totals
 * (`estimatedCostTotal / grandTotal`). Nothing is printed when token data is
 * unavailable, when no positive rate can be derived, when the dimension name
 * is not one of the four handled below, or when the evidence for that
 * dimension is missing.
 *
 * @param {string} dimensionName - Lower-cased dimension title; handled values
 *   are 'thinking', 'debugging', 'ai leverage' and 'workflow'.
 * @param {object} metrics - Metrics object carrying `decomposition`,
 *   `debugCycles`, `aiLeverage` and `sessionStructure` entries.
 * @param {object|null} tokenEfficiency - Token efficiency summary; must have
 *   `hasData` set for any output to be produced.
 * @returns {void}
 */
function displayDimensionCostLine(dimensionName, metrics, tokenEfficiency) {
  if (!tokenEfficiency || !tokenEfficiency.hasData) return;

  const pricing = tokenEfficiency.estimatedCostTotal / Math.max(1, tokenEfficiency.grandTotal);
  // Same guard displayTokenEvidence applies: without a positive per-token rate
  // every figure would render as "$0.00" (or "$NaN"), which is noise.
  if (!(pricing > 0)) return;

  if (dimensionName === 'thinking') {
    // Optional chaining: a missing dimension should skip the line, not crash the report.
    const de = metrics?.decomposition?.details?.tokenEvidence;
    if (de && de.avgTokensPerExchangeSingleShot && de.avgTokensPerExchangeMultiStep) {
      const ssCost = de.avgTokensPerExchangeSingleShot * pricing;
      const msCost = de.avgTokensPerExchangeMultiStep * pricing;
      // Only worth showing when single-shot is at least 10% more expensive.
      if (ssCost > msCost * 1.1) {
        console.log(` ${dim('\u21B3 Cost:')} ${dim('single-shot')} ${orange(dollarFormat(ssCost) + '/exchange')} ${dim('vs multi-step')} ${green(dollarFormat(msCost) + '/exchange')}`);
      }
    }
  }

  if (dimensionName === 'debugging') {
    const dbe = metrics?.debugCycles?.details?.tokenEvidence;
    if (dbe && dbe.avgTokensQuickFix && dbe.avgTokensLongLoop) {
      const qfCost = dbe.avgTokensQuickFix * pricing;
      const llCost = dbe.avgTokensLongLoop * pricing;
      console.log(` ${dim('\u21B3 Cost:')} ${dim('quick fix')} ${green(dollarFormat(qfCost))} ${dim('vs debug spiral')} ${orange(dollarFormat(llCost))}`);
    } else if (dbe && dbe.avgTokensSpecificDebug) {
      // Fallback when the quick-fix / long-loop split is unavailable.
      const spCost = dbe.avgTokensSpecificDebug * pricing;
      console.log(` ${dim('\u21B3 Cost:')} ${dim('avg debug cycle')} ${dim(dollarFormat(spCost))}`);
    }
  }

  if (dimensionName === 'ai leverage') {
    const aie = metrics?.aiLeverage?.details?.tokenEvidence;
    if (aie && aie.avgTokensArchitectural && aie.avgTokensBoilerplate) {
      const archCost = aie.avgTokensArchitectural * pricing;
      const boilCost = aie.avgTokensBoilerplate * pricing;
      console.log(` ${dim('\u21B3 Cost:')} ${dim('architectural prompt')} ${dim(dollarFormat(archCost))} ${dim('vs boilerplate')} ${dim(dollarFormat(boilCost))}`);
    } else if (aie && aie.avgTokensTrivialPrompt && aie.avgTokensComplexPrompt) {
      // Fallback comparison when the architectural / boilerplate split is unavailable.
      const trivCost = aie.avgTokensTrivialPrompt * pricing;
      const compCost = aie.avgTokensComplexPrompt * pricing;
      console.log(` ${dim('\u21B3 Cost:')} ${dim('trivial prompt')} ${dim(dollarFormat(trivCost))} ${dim('vs detailed')} ${dim(dollarFormat(compCost))}`);
    }
  }

  if (dimensionName === 'workflow') {
    const sse = metrics?.sessionStructure?.details?.tokenEvidence;
    if (sse && sse.avgTokensPerExchangeMarathon && sse.avgTokensPerExchangeFocused) {
      const marCost = sse.avgTokensPerExchangeMarathon * pricing;
      const focCost = sse.avgTokensPerExchangeFocused * pricing;
      // Only worth showing when marathon sessions are at least 10% more expensive.
      if (marCost > focCost * 1.1) {
        console.log(` ${dim('\u21B3 Cost:')} ${dim('marathon')} ${orange(dollarFormat(marCost) + '/exchange')} ${dim('vs focused')} ${green(dollarFormat(focCost) + '/exchange')}`);
      }
    }
  }
}
|
|
542
|
+
|
|
543
|
+
function displayTokenEvidence(metrics, tokenEfficiency) {
|
|
477
544
|
const evidenceLines = [];
|
|
545
|
+
const pricing = tokenEfficiency && tokenEfficiency.hasData
|
|
546
|
+
? tokenEfficiency.estimatedCostTotal / Math.max(1, tokenEfficiency.grandTotal) : 0;
|
|
478
547
|
|
|
479
548
|
// Decomposition: single-shot vs multi-step cost
|
|
480
549
|
const de = metrics.decomposition.details.tokenEvidence;
|
|
481
550
|
if (de && de.avgTokensPerExchangeSingleShot && de.avgTokensPerExchangeMultiStep) {
|
|
482
551
|
const ratio = (de.avgTokensPerExchangeSingleShot / de.avgTokensPerExchangeMultiStep).toFixed(1);
|
|
483
552
|
if (parseFloat(ratio) > 1.2) {
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
553
|
+
if (pricing > 0) {
|
|
554
|
+
const ssCost = dollarFormat(de.avgTokensPerExchangeSingleShot * pricing);
|
|
555
|
+
const msCost = dollarFormat(de.avgTokensPerExchangeMultiStep * pricing);
|
|
556
|
+
evidenceLines.push(
|
|
557
|
+
` ${dim('\u2022 Single-shot prompts cost')} ${orange(ssCost + '/exchange')} ${dim('vs')} ${green(msCost)} ${dim('in multi-step sessions')} ${dim('(' + ratio + 'x)')}`
|
|
558
|
+
);
|
|
559
|
+
} else {
|
|
560
|
+
evidenceLines.push(
|
|
561
|
+
` ${dim('\u2022 Single-shot prompts cost')} ${orange(ratio + 'x')} ${dim('more tokens per exchange than multi-step sessions')}`
|
|
562
|
+
);
|
|
563
|
+
}
|
|
487
564
|
}
|
|
488
565
|
}
|
|
489
566
|
|
|
@@ -492,32 +569,62 @@ function displayTokenEvidence(metrics) {
|
|
|
492
569
|
if (dbe && dbe.avgTokensVagueDebug && dbe.avgTokensSpecificDebug) {
|
|
493
570
|
const ratio = (dbe.avgTokensVagueDebug / dbe.avgTokensSpecificDebug).toFixed(1);
|
|
494
571
|
if (parseFloat(ratio) > 1.2) {
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
572
|
+
if (pricing > 0) {
|
|
573
|
+
const vCost = dollarFormat(dbe.avgTokensVagueDebug * pricing);
|
|
574
|
+
const sCost = dollarFormat(dbe.avgTokensSpecificDebug * pricing);
|
|
575
|
+
evidenceLines.push(
|
|
576
|
+
` ${dim('\u2022 Vague debug prompts cost')} ${orange(vCost)} ${dim('vs')} ${green(sCost)} ${dim('for specific reports')} ${dim('(' + ratio + 'x)')}`
|
|
577
|
+
);
|
|
578
|
+
} else {
|
|
579
|
+
evidenceLines.push(
|
|
580
|
+
` ${dim('\u2022 Vague debug prompts cost')} ${orange(ratio + 'x')} ${dim('more than specific error reports')}`
|
|
581
|
+
);
|
|
582
|
+
}
|
|
498
583
|
}
|
|
499
584
|
}
|
|
500
585
|
|
|
501
586
|
// AI Leverage: trivial prompts vs detailed ones
|
|
502
587
|
const aie = metrics.aiLeverage.details.tokenEvidence;
|
|
503
588
|
if (aie && aie.avgTokensTrivialPrompt && aie.avgTokensComplexPrompt) {
|
|
504
|
-
// Trivial prompts often cost nearly as much because Claude re-reads everything anyway
|
|
505
589
|
const savingsPct = Math.round((1 - aie.avgTokensTrivialPrompt / aie.avgTokensComplexPrompt) * 100);
|
|
506
|
-
if (savingsPct < 40) {
|
|
590
|
+
if (pricing > 0) {
|
|
591
|
+
const trivCost = dollarFormat(aie.avgTokensTrivialPrompt * pricing);
|
|
592
|
+
const compCost = dollarFormat(aie.avgTokensComplexPrompt * pricing);
|
|
593
|
+
evidenceLines.push(
|
|
594
|
+
` ${dim('\u2022 Short vague prompts (<50 chars) cost')} ${dim(trivCost)} ${dim('— only ' + savingsPct + '% less than detailed ones at')} ${dim(compCost)}`
|
|
595
|
+
);
|
|
596
|
+
} else if (savingsPct < 40) {
|
|
507
597
|
evidenceLines.push(
|
|
508
598
|
` ${dim('\u2022 Short vague prompts (<50 chars) cost')} ${dim(numberFormat(aie.avgTokensTrivialPrompt) + ' tokens')} ${dim('— only ' + savingsPct + '% less than detailed ones')}`
|
|
509
599
|
);
|
|
510
600
|
}
|
|
511
601
|
}
|
|
512
602
|
|
|
603
|
+
// AI Leverage: architectural vs boilerplate cost
|
|
604
|
+
if (aie && aie.avgTokensArchitectural && aie.avgTokensBoilerplate && pricing > 0) {
|
|
605
|
+
const archCost = dollarFormat(aie.avgTokensArchitectural * pricing);
|
|
606
|
+
const boilCost = dollarFormat(aie.avgTokensBoilerplate * pricing);
|
|
607
|
+
evidenceLines.push(
|
|
608
|
+
` ${dim('\u2022 Architectural prompts cost')} ${dim(archCost + '/exchange')} ${dim('vs boilerplate at')} ${dim(boilCost + '/exchange')}`
|
|
609
|
+
);
|
|
610
|
+
}
|
|
611
|
+
|
|
513
612
|
// Session structure: marathon vs focused cost
|
|
514
613
|
const sse = metrics.sessionStructure.details.tokenEvidence;
|
|
515
614
|
if (sse && sse.avgTokensPerExchangeMarathon && sse.avgTokensPerExchangeFocused) {
|
|
516
615
|
const ratio = (sse.avgTokensPerExchangeMarathon / sse.avgTokensPerExchangeFocused).toFixed(1);
|
|
517
616
|
if (parseFloat(ratio) > 1.1) {
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
617
|
+
if (pricing > 0) {
|
|
618
|
+
const marCost = dollarFormat(sse.avgTokensPerExchangeMarathon * pricing);
|
|
619
|
+
const focCost = dollarFormat(sse.avgTokensPerExchangeFocused * pricing);
|
|
620
|
+
evidenceLines.push(
|
|
621
|
+
` ${dim('\u2022 Marathon sessions (>60m) cost')} ${orange(marCost + '/exchange')} ${dim('vs focused at')} ${green(focCost)} ${dim('(' + ratio + 'x)')}`
|
|
622
|
+
);
|
|
623
|
+
} else {
|
|
624
|
+
evidenceLines.push(
|
|
625
|
+
` ${dim('\u2022 Marathon sessions (>60m) cost')} ${orange(ratio + 'x')} ${dim('more per exchange than focused ones (10-45m)')}`
|
|
626
|
+
);
|
|
627
|
+
}
|
|
521
628
|
}
|
|
522
629
|
}
|
|
523
630
|
|
|
@@ -525,9 +632,32 @@ function displayTokenEvidence(metrics) {
|
|
|
525
632
|
if (sse && sse.avgTokensPerExchangeNoContext && sse.avgTokensPerExchangeWithContext) {
|
|
526
633
|
const ratio = (sse.avgTokensPerExchangeNoContext / sse.avgTokensPerExchangeWithContext).toFixed(1);
|
|
527
634
|
if (parseFloat(ratio) > 1.1) {
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
635
|
+
if (pricing > 0) {
|
|
636
|
+
const noCost = dollarFormat(sse.avgTokensPerExchangeNoContext * pricing);
|
|
637
|
+
const wCost = dollarFormat(sse.avgTokensPerExchangeWithContext * pricing);
|
|
638
|
+
evidenceLines.push(
|
|
639
|
+
` ${dim('\u2022 Sessions without upfront context cost')} ${orange(noCost + '/exchange')} ${dim('vs')} ${green(wCost)} ${dim('with context')} ${dim('(' + ratio + 'x)')}`
|
|
640
|
+
);
|
|
641
|
+
} else {
|
|
642
|
+
evidenceLines.push(
|
|
643
|
+
` ${dim('\u2022 Sessions without upfront context cost')} ${orange(ratio + 'x')} ${dim('more per exchange')}`
|
|
644
|
+
);
|
|
645
|
+
}
|
|
646
|
+
}
|
|
647
|
+
}
|
|
648
|
+
|
|
649
|
+
// Session length cost curve from token efficiency
|
|
650
|
+
if (tokenEfficiency && tokenEfficiency.hasData && tokenEfficiency.sessionLengthAnalysis) {
|
|
651
|
+
const sla = tokenEfficiency.sessionLengthAnalysis;
|
|
652
|
+
if (sla.length >= 2 && pricing > 0) {
|
|
653
|
+
const sorted = [...sla].sort((a, b) => a.avgTokensPerExchange - b.avgTokensPerExchange);
|
|
654
|
+
const cheapest = sorted[0];
|
|
655
|
+
const costliest = sorted[sorted.length - 1];
|
|
656
|
+
if (costliest.avgTokensPerExchange > cheapest.avgTokensPerExchange * 1.5) {
|
|
657
|
+
evidenceLines.push(
|
|
658
|
+
` ${dim('\u2022 Cost curve:')} ${dim(cheapest.label)} ${green(dollarFormat(cheapest.avgTokensPerExchange * pricing) + '/exchange')} ${dim('\u2192')} ${dim(costliest.label)} ${orange(dollarFormat(costliest.avgTokensPerExchange * pricing) + '/exchange')}`
|
|
659
|
+
);
|
|
660
|
+
}
|
|
531
661
|
}
|
|
532
662
|
}
|
|
533
663
|
|
|
@@ -572,10 +702,18 @@ function displayCrossPlatform(perToolScores) {
|
|
|
572
702
|
// PROJECT ANALYSIS
|
|
573
703
|
// ══════════════════════════════════════════════
|
|
574
704
|
|
|
575
|
-
function displayProjects(insights) {
|
|
705
|
+
function displayProjects(insights, tokenEfficiency = null) {
|
|
576
706
|
const projects = insights?.projectComplexity;
|
|
577
707
|
if (!projects || projects.length === 0) return;
|
|
578
708
|
|
|
709
|
+
// Build a lookup from token efficiency per-project data
|
|
710
|
+
const projectCostMap = {};
|
|
711
|
+
if (tokenEfficiency && tokenEfficiency.hasData && tokenEfficiency.perProject) {
|
|
712
|
+
for (const pp of tokenEfficiency.perProject) {
|
|
713
|
+
projectCostMap[pp.fullName] = pp;
|
|
714
|
+
}
|
|
715
|
+
}
|
|
716
|
+
|
|
579
717
|
console.log(dim(' PROJECT ANALYSIS\n'));
|
|
580
718
|
for (const p of projects) {
|
|
581
719
|
const compColor = p.complexity === 'HIGH' ? green : p.complexity === 'MEDIUM' ? yellow : dim;
|
|
@@ -584,7 +722,17 @@ function displayProjects(insights) {
|
|
|
584
722
|
if (signals) {
|
|
585
723
|
console.log(` ${dim(signals)}`);
|
|
586
724
|
}
|
|
587
|
-
|
|
725
|
+
// Look up cost data — match by full project name or short name
|
|
726
|
+
const costData = Object.values(projectCostMap).find(pp =>
|
|
727
|
+
p.name === pp.name || pp.fullName?.endsWith(p.name.replace('...', ''))
|
|
728
|
+
);
|
|
729
|
+
if (costData && costData.exchanges > 0) {
|
|
730
|
+
const costPerExchange = costData.estimatedCost / costData.exchanges;
|
|
731
|
+
const totalCost = costData.estimatedCost;
|
|
732
|
+
console.log(` ${dim(`${p.sessions} sessions \u00B7 ${numberFormat(p.exchanges)} exchanges \u00B7 ${p.daysActive} days active`)} ${dim('|')} ${dim('$' + totalCost.toFixed(0) + ' total \u00B7 $' + costPerExchange.toFixed(2) + '/exchange')}`);
|
|
733
|
+
} else {
|
|
734
|
+
console.log(` ${dim(`${p.sessions} sessions \u00B7 ${numberFormat(p.exchanges)} exchanges \u00B7 ${p.daysActive} days active`)}`);
|
|
735
|
+
}
|
|
588
736
|
console.log();
|
|
589
737
|
}
|
|
590
738
|
}
|
|
@@ -593,7 +741,7 @@ function displayProjects(insights) {
|
|
|
593
741
|
// DIMENSION NARRATIVES — Clinical, benchmarked
|
|
594
742
|
// ══════════════════════════════════════════════
|
|
595
743
|
|
|
596
|
-
export function displayNarratives(metrics, prose) {
|
|
744
|
+
export function displayNarratives(metrics, prose, tokenEfficiency = null) {
|
|
597
745
|
const shownSnippets = new Set();
|
|
598
746
|
function showLabeledSnippet(label, prompt) {
|
|
599
747
|
if (!prompt) return;
|
|
@@ -617,17 +765,19 @@ export function displayNarratives(metrics, prose) {
|
|
|
617
765
|
for (const line of lines) {
|
|
618
766
|
console.log(` ${dim(line.trim())}`);
|
|
619
767
|
}
|
|
768
|
+
// Add token cost line for this dimension
|
|
620
769
|
const titleLower = section.title.toLowerCase();
|
|
770
|
+
displayDimensionCostLine(titleLower, metrics, tokenEfficiency);
|
|
621
771
|
const matched = sectionSnippetMap[titleLower];
|
|
622
772
|
if (matched && matched.prompt) showLabeledSnippet(matched.label, matched.prompt);
|
|
623
773
|
console.log();
|
|
624
774
|
}
|
|
625
775
|
} else {
|
|
626
|
-
displayDataNarratives(metrics, shownSnippets);
|
|
776
|
+
displayDataNarratives(metrics, shownSnippets, tokenEfficiency);
|
|
627
777
|
}
|
|
628
778
|
}
|
|
629
779
|
|
|
630
|
-
function displayDataNarratives(metrics, shownSnippets) {
|
|
780
|
+
function displayDataNarratives(metrics, shownSnippets, tokenEfficiency = null) {
|
|
631
781
|
const d = metrics.decomposition.details;
|
|
632
782
|
const db = metrics.debugCycles.details;
|
|
633
783
|
const ai = metrics.aiLeverage.details;
|
|
@@ -646,6 +796,7 @@ function displayDataNarratives(metrics, shownSnippets) {
|
|
|
646
796
|
console.log(` ${dim(`Avg session depth: ${d.avgExchangesPerSession} exchanges (benchmark: ${BENCHMARKS.avgExchangesPerSession})`)}`);
|
|
647
797
|
console.log(` ${dim(`Avg prompt length: ${numberFormat(d.avgPromptLength)} chars (benchmark: ${BENCHMARKS.avgPromptLength})`)}`);
|
|
648
798
|
console.log(` ${dim(`Multi-step ratio: ${d.multiStepSessions}/${d.totalSessions} sessions (${Math.round(d.multiStepSessions / Math.max(1, d.totalSessions) * 100)}%)`)}`);
|
|
799
|
+
displayDimensionCostLine('thinking', metrics, tokenEfficiency);
|
|
649
800
|
showSnippet('Evidence', pickExample(metrics.decomposition.examples, 'decomposition'));
|
|
650
801
|
console.log();
|
|
651
802
|
|
|
@@ -654,6 +805,7 @@ function displayDataNarratives(metrics, shownSnippets) {
|
|
|
654
805
|
console.log(` ${dim(`Avg resolution: ${db.avgTurnsToResolve} turns (benchmark: ${BENCHMARKS.avgTurnsToResolve})`)}`);
|
|
655
806
|
console.log(` ${dim(`Specific reports: ${db.specificReportRatio}% (benchmark: ${BENCHMARKS.specificReportRatio}%)`)}`);
|
|
656
807
|
console.log(` ${dim(`Extended loops: ${db.longLoops} | Quick fixes: ${db.quickFixes}/${db.totalDebugSequences}`)}`);
|
|
808
|
+
displayDimensionCostLine('debugging', metrics, tokenEfficiency);
|
|
657
809
|
showSnippet('Evidence', pickExample(metrics.debugCycles.examples, 'specific_report') || pickExample(metrics.debugCycles.examples, 'quick_fix'));
|
|
658
810
|
console.log();
|
|
659
811
|
|
|
@@ -662,6 +814,7 @@ function displayDataNarratives(metrics, shownSnippets) {
|
|
|
662
814
|
console.log(` ${dim(`High-level ratio: ${ai.highLevelRatio}% (benchmark: ${BENCHMARKS.highLevelRatio}%)`)}`);
|
|
663
815
|
console.log(` ${dim(`Breakdown: ${ai.architecturalPrompts} architectural \u00B7 ${ai.planningPrompts} planning \u00B7 ${ai.exploratoryPrompts} exploratory`)}`);
|
|
664
816
|
console.log(` ${dim(`Boilerplate: ${ai.boilerplatePrompts} (${Math.round(ai.boilerplatePrompts / Math.max(1, ai.totalPrompts) * 100)}%) \u00B7 Testing: ${ai.testingPrompts}`)}`);
|
|
817
|
+
displayDimensionCostLine('ai leverage', metrics, tokenEfficiency);
|
|
665
818
|
showSnippet('Evidence', pickExample(metrics.aiLeverage.examples, 'architectural') || pickExample(metrics.aiLeverage.examples, 'planning'));
|
|
666
819
|
console.log();
|
|
667
820
|
|
|
@@ -670,6 +823,7 @@ function displayDataNarratives(metrics, shownSnippets) {
|
|
|
670
823
|
console.log(` ${dim(`Context-setting: ${ss.contextSetRatio}% (benchmark: ${BENCHMARKS.contextSetRatio}%)`)}`);
|
|
671
824
|
console.log(` ${dim(`Review at end: ${ss.reviewEndRatio}% (benchmark: ${BENCHMARKS.reviewEndRatio}%)`)}`);
|
|
672
825
|
console.log(` ${dim(`Refinement rate: ${ss.refinementRatio}% (benchmark: ${BENCHMARKS.refinementRatio}%)`)}`);
|
|
826
|
+
displayDimensionCostLine('workflow', metrics, tokenEfficiency);
|
|
673
827
|
showSnippet('Evidence', pickExample(metrics.sessionStructure.examples, 'context_setting') || pickExample(metrics.sessionStructure.examples, 'refinement'));
|
|
674
828
|
console.log();
|
|
675
829
|
}
|
|
@@ -838,7 +992,7 @@ function displayNextSteps(result) {
|
|
|
838
992
|
// VERBOSE: DETAILED BREAKDOWN
|
|
839
993
|
// ══════════════════════════════════════════════
|
|
840
994
|
|
|
841
|
-
export function displayVerbose(metrics, sessions) {
|
|
995
|
+
export function displayVerbose(metrics, sessions, tokenEfficiency = null) {
|
|
842
996
|
console.log(doubleRule());
|
|
843
997
|
console.log(dim('\n DETAILED BREAKDOWN\n'));
|
|
844
998
|
|
|
@@ -894,6 +1048,7 @@ export function displayVerbose(metrics, sessions) {
|
|
|
894
1048
|
console.log(` ${dim(pad('Multi-step sessions', 28))} ${white(d.multiStepSessions + '/' + d.totalSessions)} ${dim('(' + Math.round(d.multiStepSessions / Math.max(1, d.totalSessions) * 100) + '%)')}`);
|
|
895
1049
|
console.log(` ${dim(pad('Single-shot sessions', 28))} ${white(String(d.singleShotSessions))}`);
|
|
896
1050
|
console.log(` ${dim(pad('Contextual followups', 28))} ${white(d.contextualFollowupRatio + '%')}${d.contextualFollowupRatio > 20 ? cyan(' builds on context well') : orange(' could reference prior work more')}`);
|
|
1051
|
+
displayDimensionCostLine('thinking', metrics, tokenEfficiency);
|
|
897
1052
|
console.log();
|
|
898
1053
|
|
|
899
1054
|
// ── Debugging ──
|
|
@@ -906,6 +1061,7 @@ export function displayVerbose(metrics, sessions) {
|
|
|
906
1061
|
console.log(` ${dim(pad('Extended loops (>5 turns)', 28))} ${db.longLoops === 0 ? green('0 -- zero spirals') : orange(String(db.longLoops))}`);
|
|
907
1062
|
metricRow('Specific report ratio', db.specificReportRatio, BENCHMARKS.specificReportRatio, '%');
|
|
908
1063
|
console.log(` ${dim(pad('Vague reports', 28))} ${db.vagueReports === 0 ? green('0 -- never vague') : orange(String(db.vagueReports))}`);
|
|
1064
|
+
displayDimensionCostLine('debugging', metrics, tokenEfficiency);
|
|
909
1065
|
console.log();
|
|
910
1066
|
|
|
911
1067
|
// ── AI Leverage ──
|
|
@@ -923,6 +1079,7 @@ export function displayVerbose(metrics, sessions) {
|
|
|
923
1079
|
console.log(` ${dim(pad(' Testing', 28))} ${white(String(ai.testingPrompts))} ${dim('(' + Math.round(ai.testingPrompts / total * 100) + '%)')}`);
|
|
924
1080
|
console.log();
|
|
925
1081
|
metricRow('High-level ratio', ai.highLevelRatio, BENCHMARKS.highLevelRatio, '%');
|
|
1082
|
+
displayDimensionCostLine('ai leverage', metrics, tokenEfficiency);
|
|
926
1083
|
console.log();
|
|
927
1084
|
|
|
928
1085
|
// ── Session Structure ──
|
|
@@ -943,6 +1100,7 @@ export function displayVerbose(metrics, sessions) {
|
|
|
943
1100
|
console.log(` ${dim(pad(' Medium (5-60m)', 28))} ${white(String(dur.medium))}`);
|
|
944
1101
|
console.log(` ${dim(pad(' Long (>60m)', 28))} ${white(String(dur.long))}`);
|
|
945
1102
|
}
|
|
1103
|
+
displayDimensionCostLine('workflow', metrics, tokenEfficiency);
|
|
946
1104
|
console.log();
|
|
947
1105
|
}
|
|
948
1106
|
|
|
@@ -957,8 +1115,8 @@ export function displayOffline(result, metrics, extra = {}) {
|
|
|
957
1115
|
displayDimensions(result);
|
|
958
1116
|
displayTokenEfficiency(extra.tokenEfficiency, metrics);
|
|
959
1117
|
displayCrossPlatform(extra.perToolScores);
|
|
960
|
-
displayDataNarratives(metrics, new Set());
|
|
961
|
-
displayProjects(extra.insights);
|
|
1118
|
+
displayDataNarratives(metrics, new Set(), extra.tokenEfficiency);
|
|
1119
|
+
displayProjects(extra.insights, extra.tokenEfficiency);
|
|
962
1120
|
displaySignatures(extra.insights);
|
|
963
1121
|
displayWatchPoints(extra.insights);
|
|
964
1122
|
displayTrajectory(extra.insights);
|
|
@@ -980,8 +1138,8 @@ export function displayFull(result, metrics, prose, extra = {}) {
|
|
|
980
1138
|
displayDimensions(result);
|
|
981
1139
|
displayTokenEfficiency(extra.tokenEfficiency, metrics);
|
|
982
1140
|
displayCrossPlatform(extra.perToolScores);
|
|
983
|
-
displayNarratives(metrics, prose);
|
|
984
|
-
displayProjects(extra.insights);
|
|
1141
|
+
displayNarratives(metrics, prose, extra.tokenEfficiency);
|
|
1142
|
+
displayProjects(extra.insights, extra.tokenEfficiency);
|
|
985
1143
|
displaySignatures(extra.insights);
|
|
986
1144
|
displayWatchPoints(extra.insights);
|
|
987
1145
|
displayTrajectory(extra.insights);
|
package/src/index.js
CHANGED
|
@@ -213,13 +213,13 @@ export async function run(options = {}) {
|
|
|
213
213
|
|
|
214
214
|
// ── Step 6: Verbose prompt (interactive) ──
|
|
215
215
|
if (options.verbose) {
|
|
216
|
-
displayVerbose(metrics, allSessions);
|
|
216
|
+
displayVerbose(metrics, allSessions, tokenEfficiency);
|
|
217
217
|
} else {
|
|
218
218
|
try {
|
|
219
219
|
const wantsVerbose = await askVerbose();
|
|
220
220
|
if (wantsVerbose) {
|
|
221
221
|
console.log();
|
|
222
|
-
displayVerbose(metrics, allSessions);
|
|
222
|
+
displayVerbose(metrics, allSessions, tokenEfficiency);
|
|
223
223
|
} else {
|
|
224
224
|
console.log();
|
|
225
225
|
}
|
package/src/insights.js
CHANGED
|
@@ -250,9 +250,21 @@ export function computeWatchPoints(allSessions, metrics, tokenEfficiency = null)
|
|
|
250
250
|
}
|
|
251
251
|
const totalFollowupSessions = Object.values(projectSessions).reduce((sum, s) => sum + Math.max(0, s.length - 1), 0);
|
|
252
252
|
if (totalFollowupSessions > 3 && contextRestarts / totalFollowupSessions > 0.5) {
|
|
253
|
+
// Estimate cost of context restarts — each restart forces full context re-read
|
|
254
|
+
let costNote = '';
|
|
255
|
+
if (tokenEfficiency && tokenEfficiency.hasData) {
|
|
256
|
+
const avgCostPerSession = tokenEfficiency.estimatedCostTotal / Math.max(1, tokenEfficiency.sessionsAnalyzed);
|
|
257
|
+
// The re-read cost of each restart scales with context size — estimate as % of avg session cost
|
|
258
|
+
const rereadPct = tokenEfficiency.contextRereadRatio || 0.94;
|
|
259
|
+
const wastedPerRestart = avgCostPerSession * rereadPct * 0.3; // ~30% of re-read is avoidable with context continuity
|
|
260
|
+
const totalWasted = wastedPerRestart * contextRestarts;
|
|
261
|
+
if (totalWasted > 1) {
|
|
262
|
+
costNote = ` Estimated wasted re-read cost: ~$${totalWasted.toFixed(0)}.`;
|
|
263
|
+
}
|
|
264
|
+
}
|
|
253
265
|
watchPoints.push({
|
|
254
266
|
name: 'Context amnesia',
|
|
255
|
-
detail: `You restart context from scratch in ${Math.round(contextRestarts / totalFollowupSessions * 100)}% of follow-up sessions on the same project. Engineers who maintain context across sessions are more efficient
|
|
267
|
+
detail: `You restart context from scratch in ${Math.round(contextRestarts / totalFollowupSessions * 100)}% of follow-up sessions on the same project. Engineers who maintain context across sessions are more efficient.${costNote}`,
|
|
256
268
|
evidence: bestContextRestartPrompt,
|
|
257
269
|
});
|
|
258
270
|
}
|
|
@@ -290,27 +302,58 @@ export function computeWatchPoints(allSessions, metrics, tokenEfficiency = null)
|
|
|
290
302
|
}
|
|
291
303
|
}
|
|
292
304
|
}
|
|
305
|
+
// Dollar annotation: long prompts trigger large context re-reads each time
|
|
306
|
+
let monologueCostNote = '';
|
|
307
|
+
if (tokenEfficiency && tokenEfficiency.hasData) {
|
|
308
|
+
// Compare avg tokens for long vs short prompts from the prompt length analysis
|
|
309
|
+
const longBucket = tokenEfficiency.promptLengthAnalysis.find(b => b.label === '500+ chars');
|
|
310
|
+
const shortBucket = tokenEfficiency.promptLengthAnalysis.find(b => b.label === '20-100 chars');
|
|
311
|
+
if (longBucket && shortBucket && longBucket.avgCost && shortBucket.avgCost) {
|
|
312
|
+
const ratio = (longBucket.avgCost / shortBucket.avgCost).toFixed(1);
|
|
313
|
+
monologueCostNote = ` Long prompts cost ${ratio}x more per exchange ($${longBucket.avgCost.toFixed(2)} vs $${shortBucket.avgCost.toFixed(2)}).`;
|
|
314
|
+
}
|
|
315
|
+
}
|
|
293
316
|
watchPoints.push({
|
|
294
317
|
name: 'Monologue prompting',
|
|
295
|
-
detail: `Avg prompt length of ${d.avgPromptLength} chars is ${Math.round(d.avgPromptLength / BENCHMARKS.avgPromptLength)}x the benchmark. Breaking complex requests into 2-3 shorter prompts typically yields better AI output
|
|
318
|
+
detail: `Avg prompt length of ${d.avgPromptLength} chars is ${Math.round(d.avgPromptLength / BENCHMARKS.avgPromptLength)}x the benchmark. Breaking complex requests into 2-3 shorter prompts typically yields better AI output.${monologueCostNote}`,
|
|
296
319
|
evidence: bestLongPrompt,
|
|
297
320
|
});
|
|
298
321
|
}
|
|
299
322
|
|
|
300
323
|
// Low context-setting
|
|
301
324
|
if (ss.contextSetRatio < 20) {
|
|
325
|
+
let contextCostNote = '';
|
|
326
|
+
if (ss.tokenEvidence?.avgTokensPerExchangeWithContext && ss.tokenEvidence?.avgTokensPerExchangeNoContext) {
|
|
327
|
+
const pricing = tokenEfficiency && tokenEfficiency.hasData
|
|
328
|
+
? tokenEfficiency.estimatedCostTotal / Math.max(1, tokenEfficiency.grandTotal) : 0;
|
|
329
|
+
if (pricing > 0) {
|
|
330
|
+
const withCtx = ss.tokenEvidence.avgTokensPerExchangeWithContext * pricing;
|
|
331
|
+
const noCtx = ss.tokenEvidence.avgTokensPerExchangeNoContext * pricing;
|
|
332
|
+
if (noCtx > withCtx * 1.1) {
|
|
333
|
+
contextCostNote = ` Sessions without context cost $${noCtx.toFixed(2)}/exchange vs $${withCtx.toFixed(2)} with context.`;
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
}
|
|
302
337
|
watchPoints.push({
|
|
303
338
|
name: 'Missing context',
|
|
304
|
-
detail: `Only ${ss.contextSetRatio}% of sessions start with context-setting (benchmark: ${BENCHMARKS.contextSetRatio}%). Upfront context leads to better first responses and fewer corrections
|
|
339
|
+
detail: `Only ${ss.contextSetRatio}% of sessions start with context-setting (benchmark: ${BENCHMARKS.contextSetRatio}%). Upfront context leads to better first responses and fewer corrections.${contextCostNote}`,
|
|
305
340
|
evidence: null,
|
|
306
341
|
});
|
|
307
342
|
}
|
|
308
343
|
|
|
309
344
|
// Extended debug spirals
|
|
310
345
|
if (db.longLoops > 2) {
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
346
|
+
let loopCostStr = '';
|
|
347
|
+
if (db.tokenEvidence?.avgTokensLongLoop) {
|
|
348
|
+
const pricing = tokenEfficiency && tokenEfficiency.hasData
|
|
349
|
+
? tokenEfficiency.estimatedCostTotal / Math.max(1, tokenEfficiency.grandTotal) : 0;
|
|
350
|
+
if (pricing > 0) {
|
|
351
|
+
const costPerLoop = db.tokenEvidence.avgTokensLongLoop * pricing;
|
|
352
|
+
loopCostStr = ` Each spiral costs ~$${costPerLoop.toFixed(2)}.`;
|
|
353
|
+
} else {
|
|
354
|
+
loopCostStr = ` Each spiral averages ${numberFormatInsight(db.tokenEvidence.avgTokensLongLoop)} tokens.`;
|
|
355
|
+
}
|
|
356
|
+
}
|
|
314
357
|
watchPoints.push({
|
|
315
358
|
name: 'Debug spirals',
|
|
316
359
|
detail: `${db.longLoops} extended debug loops (>5 turns) detected.${loopCostStr} When stuck, try providing more specific error context or breaking the problem differently.`,
|