chekk 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/chekk.js +1 -1
- package/package.json +2 -2
- package/src/display.js +192 -2
- package/src/index.js +9 -5
- package/src/insights.js +57 -4
- package/src/metrics/ai-leverage.js +28 -0
- package/src/metrics/debug-cycles.js +43 -0
- package/src/metrics/decomposition.js +25 -0
- package/src/metrics/session-structure.js +35 -0
- package/src/metrics/token-efficiency.js +258 -0
- package/src/parsers/claude-code.js +27 -0
- package/src/upload.js +10 -1
package/bin/chekk.js
CHANGED
|
@@ -4,7 +4,7 @@ import { execSync, spawn } from 'child_process';
|
|
|
4
4
|
import { Command } from 'commander';
|
|
5
5
|
import { run } from '../src/index.js';
|
|
6
6
|
|
|
7
|
-
const LOCAL_VERSION = '0.
|
|
7
|
+
const LOCAL_VERSION = '0.5.0';
|
|
8
8
|
|
|
9
9
|
// ── Auto-update check ──
|
|
10
10
|
// If running from a cached npx install, check if there's a newer version
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "chekk",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "See how you prompt. Chekk analyzes your AI coding workflow
|
|
3
|
+
"version": "0.5.0",
|
|
4
|
+
"description": "See how you prompt. Chekk analyzes your AI coding workflow, tells you what kind of engineer you are, and shows what your habits actually cost.",
|
|
5
5
|
"bin": {
|
|
6
6
|
"chekk": "./bin/chekk.js"
|
|
7
7
|
},
|
package/src/display.js
CHANGED
|
@@ -37,6 +37,8 @@ function progressBar(score, width = 18) {
|
|
|
37
37
|
}
|
|
38
38
|
|
|
39
39
|
function numberFormat(n) {
|
|
40
|
+
if (n >= 1_000_000_000) return (n / 1_000_000_000).toFixed(1).replace(/\.0$/, '') + 'B';
|
|
41
|
+
if (n >= 1_000_000) return (n / 1_000_000).toFixed(1).replace(/\.0$/, '') + 'M';
|
|
40
42
|
if (n >= 1000) return (n / 1000).toFixed(1).replace(/\.0$/, '') + 'k';
|
|
41
43
|
return String(n);
|
|
42
44
|
}
|
|
@@ -171,7 +173,7 @@ export function displayHeader() {
|
|
|
171
173
|
console.log();
|
|
172
174
|
const lines = [
|
|
173
175
|
'',
|
|
174
|
-
` ${bold.white('chekk')}${dim(' v0.
|
|
176
|
+
` ${bold.white('chekk')}${dim(' v0.5.0')}`,
|
|
175
177
|
` ${dim('prompt engineering capability profile')}`,
|
|
176
178
|
'',
|
|
177
179
|
];
|
|
@@ -229,7 +231,7 @@ function displayProfileHeader(result, extra = {}) {
|
|
|
229
231
|
console.log(` ${bold.white('PROMPT ENGINEERING CAPABILITY PROFILE')}`);
|
|
230
232
|
console.log();
|
|
231
233
|
if (sessionStats) {
|
|
232
|
-
console.log(` ${dim(`Generated ${dateStr} | chekk v0.
|
|
234
|
+
console.log(` ${dim(`Generated ${dateStr} | chekk v0.5.0`)}`);
|
|
233
235
|
console.log(` ${dim(`Analysis: ${sessionStats.totalSessions} sessions \u00B7 ${sessionStats.tools.length} tool${sessionStats.tools.length > 1 ? 's' : ''} \u00B7 ${numberFormat(sessionStats.totalExchanges)} exchanges`)}`);
|
|
234
236
|
if (sessionStats.dateRangeShort) {
|
|
235
237
|
console.log(` ${dim(`Period: ${sessionStats.dateRangeShort}`)}`);
|
|
@@ -353,6 +355,192 @@ function displayDimensions(result) {
|
|
|
353
355
|
console.log();
|
|
354
356
|
}
|
|
355
357
|
|
|
358
|
+
// ══════════════════════════════════════════════
|
|
359
|
+
// TOKEN EFFICIENCY — Spend overview panel
|
|
360
|
+
// ══════════════════════════════════════════════
|
|
361
|
+
|
|
362
|
+
export function displayTokenEfficiency(tokenEfficiency, metrics) {
|
|
363
|
+
if (!tokenEfficiency || !tokenEfficiency.hasData) return;
|
|
364
|
+
|
|
365
|
+
const te = tokenEfficiency;
|
|
366
|
+
console.log(dim(' TOKEN EFFICIENCY'));
|
|
367
|
+
console.log();
|
|
368
|
+
|
|
369
|
+
// ── Overview stats ──
|
|
370
|
+
const overviewLines = [
|
|
371
|
+
'',
|
|
372
|
+
` ${dim('Total tokens')} ${bold(numberFormat(te.grandTotal))}`,
|
|
373
|
+
` ${dim('Est. cost')} ${bold('$' + te.estimatedCostTotal.toFixed(2))}`,
|
|
374
|
+
` ${dim('Sessions')} ${dim(String(te.sessionsAnalyzed))}`,
|
|
375
|
+
` ${dim('Avg/exchange')} ${dim(numberFormat(te.avgTokensPerExchange) + ' tokens')}`,
|
|
376
|
+
'',
|
|
377
|
+
];
|
|
378
|
+
|
|
379
|
+
// Token composition bar — ensure every non-zero category gets at least 1 block
|
|
380
|
+
const barWidth = 40;
|
|
381
|
+
const categories = [
|
|
382
|
+
{ pct: te.composition.cacheReadPct, color: orange, label: 'context re-read' },
|
|
383
|
+
{ pct: te.composition.cacheCreationPct, color: yellow, label: 'cache create' },
|
|
384
|
+
{ pct: te.composition.inputPct, color: blue, label: 'new input' },
|
|
385
|
+
{ pct: te.composition.outputPct, color: green, label: 'output (code)' },
|
|
386
|
+
];
|
|
387
|
+
|
|
388
|
+
// Allocate bar widths: give at least 1 block to any non-zero category
|
|
389
|
+
let remaining = barWidth;
|
|
390
|
+
const widths = categories.map(c => {
|
|
391
|
+
if (c.pct > 0 && c.pct < (100 / barWidth)) { remaining--; return 1; }
|
|
392
|
+
return 0;
|
|
393
|
+
});
|
|
394
|
+
for (let i = 0; i < categories.length; i++) {
|
|
395
|
+
if (widths[i] === 0 && categories[i].pct > 0) {
|
|
396
|
+
widths[i] = Math.max(1, Math.round(categories[i].pct / 100 * barWidth));
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
// Adjust largest to fill remaining
|
|
400
|
+
const total = widths.reduce((a, b) => a + b, 0);
|
|
401
|
+
if (total !== barWidth) {
|
|
402
|
+
const largest = widths.indexOf(Math.max(...widths));
|
|
403
|
+
widths[largest] += barWidth - total;
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
let barStr = '';
|
|
407
|
+
for (let i = 0; i < categories.length; i++) {
|
|
408
|
+
barStr += categories[i].color('\u2588'.repeat(Math.max(0, widths[i])));
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
overviewLines.push(` ${barStr}`);
|
|
412
|
+
|
|
413
|
+
// Format percentages with appropriate precision
|
|
414
|
+
function fmtPct(pct) {
|
|
415
|
+
if (pct >= 10) return Math.round(pct) + '%';
|
|
416
|
+
if (pct >= 1) return pct.toFixed(1) + '%';
|
|
417
|
+
if (pct > 0) return pct.toFixed(2) + '%';
|
|
418
|
+
return '0%';
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
overviewLines.push(` ${orange('\u2588')} ${dim('context re-read ' + fmtPct(te.composition.cacheReadPct))} ` +
|
|
422
|
+
`${yellow('\u2588')} ${dim('cache create ' + fmtPct(te.composition.cacheCreationPct))}`);
|
|
423
|
+
overviewLines.push(` ${blue('\u2588')} ${dim('new input ' + fmtPct(te.composition.inputPct))} ` +
|
|
424
|
+
`${green('\u2588')} ${dim('output ' + fmtPct(te.composition.outputPct))}`);
|
|
425
|
+
overviewLines.push('');
|
|
426
|
+
|
|
427
|
+
// The key insight — use composition percentages for accuracy
|
|
428
|
+
const outputPct = te.composition.outputPct;
|
|
429
|
+
const nonOutputPct = 100 - outputPct;
|
|
430
|
+
if (outputPct < 50) {
|
|
431
|
+
overviewLines.push(` ${dim('Only')} ${bold(fmtPct(outputPct))} ${dim('of tokens are Claude writing code.')}`);
|
|
432
|
+
overviewLines.push(` ${dim('The other')} ${bold(fmtPct(nonOutputPct))} ${dim('is context re-reading.')}`);
|
|
433
|
+
overviewLines.push('');
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
for (const l of box(overviewLines, 53)) console.log(l);
|
|
437
|
+
console.log();
|
|
438
|
+
|
|
439
|
+
// ── Per-project breakdown ──
|
|
440
|
+
if (te.perProject.length > 1) {
|
|
441
|
+
console.log(` ${dim('SPEND BY PROJECT')}`);
|
|
442
|
+
console.log(` ${dim('\u2500'.repeat(53))}`);
|
|
443
|
+
for (const p of te.perProject.slice(0, 5)) {
|
|
444
|
+
const pctOfTotal = te.grandTotal > 0 ? Math.round(p.totalTokens / te.grandTotal * 100) : 0;
|
|
445
|
+
const costStr = '$' + p.estimatedCost.toFixed(2);
|
|
446
|
+
const shortName = p.name.length > 24 ? '...' + p.name.slice(-21) : p.name;
|
|
447
|
+
console.log(
|
|
448
|
+
` ${pad(white(shortName), 26)} ${pad(dim(numberFormat(p.totalTokens) + ' tokens'), 16)} ` +
|
|
449
|
+
`${pad(dim(costStr), 8)} ${dim(pctOfTotal + '%')}`
|
|
450
|
+
);
|
|
451
|
+
}
|
|
452
|
+
console.log();
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
// ── Costliest sessions ──
|
|
456
|
+
if (te.costliestSessions.length > 0) {
|
|
457
|
+
console.log(` ${dim('COSTLIEST SESSIONS')}`);
|
|
458
|
+
console.log(` ${dim('\u2500'.repeat(53))}`);
|
|
459
|
+
for (const s of te.costliestSessions.slice(0, 3)) {
|
|
460
|
+
const costStr = '$' + s.estimatedCost.toFixed(2);
|
|
461
|
+
const truncPrompt = s.firstPrompt.length > 40 ? s.firstPrompt.slice(0, 37) + '...' : s.firstPrompt;
|
|
462
|
+
console.log(
|
|
463
|
+
` ${dim(numberFormat(s.totalTokens) + ' tokens')} ${dim(costStr)} ${dim(s.exchanges + ' exchanges')}`
|
|
464
|
+
);
|
|
465
|
+
if (truncPrompt) {
|
|
466
|
+
console.log(` ${dim('\u21B3')} ${dim.italic('\u201C' + truncPrompt + '\u201D')}`);
|
|
467
|
+
}
|
|
468
|
+
console.log();
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
// ── Token cost evidence from metrics ──
|
|
473
|
+
displayTokenEvidence(metrics);
|
|
474
|
+
}
|
|
475
|
+
|
|
476
|
+
function displayTokenEvidence(metrics) {
|
|
477
|
+
const evidenceLines = [];
|
|
478
|
+
|
|
479
|
+
// Decomposition: single-shot vs multi-step cost
|
|
480
|
+
const de = metrics.decomposition.details.tokenEvidence;
|
|
481
|
+
if (de && de.avgTokensPerExchangeSingleShot && de.avgTokensPerExchangeMultiStep) {
|
|
482
|
+
const ratio = (de.avgTokensPerExchangeSingleShot / de.avgTokensPerExchangeMultiStep).toFixed(1);
|
|
483
|
+
if (parseFloat(ratio) > 1.2) {
|
|
484
|
+
evidenceLines.push(
|
|
485
|
+
` ${dim('\u2022 Single-shot prompts cost')} ${orange(ratio + 'x')} ${dim('more tokens per exchange than multi-step sessions')}`
|
|
486
|
+
);
|
|
487
|
+
}
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
// Debug cycles: vague vs specific cost
|
|
491
|
+
const dbe = metrics.debugCycles.details.tokenEvidence;
|
|
492
|
+
if (dbe && dbe.avgTokensVagueDebug && dbe.avgTokensSpecificDebug) {
|
|
493
|
+
const ratio = (dbe.avgTokensVagueDebug / dbe.avgTokensSpecificDebug).toFixed(1);
|
|
494
|
+
if (parseFloat(ratio) > 1.2) {
|
|
495
|
+
evidenceLines.push(
|
|
496
|
+
` ${dim('\u2022 Vague debug prompts cost')} ${orange(ratio + 'x')} ${dim('more than specific error reports')}`
|
|
497
|
+
);
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
// AI Leverage: trivial prompts vs detailed ones
|
|
502
|
+
const aie = metrics.aiLeverage.details.tokenEvidence;
|
|
503
|
+
if (aie && aie.avgTokensTrivialPrompt && aie.avgTokensComplexPrompt) {
|
|
504
|
+
// Trivial prompts often cost nearly as much because Claude re-reads everything anyway
|
|
505
|
+
const savingsPct = Math.round((1 - aie.avgTokensTrivialPrompt / aie.avgTokensComplexPrompt) * 100);
|
|
506
|
+
if (savingsPct < 40) {
|
|
507
|
+
evidenceLines.push(
|
|
508
|
+
` ${dim('\u2022 Short vague prompts (<50 chars) cost')} ${dim(numberFormat(aie.avgTokensTrivialPrompt) + ' tokens')} ${dim('— only ' + savingsPct + '% less than detailed ones')}`
|
|
509
|
+
);
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
|
|
513
|
+
// Session structure: marathon vs focused cost
|
|
514
|
+
const sse = metrics.sessionStructure.details.tokenEvidence;
|
|
515
|
+
if (sse && sse.avgTokensPerExchangeMarathon && sse.avgTokensPerExchangeFocused) {
|
|
516
|
+
const ratio = (sse.avgTokensPerExchangeMarathon / sse.avgTokensPerExchangeFocused).toFixed(1);
|
|
517
|
+
if (parseFloat(ratio) > 1.1) {
|
|
518
|
+
evidenceLines.push(
|
|
519
|
+
` ${dim('\u2022 Marathon sessions (>60m) cost')} ${orange(ratio + 'x')} ${dim('more per exchange than focused ones (10-45m)')}`
|
|
520
|
+
);
|
|
521
|
+
}
|
|
522
|
+
}
|
|
523
|
+
|
|
524
|
+
// Context-setting vs no context
|
|
525
|
+
if (sse && sse.avgTokensPerExchangeNoContext && sse.avgTokensPerExchangeWithContext) {
|
|
526
|
+
const ratio = (sse.avgTokensPerExchangeNoContext / sse.avgTokensPerExchangeWithContext).toFixed(1);
|
|
527
|
+
if (parseFloat(ratio) > 1.1) {
|
|
528
|
+
evidenceLines.push(
|
|
529
|
+
` ${dim('\u2022 Sessions without upfront context cost')} ${orange(ratio + 'x')} ${dim('more per exchange')}`
|
|
530
|
+
);
|
|
531
|
+
}
|
|
532
|
+
}
|
|
533
|
+
|
|
534
|
+
if (evidenceLines.length > 0) {
|
|
535
|
+
console.log(` ${dim('WHAT YOUR HABITS ACTUALLY COST')}`);
|
|
536
|
+
console.log(` ${dim('\u2500'.repeat(53))}`);
|
|
537
|
+
for (const line of evidenceLines) {
|
|
538
|
+
console.log(line);
|
|
539
|
+
}
|
|
540
|
+
console.log();
|
|
541
|
+
}
|
|
542
|
+
}
|
|
543
|
+
|
|
356
544
|
// ══════════════════════════════════════════════
|
|
357
545
|
// CROSS-PLATFORM
|
|
358
546
|
// ══════════════════════════════════════════════
|
|
@@ -767,6 +955,7 @@ export function displayOffline(result, metrics, extra = {}) {
|
|
|
767
955
|
displaySummary(result, extra);
|
|
768
956
|
displayArchetype(result);
|
|
769
957
|
displayDimensions(result);
|
|
958
|
+
displayTokenEfficiency(extra.tokenEfficiency, metrics);
|
|
770
959
|
displayCrossPlatform(extra.perToolScores);
|
|
771
960
|
displayDataNarratives(metrics, new Set());
|
|
772
961
|
displayProjects(extra.insights);
|
|
@@ -789,6 +978,7 @@ export function displayFull(result, metrics, prose, extra = {}) {
|
|
|
789
978
|
displaySummary(result, extra);
|
|
790
979
|
displayArchetype(result);
|
|
791
980
|
displayDimensions(result);
|
|
981
|
+
displayTokenEfficiency(extra.tokenEfficiency, metrics);
|
|
792
982
|
displayCrossPlatform(extra.perToolScores);
|
|
793
983
|
displayNarratives(metrics, prose);
|
|
794
984
|
displayProjects(extra.insights);
|
package/src/index.js
CHANGED
|
@@ -11,6 +11,7 @@ import { computeDebugCycles } from './metrics/debug-cycles.js';
|
|
|
11
11
|
import { computeAILeverage } from './metrics/ai-leverage.js';
|
|
12
12
|
import { computeSessionStructure } from './metrics/session-structure.js';
|
|
13
13
|
import { computeOverallScore } from './scorer.js';
|
|
14
|
+
import { computeTokenEfficiency } from './metrics/token-efficiency.js';
|
|
14
15
|
import {
|
|
15
16
|
computeSignatures,
|
|
16
17
|
computeWatchPoints,
|
|
@@ -142,6 +143,9 @@ export async function run(options = {}) {
|
|
|
142
143
|
|
|
143
144
|
const result = computeOverallScore(metrics);
|
|
144
145
|
|
|
146
|
+
// ── Step 3a: Compute token efficiency analytics ──
|
|
147
|
+
const tokenEfficiency = computeTokenEfficiency(allSessions);
|
|
148
|
+
|
|
145
149
|
// ── Cross-platform scores ──
|
|
146
150
|
const perToolScores = tools.length > 1 ? computePerToolScores(allSessions) : null;
|
|
147
151
|
|
|
@@ -172,8 +176,8 @@ export async function run(options = {}) {
|
|
|
172
176
|
};
|
|
173
177
|
|
|
174
178
|
// ── Step 3b: Compute insights ──
|
|
175
|
-
const signatures = computeSignatures(allSessions, metrics);
|
|
176
|
-
const watchPoints = computeWatchPoints(allSessions, metrics);
|
|
179
|
+
const signatures = computeSignatures(allSessions, metrics, tokenEfficiency);
|
|
180
|
+
const watchPoints = computeWatchPoints(allSessions, metrics, tokenEfficiency);
|
|
177
181
|
const trajectory = computeTrajectory(allSessions);
|
|
178
182
|
const projectComplexity = computeProjectComplexity(allSessions);
|
|
179
183
|
const assessment = generateAssessment(result, metrics, signatures, watchPoints);
|
|
@@ -183,7 +187,7 @@ export async function run(options = {}) {
|
|
|
183
187
|
|
|
184
188
|
// ── JSON output ──
|
|
185
189
|
if (options.json) {
|
|
186
|
-
console.log(JSON.stringify({ metrics, result, sessionStats, perToolScores, scoreDelta, insights }, null, 2));
|
|
190
|
+
console.log(JSON.stringify({ metrics, result, sessionStats, perToolScores, scoreDelta, insights, tokenEfficiency }, null, 2));
|
|
187
191
|
return;
|
|
188
192
|
}
|
|
189
193
|
|
|
@@ -192,7 +196,7 @@ export async function run(options = {}) {
|
|
|
192
196
|
if (!options.offline) {
|
|
193
197
|
const [, proseResult] = await Promise.all([
|
|
194
198
|
displayProgressBar(1500),
|
|
195
|
-
generateProse(metrics, result, sessionStats).catch(() => null),
|
|
199
|
+
generateProse(metrics, result, sessionStats, tokenEfficiency).catch(() => null),
|
|
196
200
|
]);
|
|
197
201
|
prose = proseResult;
|
|
198
202
|
} else {
|
|
@@ -200,7 +204,7 @@ export async function run(options = {}) {
|
|
|
200
204
|
}
|
|
201
205
|
|
|
202
206
|
// ── Step 5: Display results ──
|
|
203
|
-
const extra = { scoreDelta, perToolScores, insights, sessionStats };
|
|
207
|
+
const extra = { scoreDelta, perToolScores, insights, sessionStats, tokenEfficiency };
|
|
204
208
|
if (options.offline) {
|
|
205
209
|
displayOffline(result, metrics, extra);
|
|
206
210
|
} else {
|
package/src/insights.js
CHANGED
|
@@ -45,6 +45,13 @@ const preflightPatterns = /^(before (we|you|i)|don'?t code|review (first|this|my
|
|
|
45
45
|
const testFirstPatterns = /\b(write (the )?tests? (first|before)|test.?driven|TDD|spec first|start with (tests?|specs?))\b/i;
|
|
46
46
|
const negativeConstraintPatterns = /\b(don'?t|do not|never|avoid|must not|shouldn'?t)\b.*\b(add|create|use|include|change|modify|touch|remove)\b/i;
|
|
47
47
|
|
|
48
|
+
// Number formatting for insights text
|
|
49
|
+
function numberFormatInsight(n) {
|
|
50
|
+
if (n >= 1_000_000) return (n / 1_000_000).toFixed(1).replace(/\.0$/, '') + 'M';
|
|
51
|
+
if (n >= 1000) return (n / 1000).toFixed(1).replace(/\.0$/, '') + 'k';
|
|
52
|
+
return String(n);
|
|
53
|
+
}
|
|
54
|
+
|
|
48
55
|
// Evidence quality filter (same rules as metric parsers)
|
|
49
56
|
const noisePatterns = /^This session is being continued|^\[?[0-9T:.Z-]{20,}|^\S+@\S+.*[%$#>]|^\s*\$\s|^\s*>\s/;
|
|
50
57
|
function isGoodEvidence(prompt) {
|
|
@@ -55,7 +62,7 @@ function isGoodEvidence(prompt) {
|
|
|
55
62
|
return true;
|
|
56
63
|
}
|
|
57
64
|
|
|
58
|
-
export function computeSignatures(allSessions, metrics) {
|
|
65
|
+
export function computeSignatures(allSessions, metrics, tokenEfficiency = null) {
|
|
59
66
|
const signatures = [];
|
|
60
67
|
const d = metrics.decomposition.details;
|
|
61
68
|
const db = metrics.debugCycles.details;
|
|
@@ -190,6 +197,19 @@ export function computeSignatures(allSessions, metrics) {
|
|
|
190
197
|
});
|
|
191
198
|
}
|
|
192
199
|
|
|
200
|
+
// ── Token-backed signature: efficient token usage ──
|
|
201
|
+
if (tokenEfficiency && tokenEfficiency.hasData) {
|
|
202
|
+
const te = tokenEfficiency;
|
|
203
|
+
// If context re-read ratio is below 90%, that's notably efficient
|
|
204
|
+
if (te.contextRereadRatio < 0.90 && te.sessionsAnalyzed >= 5) {
|
|
205
|
+
signatures.push({
|
|
206
|
+
name: 'Token-efficient prompting',
|
|
207
|
+
detail: `Only ${Math.round(te.contextRereadRatio * 100)}% of your tokens are context re-reads (typical: 95%+). Your focused sessions and clear prompts minimize wasted tokens. Estimated spend: $${te.estimatedCostTotal.toFixed(2)}.`,
|
|
208
|
+
evidence: null,
|
|
209
|
+
});
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
|
|
193
213
|
return signatures.slice(0, 4); // Max 4 signatures
|
|
194
214
|
}
|
|
195
215
|
|
|
@@ -197,7 +217,7 @@ export function computeSignatures(allSessions, metrics) {
|
|
|
197
217
|
// WATCH POINTS — Anti-patterns
|
|
198
218
|
// ══════════════════════════════════════════════
|
|
199
219
|
|
|
200
|
-
export function computeWatchPoints(allSessions, metrics) {
|
|
220
|
+
export function computeWatchPoints(allSessions, metrics, tokenEfficiency = null) {
|
|
201
221
|
const watchPoints = [];
|
|
202
222
|
const d = metrics.decomposition.details;
|
|
203
223
|
const db = metrics.debugCycles.details;
|
|
@@ -288,14 +308,47 @@ export function computeWatchPoints(allSessions, metrics) {
|
|
|
288
308
|
|
|
289
309
|
// Extended debug spirals
|
|
290
310
|
if (db.longLoops > 2) {
|
|
311
|
+
const loopCostStr = db.tokenEvidence?.avgTokensLongLoop
|
|
312
|
+
? ` Each spiral averages ${numberFormatInsight(db.tokenEvidence.avgTokensLongLoop)} tokens.`
|
|
313
|
+
: '';
|
|
291
314
|
watchPoints.push({
|
|
292
315
|
name: 'Debug spirals',
|
|
293
|
-
detail: `${db.longLoops} extended debug loops (>5 turns) detected
|
|
316
|
+
detail: `${db.longLoops} extended debug loops (>5 turns) detected.${loopCostStr} When stuck, try providing more specific error context or breaking the problem differently.`,
|
|
294
317
|
evidence: null,
|
|
295
318
|
});
|
|
296
319
|
}
|
|
297
320
|
|
|
298
|
-
|
|
321
|
+
// ── Token-backed watch points ──
|
|
322
|
+
if (tokenEfficiency && tokenEfficiency.hasData) {
|
|
323
|
+
const te = tokenEfficiency;
|
|
324
|
+
|
|
325
|
+
// Marathon sessions burning disproportionate tokens
|
|
326
|
+
const marathonSessions = te.costliestSessions.filter(s => s.exchanges > 50);
|
|
327
|
+
if (marathonSessions.length >= 2) {
|
|
328
|
+
const marathonCost = marathonSessions.reduce((s, m) => s + m.estimatedCost, 0);
|
|
329
|
+
const marathonPct = te.estimatedCostTotal > 0 ? Math.round(marathonCost / te.estimatedCostTotal * 100) : 0;
|
|
330
|
+
if (marathonPct > 40) {
|
|
331
|
+
watchPoints.push({
|
|
332
|
+
name: 'Marathon session tax',
|
|
333
|
+
detail: `${marathonSessions.length} marathon sessions (50+ exchanges) consumed ~${marathonPct}% of your total spend (~$${marathonCost.toFixed(2)}). Context compounds — splitting into focused sessions would reduce token waste.`,
|
|
334
|
+
evidence: null,
|
|
335
|
+
});
|
|
336
|
+
}
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
// Vague prompts costing more than specific ones
|
|
340
|
+
const vagueAvg = db.tokenEvidence?.avgTokensVagueDebug;
|
|
341
|
+
const specificAvg = db.tokenEvidence?.avgTokensSpecificDebug;
|
|
342
|
+
if (vagueAvg && specificAvg && vagueAvg > specificAvg * 1.5 && db.vagueReports > 3) {
|
|
343
|
+
watchPoints.push({
|
|
344
|
+
name: 'Vague prompts are expensive',
|
|
345
|
+
detail: `Your vague debug prompts average ${numberFormatInsight(vagueAvg)} tokens vs ${numberFormatInsight(specificAvg)} for specific ones — ${(vagueAvg / specificAvg).toFixed(1)}x more expensive. Adding error details upfront saves real money.`,
|
|
346
|
+
evidence: null,
|
|
347
|
+
});
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
return watchPoints.slice(0, 4); // Max 4 watch points (was 3, expanded for token insights)
|
|
299
352
|
}
|
|
300
353
|
|
|
301
354
|
// ══════════════════════════════════════════════
|
|
@@ -136,6 +136,28 @@ export function computeAILeverage(sessions) {
|
|
|
136
136
|
if (bestPlanPrompt) examples.push({ type: 'planning', prompt: bestPlanPrompt });
|
|
137
137
|
if (bestExplorePrompt) examples.push({ type: 'exploratory', prompt: bestExplorePrompt });
|
|
138
138
|
|
|
139
|
+
// ── Token cost evidence ──
|
|
140
|
+
// Compare cost of trivial prompts vs complex/architectural prompts
|
|
141
|
+
let trivialTokens = 0, trivialTokenCount = 0;
|
|
142
|
+
let complexTokensTotal = 0, complexTokensCount = 0;
|
|
143
|
+
let boilerplateTokens = 0, boilerplateTokenCount = 0;
|
|
144
|
+
let archTokens = 0, archTokenCount = 0;
|
|
145
|
+
|
|
146
|
+
for (const session of sessions) {
|
|
147
|
+
for (const exchange of session.exchanges) {
|
|
148
|
+
const prompt = exchange.userPrompt || '';
|
|
149
|
+
const t = exchange.tokenUsage;
|
|
150
|
+
const tokens = t ? (t.inputTokens + t.outputTokens + t.cacheReadTokens + t.cacheCreationTokens) : 0;
|
|
151
|
+
if (tokens === 0) continue;
|
|
152
|
+
|
|
153
|
+
if (prompt.length < 50) { trivialTokens += tokens; trivialTokenCount++; }
|
|
154
|
+
const sentences = prompt.split(/[.!?]+/).filter(s => s.trim().length > 10);
|
|
155
|
+
if (prompt.length > 200 && sentences.length >= 2) { complexTokensTotal += tokens; complexTokensCount++; }
|
|
156
|
+
if (boilerplatePatterns.test(prompt)) { boilerplateTokens += tokens; boilerplateTokenCount++; }
|
|
157
|
+
if (architecturalPatterns.test(prompt) || planningPatterns.test(prompt)) { archTokens += tokens; archTokenCount++; }
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
|
|
139
161
|
return {
|
|
140
162
|
score: Math.max(0, Math.min(100, score)),
|
|
141
163
|
details: {
|
|
@@ -152,6 +174,12 @@ export function computeAILeverage(sessions) {
|
|
|
152
174
|
research: highLeverageToolUses,
|
|
153
175
|
coding: codingToolUses,
|
|
154
176
|
},
|
|
177
|
+
tokenEvidence: {
|
|
178
|
+
avgTokensTrivialPrompt: trivialTokenCount > 0 ? Math.round(trivialTokens / trivialTokenCount) : null,
|
|
179
|
+
avgTokensComplexPrompt: complexTokensCount > 0 ? Math.round(complexTokensTotal / complexTokensCount) : null,
|
|
180
|
+
avgTokensBoilerplate: boilerplateTokenCount > 0 ? Math.round(boilerplateTokens / boilerplateTokenCount) : null,
|
|
181
|
+
avgTokensArchitectural: archTokenCount > 0 ? Math.round(archTokens / archTokenCount) : null,
|
|
182
|
+
},
|
|
155
183
|
},
|
|
156
184
|
examples,
|
|
157
185
|
};
|
|
@@ -145,6 +145,43 @@ export function computeDebugCycles(sessions) {
|
|
|
145
145
|
if (bestSpecificReport) examples.push({ type: 'specific_report', prompt: bestSpecificReport });
|
|
146
146
|
if (bestQuickFix) examples.push({ type: 'quick_fix', prompt: bestQuickFix });
|
|
147
147
|
|
|
148
|
+
// ── Token cost evidence ──
|
|
149
|
+
// Compare cost of vague vs specific debug exchanges
|
|
150
|
+
let vagueTokens = 0, vagueTokenCount = 0;
|
|
151
|
+
let specificTokens = 0, specificTokenCount = 0;
|
|
152
|
+
let longLoopTokens = 0, longLoopTokenCount = 0;
|
|
153
|
+
let quickFixTokens = 0, quickFixTokenCount = 0;
|
|
154
|
+
|
|
155
|
+
for (const session of sessions) {
|
|
156
|
+
const { exchanges } = session;
|
|
157
|
+
let debugExchanges = [];
|
|
158
|
+
let inDebug = false;
|
|
159
|
+
|
|
160
|
+
for (let i = 0; i < exchanges.length; i++) {
|
|
161
|
+
const prompt = exchanges[i].userPrompt || '';
|
|
162
|
+
const t = exchanges[i].tokenUsage;
|
|
163
|
+
const tokens = t ? (t.inputTokens + t.outputTokens + t.cacheReadTokens + t.cacheCreationTokens) : 0;
|
|
164
|
+
|
|
165
|
+
if (errorPatterns.test(prompt) && tokens > 0) {
|
|
166
|
+
if (!inDebug) { inDebug = true; debugExchanges = []; }
|
|
167
|
+
debugExchanges.push({ prompt, tokens });
|
|
168
|
+
|
|
169
|
+
if (vaguePhrases.test(prompt)) { vagueTokens += tokens; vagueTokenCount++; }
|
|
170
|
+
if (specificDebugPatterns.test(prompt) || prompt.length > 200) { specificTokens += tokens; specificTokenCount++; }
|
|
171
|
+
} else if (inDebug) {
|
|
172
|
+
if (debugExchanges.length <= 2) {
|
|
173
|
+
const total = debugExchanges.reduce((s, e) => s + e.tokens, 0);
|
|
174
|
+
quickFixTokens += total; quickFixTokenCount++;
|
|
175
|
+
} else if (debugExchanges.length > 5) {
|
|
176
|
+
const total = debugExchanges.reduce((s, e) => s + e.tokens, 0);
|
|
177
|
+
longLoopTokens += total; longLoopTokenCount++;
|
|
178
|
+
}
|
|
179
|
+
inDebug = false;
|
|
180
|
+
debugExchanges = [];
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
|
|
148
185
|
return {
|
|
149
186
|
score: Math.max(0, Math.min(100, score)),
|
|
150
187
|
details: {
|
|
@@ -155,6 +192,12 @@ export function computeDebugCycles(sessions) {
|
|
|
155
192
|
specificReportRatio: Math.round(specificRatio * 100),
|
|
156
193
|
vagueReports,
|
|
157
194
|
specificReports,
|
|
195
|
+
tokenEvidence: {
|
|
196
|
+
avgTokensVagueDebug: vagueTokenCount > 0 ? Math.round(vagueTokens / vagueTokenCount) : null,
|
|
197
|
+
avgTokensSpecificDebug: specificTokenCount > 0 ? Math.round(specificTokens / specificTokenCount) : null,
|
|
198
|
+
avgTokensQuickFix: quickFixTokenCount > 0 ? Math.round(quickFixTokens / quickFixTokenCount) : null,
|
|
199
|
+
avgTokensLongLoop: longLoopTokenCount > 0 ? Math.round(longLoopTokens / longLoopTokenCount) : null,
|
|
200
|
+
},
|
|
158
201
|
},
|
|
159
202
|
examples,
|
|
160
203
|
};
|
|
@@ -117,6 +117,27 @@ export function computeDecomposition(sessions) {
|
|
|
117
117
|
}
|
|
118
118
|
if (bestFollowupPrompt) examples.push({ type: 'followup', prompt: bestFollowupPrompt });
|
|
119
119
|
|
|
120
|
+
// ── Token cost evidence ──
|
|
121
|
+
// Compare token cost of single-shot sessions vs multi-step sessions
|
|
122
|
+
// to prove decomposition saves tokens
|
|
123
|
+
let singleShotTokens = 0, singleShotCount = 0;
|
|
124
|
+
let multiStepTokens = 0, multiStepCount = 0;
|
|
125
|
+
for (const session of sessions) {
|
|
126
|
+
const t = session.tokenUsage;
|
|
127
|
+
if (!t || (t.inputTokens + t.outputTokens + t.cacheReadTokens + t.cacheCreationTokens) === 0) continue;
|
|
128
|
+
const total = t.inputTokens + t.outputTokens + t.cacheReadTokens + t.cacheCreationTokens;
|
|
129
|
+
const perExchange = session.exchangeCount > 0 ? total / session.exchangeCount : total;
|
|
130
|
+
if (session.exchangeCount === 1) {
|
|
131
|
+
singleShotTokens += perExchange;
|
|
132
|
+
singleShotCount++;
|
|
133
|
+
} else if (session.exchangeCount >= 4) {
|
|
134
|
+
multiStepTokens += perExchange;
|
|
135
|
+
multiStepCount++;
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
const avgTokensSingleShot = singleShotCount > 0 ? Math.round(singleShotTokens / singleShotCount) : null;
|
|
139
|
+
const avgTokensMultiStep = multiStepCount > 0 ? Math.round(multiStepTokens / multiStepCount) : null;
|
|
140
|
+
|
|
120
141
|
return {
|
|
121
142
|
score: Math.max(0, Math.min(100, score)),
|
|
122
143
|
details: {
|
|
@@ -127,6 +148,10 @@ export function computeDecomposition(sessions) {
|
|
|
127
148
|
avgPromptLength: Math.round(avgPromptLength),
|
|
128
149
|
longPromptRatio: promptCount > 0 ? Math.round(longPromptCount / promptCount * 100) : 0,
|
|
129
150
|
contextualFollowupRatio: promptCount > 0 ? Math.round(followupRatio * 100) : 0,
|
|
151
|
+
tokenEvidence: {
|
|
152
|
+
avgTokensPerExchangeSingleShot: avgTokensSingleShot,
|
|
153
|
+
avgTokensPerExchangeMultiStep: avgTokensMultiStep,
|
|
154
|
+
},
|
|
130
155
|
},
|
|
131
156
|
examples,
|
|
132
157
|
};
|
|
@@ -144,6 +144,35 @@ export function computeSessionStructure(sessions) {
|
|
|
144
144
|
if (bestContextPrompt) examples.push({ type: 'context_setting', prompt: bestContextPrompt });
|
|
145
145
|
if (bestRefinementPrompt) examples.push({ type: 'refinement', prompt: bestRefinementPrompt });
|
|
146
146
|
|
|
147
|
+
// ── Token cost evidence ──
|
|
148
|
+
// Compare token cost of focused sessions vs marathon sessions
|
|
149
|
+
let focusedTokens = 0, focusedTokenCount = 0;
|
|
150
|
+
let marathonTokens = 0, marathonTokenCount = 0;
|
|
151
|
+
let contextSetTokens = 0, contextSetCount = 0;
|
|
152
|
+
let noContextTokens = 0, noContextCount = 0;
|
|
153
|
+
|
|
154
|
+
for (const session of sessions) {
|
|
155
|
+
const t = session.tokenUsage;
|
|
156
|
+
if (!t || (t.inputTokens + t.outputTokens + t.cacheReadTokens + t.cacheCreationTokens) === 0) continue;
|
|
157
|
+
const total = t.inputTokens + t.outputTokens + t.cacheReadTokens + t.cacheCreationTokens;
|
|
158
|
+
const perExchange = session.exchangeCount > 0 ? total / session.exchangeCount : total;
|
|
159
|
+
|
|
160
|
+
// Duration-based
|
|
161
|
+
if (session.durationMinutes >= 10 && session.durationMinutes <= 45) {
|
|
162
|
+
focusedTokens += perExchange; focusedTokenCount++;
|
|
163
|
+
} else if (session.durationMinutes > 60) {
|
|
164
|
+
marathonTokens += perExchange; marathonTokenCount++;
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
// Context-setting vs not
|
|
168
|
+
const firstPrompt = session.exchanges[0]?.userPrompt || '';
|
|
169
|
+
if (contextSettingPatterns.test(firstPrompt) || firstPrompt.length > 200) {
|
|
170
|
+
contextSetTokens += perExchange; contextSetCount++;
|
|
171
|
+
} else if (session.exchanges.length > 0) {
|
|
172
|
+
noContextTokens += perExchange; noContextCount++;
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
|
|
147
176
|
return {
|
|
148
177
|
score: Math.max(0, Math.min(100, score)),
|
|
149
178
|
details: {
|
|
@@ -158,6 +187,12 @@ export function computeSessionStructure(sessions) {
|
|
|
158
187
|
long: longSessions,
|
|
159
188
|
focused: focusedSessions,
|
|
160
189
|
},
|
|
190
|
+
tokenEvidence: {
|
|
191
|
+
avgTokensPerExchangeFocused: focusedTokenCount > 0 ? Math.round(focusedTokens / focusedTokenCount) : null,
|
|
192
|
+
avgTokensPerExchangeMarathon: marathonTokenCount > 0 ? Math.round(marathonTokens / marathonTokenCount) : null,
|
|
193
|
+
avgTokensPerExchangeWithContext: contextSetCount > 0 ? Math.round(contextSetTokens / contextSetCount) : null,
|
|
194
|
+
avgTokensPerExchangeNoContext: noContextCount > 0 ? Math.round(noContextTokens / noContextCount) : null,
|
|
195
|
+
},
|
|
161
196
|
},
|
|
162
197
|
examples,
|
|
163
198
|
};
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token Efficiency Analytics
|
|
3
|
+
*
|
|
4
|
+
* Computes token spend statistics from Claude Code session data.
|
|
5
|
+
* This is NOT a scored dimension — it provides concrete evidence
|
|
6
|
+
* that enriches the existing 4 metrics with cost data.
|
|
7
|
+
*
|
|
8
|
+
* Outputs:
|
|
9
|
+
* - Total token breakdown (input, output, cache read, cache creation)
|
|
10
|
+
* - Estimated cost using Anthropic pricing
|
|
11
|
+
* - Per-project token breakdown
|
|
12
|
+
* - Costliest sessions and prompts
|
|
13
|
+
* - Cache efficiency ratio (how much context is re-read vs new)
|
|
14
|
+
* - Prompt-type cost analysis (vague vs specific, short vs long)
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
// ── Anthropic pricing per million tokens (as of early 2025) ──
// Claude Code uses a mix of models; we estimate with Sonnet pricing
// which is the most common model in Claude Code sessions.
const PRICING = {
  'claude-sonnet-4-5-20250929': { input: 3.00, output: 15.00, cacheRead: 0.30, cacheCreation: 3.75 },
  'claude-opus-4-6': { input: 15.00, output: 75.00, cacheRead: 1.50, cacheCreation: 18.75 },
  'claude-haiku-4-5-20251001': { input: 0.80, output: 4.00, cacheRead: 0.08, cacheCreation: 1.00 },
  // Fallback for unknown models — use Sonnet pricing as default
  default: { input: 3.00, output: 15.00, cacheRead: 0.30, cacheCreation: 3.75 },
};

/**
 * Resolve a pricing table for a model identifier string.
 *
 * Matching order: exact model-family match against the PRICING keys (with
 * the trailing date suffix stripped), then a loose tier-name match
 * (opus / haiku / sonnet), and finally the Sonnet-priced default.
 *
 * @param {string|null|undefined} model - Model identifier, e.g. "claude-sonnet-4-5-20250929"
 * @returns {{input: number, output: number, cacheRead: number, cacheCreation: number}}
 */
function getPricing(model) {
  if (!model) return PRICING.default;

  // Family match: does the model string contain a pricing key minus its
  // trailing "-<digits>" date suffix?
  const family = Object.entries(PRICING).find(
    ([key]) => key !== 'default' && model.includes(key.replace(/-\d+$/, ''))
  );
  if (family) return family[1];

  // Try partial match by tier name.
  const tierToKey = {
    opus: 'claude-opus-4-6',
    haiku: 'claude-haiku-4-5-20251001',
    sonnet: 'claude-sonnet-4-5-20250929',
  };
  for (const [tier, key] of Object.entries(tierToKey)) {
    if (model.includes(tier)) return PRICING[key];
  }
  return PRICING.default;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Estimate USD cost for a token-usage record under a given pricing table.
 *
 * Rates in `pricing` are expressed per million tokens; the four usage
 * categories are billed independently and summed.
 *
 * @param {{inputTokens: number, outputTokens: number, cacheReadTokens: number, cacheCreationTokens: number}} tokens
 * @param {{input: number, output: number, cacheRead: number, cacheCreation: number}} pricing
 * @returns {number} Estimated cost in dollars
 */
function estimateCost(tokens, pricing) {
  const PER_MILLION = 1_000_000;
  // Order matters for float-sum reproducibility: input, output, cache read,
  // cache creation.
  const lineItems = [
    [tokens.inputTokens, pricing.input],
    [tokens.outputTokens, pricing.output],
    [tokens.cacheReadTokens, pricing.cacheRead],
    [tokens.cacheCreationTokens, pricing.cacheCreation],
  ];
  return lineItems.reduce((usd, [count, rate]) => usd + (count / PER_MILLION) * rate, 0);
}
|
|
48
|
+
|
|
49
|
+
/**
 * Accumulate one token-usage record into another, in place.
 *
 * Missing or falsy fields on `source` count as zero, so partially-populated
 * records are safe to add.
 *
 * @param {{inputTokens: number, outputTokens: number, cacheReadTokens: number, cacheCreationTokens: number}} target - Mutated accumulator
 * @param {Object} source - Usage record to add (fields optional)
 */
function addTokens(target, source) {
  const fields = ['inputTokens', 'outputTokens', 'cacheReadTokens', 'cacheCreationTokens'];
  for (const field of fields) {
    target[field] += source[field] || 0;
  }
}
|
|
55
|
+
|
|
56
|
+
/**
 * Sum all four token categories of a usage record.
 *
 * Missing or falsy fields count as zero.
 *
 * @param {Object} t - Token-usage record (fields optional)
 * @returns {number} Total tokens across input, output, cache read, cache creation
 */
function totalTokens(t) {
  const fields = ['inputTokens', 'outputTokens', 'cacheReadTokens', 'cacheCreationTokens'];
  return fields.reduce((sum, field) => sum + (t[field] || 0), 0);
}
|
|
59
|
+
|
|
60
|
+
/**
 * Compute comprehensive token efficiency analytics.
 *
 * @param {Array} sessions - Parsed sessions with tokenUsage on each exchange
 * @returns {Object} Token analytics data (not a score)
 */
export function computeTokenEfficiency(sessions) {
  if (sessions.length === 0) {
    return { hasData: false };
  }

  // Only sessions that actually carry token counts — Claude Code is currently
  // the only parser that supplies tokenUsage.
  const tokenSessions = sessions.filter(
    (s) => s.tokenUsage && totalTokens(s.tokenUsage) > 0
  );
  if (tokenSessions.length === 0) {
    return { hasData: false };
  }

  // ── Aggregate totals across every token-bearing session ──
  const totals = { inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheCreationTokens: 0 };
  for (const s of tokenSessions) {
    addTokens(totals, s.tokenUsage);
  }
  const grandTotal = totalTokens(totals);

  // ── Cost estimation ──
  // Flat default pricing for now; could be refined per-message if model data
  // is available on exchanges.
  const pricing = PRICING.default;
  const estimatedCostTotal = estimateCost(totals, pricing);

  // ── Token composition as percentages of the grand total ──
  const pct = (n) => (grandTotal > 0 ? (n / grandTotal) * 100 : 0);
  const composition = {
    inputPct: pct(totals.inputTokens),
    outputPct: pct(totals.outputTokens),
    cacheReadPct: pct(totals.cacheReadTokens),
    cacheCreationPct: pct(totals.cacheCreationTokens),
  };

  // The "context re-reading" ratio: cache_read / (cache_read + output).
  // Shows how much of Claude's work is re-reading context vs producing
  // new output.
  const rereadDenom = totals.cacheReadTokens + totals.outputTokens;
  const contextRereadRatio = rereadDenom > 0 ? totals.cacheReadTokens / rereadDenom : 0;

  // ── Per-project breakdown ──
  const byProject = {};
  for (const s of tokenSessions) {
    const key = s.project || 'unknown';
    byProject[key] ??= {
      tokens: { inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheCreationTokens: 0 },
      sessions: 0,
      exchanges: 0,
    };
    const entry = byProject[key];
    addTokens(entry.tokens, s.tokenUsage);
    entry.sessions += 1;
    entry.exchanges += s.exchangeCount;
  }

  const perProject = Object.entries(byProject)
    .map(([name, data]) => {
      const projectTotal = totalTokens(data.tokens);
      return {
        // Long project paths get elided from the left so the tail stays visible.
        name: name.length > 30 ? '...' + name.slice(-27) : name,
        fullName: name,
        totalTokens: projectTotal,
        estimatedCost: estimateCost(data.tokens, pricing),
        sessions: data.sessions,
        exchanges: data.exchanges,
        tokensPerExchange: data.exchanges > 0 ? Math.round(projectTotal / data.exchanges) : 0,
        ...data.tokens,
      };
    })
    .sort((a, b) => b.totalTokens - a.totalTokens);

  // ── Costliest sessions (top 5 by total tokens) ──
  const costliestSessions = tokenSessions
    .map((s) => {
      const sessionTotal = totalTokens(s.tokenUsage);
      return {
        id: s.id,
        project: s.project || 'unknown',
        totalTokens: sessionTotal,
        estimatedCost: estimateCost(s.tokenUsage, pricing),
        exchanges: s.exchangeCount,
        durationMinutes: s.durationMinutes,
        cacheReadRatio: sessionTotal > 0 ? s.tokenUsage.cacheReadTokens / sessionTotal : 0,
        firstPrompt: s.exchanges[0]?.userPrompt?.slice(0, 80) || '',
      };
    })
    .sort((a, b) => b.totalTokens - a.totalTokens)
    .slice(0, 5);

  // ── Costliest exchanges (individual prompts) ──
  const allExchanges = tokenSessions.flatMap((s) =>
    s.exchanges
      .map((ex, i) => ({ ex, i }))
      .filter(({ ex }) => ex.tokenUsage && totalTokens(ex.tokenUsage) !== 0)
      .map(({ ex, i }) => ({
        prompt: ex.userPrompt || '',
        totalTokens: totalTokens(ex.tokenUsage),
        estimatedCost: estimateCost(ex.tokenUsage, pricing),
        cacheReadTokens: ex.tokenUsage.cacheReadTokens,
        outputTokens: ex.tokenUsage.outputTokens,
        sessionId: s.id,
        project: s.project || 'unknown',
        exchangeIndex: i,
      }))
  );

  allExchanges.sort((a, b) => b.totalTokens - a.totalTokens);
  const costliestExchanges = allExchanges.slice(0, 5);

  // ── Prompt length vs token cost correlation ──
  // Group exchanges by prompt-length bucket and compute avg token cost.
  const buckets = {
    veryShort: { label: '< 20 chars', prompts: 0, totalTokens: 0, totalCost: 0 },
    short: { label: '20-100 chars', prompts: 0, totalTokens: 0, totalCost: 0 },
    medium: { label: '100-500 chars', prompts: 0, totalTokens: 0, totalCost: 0 },
    long: { label: '500+ chars', prompts: 0, totalTokens: 0, totalCost: 0 },
  };
  const bucketForLength = (len) => {
    if (len < 20) return buckets.veryShort;
    if (len < 100) return buckets.short;
    if (len < 500) return buckets.medium;
    return buckets.long;
  };
  for (const ex of allExchanges) {
    const b = bucketForLength(ex.prompt.length);
    b.prompts += 1;
    b.totalTokens += ex.totalTokens;
    b.totalCost += ex.estimatedCost;
  }

  const promptLengthAnalysis = Object.values(buckets)
    .filter((b) => b.prompts > 0)
    .map((b) => ({
      ...b,
      avgTokens: Math.round(b.totalTokens / b.prompts),
      avgCost: b.totalCost / b.prompts,
    }));

  // ── Session length vs token efficiency ──
  // Marathon sessions compound context, so later exchanges cost more.
  const sessionLengthAnalysis = {
    short: { label: '1-5 exchanges', sessions: 0, avgTokensPerExchange: 0, totalTokens: 0, totalExchanges: 0 },
    medium: { label: '6-20 exchanges', sessions: 0, avgTokensPerExchange: 0, totalTokens: 0, totalExchanges: 0 },
    long: { label: '21-50 exchanges', sessions: 0, avgTokensPerExchange: 0, totalTokens: 0, totalExchanges: 0 },
    marathon: { label: '50+ exchanges', sessions: 0, avgTokensPerExchange: 0, totalTokens: 0, totalExchanges: 0 },
  };
  const bucketForExchangeCount = (ec) => {
    if (ec <= 5) return sessionLengthAnalysis.short;
    if (ec <= 20) return sessionLengthAnalysis.medium;
    if (ec <= 50) return sessionLengthAnalysis.long;
    return sessionLengthAnalysis.marathon;
  };
  for (const s of tokenSessions) {
    const b = bucketForExchangeCount(s.exchangeCount);
    b.sessions += 1;
    b.totalTokens += totalTokens(s.tokenUsage);
    b.totalExchanges += s.exchangeCount;
  }
  for (const b of Object.values(sessionLengthAnalysis)) {
    b.avgTokensPerExchange = b.totalExchanges > 0
      ? Math.round(b.totalTokens / b.totalExchanges)
      : 0;
  }

  // ── Top-level stats ──
  const avgTokensPerSession = tokenSessions.length > 0
    ? Math.round(grandTotal / tokenSessions.length)
    : 0;
  const avgTokensPerExchange = allExchanges.length > 0
    ? Math.round(grandTotal / allExchanges.length)
    : 0;

  return {
    hasData: true,
    sessionsAnalyzed: tokenSessions.length,
    totals,
    grandTotal,
    estimatedCostTotal,
    composition,
    contextRereadRatio,
    avgTokensPerSession,
    avgTokensPerExchange,
    perProject,
    costliestSessions,
    costliestExchanges,
    promptLengthAnalysis,
    sessionLengthAnalysis: Object.values(sessionLengthAnalysis).filter((b) => b.sessions > 0),
  };
}
|
|
@@ -81,6 +81,14 @@ function parseSessionFile(filePath) {
|
|
|
81
81
|
// Skip tool result messages (these are system-injected responses to tool calls)
|
|
82
82
|
if (role === 'user' && hasToolResults(entry.message.content)) continue;
|
|
83
83
|
|
|
84
|
+
// Extract token usage from assistant messages
|
|
85
|
+
const usage = (role === 'assistant' && entry.message.usage) ? {
|
|
86
|
+
inputTokens: entry.message.usage.input_tokens || 0,
|
|
87
|
+
outputTokens: entry.message.usage.output_tokens || 0,
|
|
88
|
+
cacheReadTokens: entry.message.usage.cache_read_input_tokens || 0,
|
|
89
|
+
cacheCreationTokens: entry.message.usage.cache_creation_input_tokens || 0,
|
|
90
|
+
} : null;
|
|
91
|
+
|
|
84
92
|
const turn = {
|
|
85
93
|
role,
|
|
86
94
|
text: extractTextContent(entry.message.content),
|
|
@@ -90,6 +98,7 @@ function parseSessionFile(filePath) {
|
|
|
90
98
|
uuid: entry.uuid || null,
|
|
91
99
|
parentUuid: entry.parentUuid || null,
|
|
92
100
|
model: entry.message.model || null,
|
|
101
|
+
usage,
|
|
93
102
|
};
|
|
94
103
|
|
|
95
104
|
// Skip empty assistant messages that are just tool call continuations
|
|
@@ -120,11 +129,19 @@ function groupIntoExchanges(turns) {
|
|
|
120
129
|
assistantResponses: [],
|
|
121
130
|
toolCalls: [],
|
|
122
131
|
thinkingContent: [],
|
|
132
|
+
tokenUsage: { inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheCreationTokens: 0 },
|
|
123
133
|
};
|
|
124
134
|
} else if (turn.role === 'assistant' && current) {
|
|
125
135
|
if (turn.text) current.assistantResponses.push(turn.text);
|
|
126
136
|
if (turn.thinking) current.thinkingContent.push(turn.thinking);
|
|
127
137
|
current.toolCalls.push(...turn.toolCalls);
|
|
138
|
+
// Accumulate token usage across all assistant turns in this exchange
|
|
139
|
+
if (turn.usage) {
|
|
140
|
+
current.tokenUsage.inputTokens += turn.usage.inputTokens;
|
|
141
|
+
current.tokenUsage.outputTokens += turn.usage.outputTokens;
|
|
142
|
+
current.tokenUsage.cacheReadTokens += turn.usage.cacheReadTokens;
|
|
143
|
+
current.tokenUsage.cacheCreationTokens += turn.usage.cacheCreationTokens;
|
|
144
|
+
}
|
|
128
145
|
}
|
|
129
146
|
}
|
|
130
147
|
|
|
@@ -159,6 +176,15 @@ export function parseProject(projectPath) {
|
|
|
159
176
|
.map(t => new Date(t).getTime())
|
|
160
177
|
.sort();
|
|
161
178
|
|
|
179
|
+
// Aggregate token usage across all exchanges in this session
|
|
180
|
+
const sessionTokens = { inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheCreationTokens: 0 };
|
|
181
|
+
for (const ex of exchanges) {
|
|
182
|
+
sessionTokens.inputTokens += ex.tokenUsage.inputTokens;
|
|
183
|
+
sessionTokens.outputTokens += ex.tokenUsage.outputTokens;
|
|
184
|
+
sessionTokens.cacheReadTokens += ex.tokenUsage.cacheReadTokens;
|
|
185
|
+
sessionTokens.cacheCreationTokens += ex.tokenUsage.cacheCreationTokens;
|
|
186
|
+
}
|
|
187
|
+
|
|
162
188
|
sessions.push({
|
|
163
189
|
id: file.replace('.jsonl', ''),
|
|
164
190
|
file,
|
|
@@ -170,6 +196,7 @@ export function parseProject(projectPath) {
|
|
|
170
196
|
durationMinutes: timestamps.length >= 2
|
|
171
197
|
? Math.round((timestamps[timestamps.length - 1] - timestamps[0]) / 60000)
|
|
172
198
|
: 0,
|
|
199
|
+
tokenUsage: sessionTokens,
|
|
173
200
|
});
|
|
174
201
|
}
|
|
175
202
|
|
package/src/upload.js
CHANGED
|
@@ -15,7 +15,7 @@ function truncateExamples(examples, maxLen = 120) {
|
|
|
15
15
|
}));
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
-
export async function generateProse(metrics, result, sessionStats) {
|
|
18
|
+
export async function generateProse(metrics, result, sessionStats, tokenEfficiency = null) {
|
|
19
19
|
const payload = {
|
|
20
20
|
metrics: {
|
|
21
21
|
decomposition: {
|
|
@@ -46,6 +46,15 @@ export async function generateProse(metrics, result, sessionStats) {
|
|
|
46
46
|
tier: result.tier,
|
|
47
47
|
},
|
|
48
48
|
sessionStats,
|
|
49
|
+
// Include token analytics summary for richer prose generation
|
|
50
|
+
tokenEfficiency: tokenEfficiency && tokenEfficiency.hasData ? {
|
|
51
|
+
grandTotal: tokenEfficiency.grandTotal,
|
|
52
|
+
estimatedCostTotal: tokenEfficiency.estimatedCostTotal,
|
|
53
|
+
contextRereadRatio: tokenEfficiency.contextRereadRatio,
|
|
54
|
+
composition: tokenEfficiency.composition,
|
|
55
|
+
avgTokensPerExchange: tokenEfficiency.avgTokensPerExchange,
|
|
56
|
+
sessionsAnalyzed: tokenEfficiency.sessionsAnalyzed,
|
|
57
|
+
} : null,
|
|
49
58
|
};
|
|
50
59
|
|
|
51
60
|
const response = await fetch(`${API_BASE}/public/cli/analyze`, {
|