chekk 0.4.3 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/chekk.js +1 -1
- package/package.json +2 -2
- package/src/display.js +357 -12
- package/src/index.js +11 -7
- package/src/insights.js +103 -7
- package/src/metrics/ai-leverage.js +28 -0
- package/src/metrics/debug-cycles.js +43 -0
- package/src/metrics/decomposition.js +25 -0
- package/src/metrics/session-structure.js +35 -0
- package/src/metrics/token-efficiency.js +258 -0
- package/src/parsers/claude-code.js +27 -0
- package/src/upload.js +10 -1
package/bin/chekk.js
CHANGED
|
@@ -4,7 +4,7 @@ import { execSync, spawn } from 'child_process';
|
|
|
4
4
|
import { Command } from 'commander';
|
|
5
5
|
import { run } from '../src/index.js';
|
|
6
6
|
|
|
7
|
-
const LOCAL_VERSION = '0.
|
|
7
|
+
const LOCAL_VERSION = '0.5.1';
|
|
8
8
|
|
|
9
9
|
// ── Auto-update check ──
|
|
10
10
|
// If running from a cached npx install, check if there's a newer version
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "chekk",
|
|
3
|
-
"version": "0.4.3",
|
|
4
|
-
"description": "See how you prompt. Chekk analyzes your AI coding workflow
|
|
3
|
+
"version": "0.5.1",
|
|
4
|
+
"description": "See how you prompt. Chekk analyzes your AI coding workflow, tells you what kind of engineer you are, and shows what your habits actually cost.",
|
|
5
5
|
"bin": {
|
|
6
6
|
"chekk": "./bin/chekk.js"
|
|
7
7
|
},
|
package/src/display.js
CHANGED
|
@@ -37,6 +37,8 @@ function progressBar(score, width = 18) {
|
|
|
37
37
|
}
|
|
38
38
|
|
|
39
39
|
/**
 * Format a count compactly with a k / M / B suffix and at most one decimal.
 *
 * Values below 1000 are returned via String(n) unchanged. Larger values are
 * scaled to the largest unit they reach; a trailing ".0" is dropped.
 *
 * @param {number} n - The value to format.
 * @returns {string} e.g. 999 -> "999", 1500 -> "1.5k", 2000000 -> "2M".
 */
function numberFormat(n) {
  // Ordered largest-first so the first unit that n reaches is the one used.
  const units = [
    [1_000_000_000, 'B'],
    [1_000_000, 'M'],
    [1000, 'k'],
  ];

  for (let i = 0; i < units.length; i++) {
    const [size, suffix] = units[i];
    // Written as !(n >= size) so NaN falls through to String(n).
    if (!(n >= size)) continue;

    const scaled = (n / size).toFixed(1);
    // A value just under the next unit (e.g. 999_999) rounds to "1000.0";
    // roll it over to "1M" / "1B" rather than printing "1000k" / "1000M".
    if (scaled === '1000.0' && i > 0) return '1' + units[i - 1][1];
    return scaled.replace(/\.0$/, '') + suffix;
  }

  return String(n);
}
|
|
@@ -171,7 +173,7 @@ export function displayHeader() {
|
|
|
171
173
|
console.log();
|
|
172
174
|
const lines = [
|
|
173
175
|
'',
|
|
174
|
-
` ${bold.white('chekk')}${dim(' v0.
|
|
176
|
+
` ${bold.white('chekk')}${dim(' v0.5.1')}`,
|
|
175
177
|
` ${dim('prompt engineering capability profile')}`,
|
|
176
178
|
'',
|
|
177
179
|
];
|
|
@@ -229,7 +231,7 @@ function displayProfileHeader(result, extra = {}) {
|
|
|
229
231
|
console.log(` ${bold.white('PROMPT ENGINEERING CAPABILITY PROFILE')}`);
|
|
230
232
|
console.log();
|
|
231
233
|
if (sessionStats) {
|
|
232
|
-
console.log(` ${dim(`Generated ${dateStr} | chekk v0.
|
|
234
|
+
console.log(` ${dim(`Generated ${dateStr} | chekk v0.5.1`)}`);
|
|
233
235
|
console.log(` ${dim(`Analysis: ${sessionStats.totalSessions} sessions \u00B7 ${sessionStats.tools.length} tool${sessionStats.tools.length > 1 ? 's' : ''} \u00B7 ${numberFormat(sessionStats.totalExchanges)} exchanges`)}`);
|
|
234
236
|
if (sessionStats.dateRangeShort) {
|
|
235
237
|
console.log(` ${dim(`Period: ${sessionStats.dateRangeShort}`)}`);
|
|
@@ -353,6 +355,319 @@ function displayDimensions(result) {
|
|
|
353
355
|
console.log();
|
|
354
356
|
}
|
|
355
357
|
|
|
358
|
+
// ══════════════════════════════════════════════
|
|
359
|
+
// TOKEN EFFICIENCY — Spend overview panel
|
|
360
|
+
// ══════════════════════════════════════════════
|
|
361
|
+
|
|
362
|
+
/**
 * Print the TOKEN EFFICIENCY panel: an overview box (totals, estimated cost,
 * a 40-block composition bar with legend), an optional per-project spend
 * table, the costliest sessions, and finally the habit-cost evidence list.
 *
 * Prints nothing when `tokenEfficiency` is missing or has `hasData` falsy.
 *
 * @param {object|null|undefined} tokenEfficiency - Token analytics; reads
 *   grandTotal, estimatedCostTotal, sessionsAnalyzed, avgTokensPerExchange,
 *   composition.{cacheReadPct,cacheCreationPct,inputPct,outputPct},
 *   perProject and costliestSessions.
 * @param {object} metrics - Dimension metrics, forwarded to displayTokenEvidence.
 * @returns {void}
 */
export function displayTokenEfficiency(tokenEfficiency, metrics) {
  if (!tokenEfficiency || !tokenEfficiency.hasData) return;

  const te = tokenEfficiency;
  console.log(dim(' TOKEN EFFICIENCY'));
  console.log();

  // ── Overview stats ──
  const overviewLines = [
    '',
    ` ${dim('Total tokens')} ${bold(numberFormat(te.grandTotal))}`,
    ` ${dim('Est. cost')} ${bold('$' + te.estimatedCostTotal.toFixed(2))}`,
    ` ${dim('Sessions')} ${dim(String(te.sessionsAnalyzed))}`,
    ` ${dim('Avg/exchange')} ${dim(numberFormat(te.avgTokensPerExchange) + ' tokens')}`,
    '',
  ];

  // Token composition bar — ensure every non-zero category gets at least 1 block
  const barWidth = 40;
  const categories = [
    { pct: te.composition.cacheReadPct, color: orange, label: 'context re-read' },
    { pct: te.composition.cacheCreationPct, color: yellow, label: 'cache create' },
    { pct: te.composition.inputPct, color: blue, label: 'new input' },
    { pct: te.composition.outputPct, color: green, label: 'output (code)' },
  ];

  // Proportional width, floored at one block for any non-zero share.
  const widths = categories.map((c) =>
    (c.pct > 0 ? Math.max(1, Math.round((c.pct / 100) * barWidth)) : 0)
  );

  // Rounding and the one-block floor can leave the sum off by a few blocks;
  // absorb the difference in the largest segment. Skipped when every share is
  // zero so an empty composition does not render as a full first-colour bar.
  const total = widths.reduce((a, b) => a + b, 0);
  if (total > 0 && total !== barWidth) {
    const largest = widths.indexOf(Math.max(...widths));
    widths[largest] += barWidth - total;
  }

  const barStr = categories
    .map((c, i) => c.color('\u2588'.repeat(Math.max(0, widths[i]))))
    .join('');

  overviewLines.push(` ${barStr}`);

  // Format percentages with appropriate precision
  const fmtPct = (pct) => {
    if (pct >= 99.5) return pct.toFixed(1) + '%';
    if (pct >= 10) return Math.round(pct) + '%';
    if (pct >= 1) return pct.toFixed(1) + '%';
    if (pct > 0) return pct.toFixed(2) + '%';
    return '0%';
  };

  overviewLines.push(` ${orange('\u2588')} ${dim('context re-read ' + fmtPct(te.composition.cacheReadPct))} ` +
    `${yellow('\u2588')} ${dim('cache create ' + fmtPct(te.composition.cacheCreationPct))}`);
  overviewLines.push(` ${blue('\u2588')} ${dim('new input ' + fmtPct(te.composition.inputPct))} ` +
    `${green('\u2588')} ${dim('output ' + fmtPct(te.composition.outputPct))}`);
  overviewLines.push('');

  // The key insight — use composition percentages for accuracy
  const outputPct = te.composition.outputPct;
  const nonOutputPct = 100 - outputPct;
  if (outputPct < 50) {
    overviewLines.push(` ${dim('Only')} ${bold(fmtPct(outputPct))} ${dim('of tokens are Claude writing code.')}`);
    overviewLines.push(` ${dim('The other')} ${bold(fmtPct(nonOutputPct))} ${dim('is context re-reading.')}`);
    overviewLines.push('');
  }

  for (const l of box(overviewLines, 53)) console.log(l);
  console.log();

  // ── Per-project breakdown ──
  // Only shown when there is more than one project to compare; top 5 rows.
  const projects = te.perProject ?? [];
  if (projects.length > 1) {
    console.log(` ${dim('SPEND BY PROJECT')}`);
    console.log(` ${dim('\u2500'.repeat(53))}`);
    for (const p of projects.slice(0, 5)) {
      const pctOfTotal = te.grandTotal > 0 ? Math.round(p.totalTokens / te.grandTotal * 100) : 0;
      const costStr = '$' + p.estimatedCost.toFixed(2);
      const name = String(p.name ?? '');
      // Keep the tail of long names: '...' plus the last 21 characters.
      const shortName = name.length > 24 ? '...' + name.slice(-21) : name;
      console.log(
        ` ${pad(white(shortName), 26)} ${pad(dim(numberFormat(p.totalTokens) + ' tokens'), 16)} ` +
        `${pad(dim(costStr), 8)} ${dim(pctOfTotal + '%')}`
      );
    }
    console.log();
  }

  // ── Costliest sessions ──
  const costliest = te.costliestSessions ?? [];
  if (costliest.length > 0) {
    console.log(` ${dim('COSTLIEST SESSIONS')}`);
    console.log(` ${dim('\u2500'.repeat(53))}`);
    for (const s of costliest.slice(0, 3)) {
      const costStr = '$' + s.estimatedCost.toFixed(2);
      // A session may carry no first prompt; treat that as an empty string
      // so the snippet line is simply omitted instead of throwing.
      const firstPrompt = String(s.firstPrompt ?? '');
      const truncPrompt = firstPrompt.length > 40 ? firstPrompt.slice(0, 37) + '...' : firstPrompt;
      console.log(
        ` ${dim(numberFormat(s.totalTokens) + ' tokens')} ${dim(costStr)} ${dim(s.exchanges + ' exchanges')}`
      );
      if (truncPrompt) {
        console.log(` ${dim('\u21B3')} ${dim.italic('\u201C' + truncPrompt + '\u201D')}`);
      }
      console.log();
    }
  }

  // ── Token cost evidence from metrics ──
  displayTokenEvidence(metrics, tokenEfficiency);
}
|
|
476
|
+
|
|
477
|
+
// ── Per-dimension cost annotation ──
|
|
478
|
+
// Shows a single cost insight line under each scored dimension
|
|
479
|
+
|
|
480
|
+
/**
 * Format a dollar amount for compact display.
 *
 *   >= 999.5          -> "$1k", "$1.5k" (one decimal, trailing ".0" dropped)
 *   100 .. < 999.5    -> whole dollars, e.g. "$250"
 *   0.01 .. < 100     -> two decimals, e.g. "$12.50"
 *   0 < x < 0.01      -> up to four decimals, e.g. "$0.004"; below a
 *                        hundredth of a cent, "<$0.0001"
 *   0 (and negatives) -> two decimals, e.g. "$0.00"
 *
 * @param {number} amount - Amount in dollars.
 * @returns {string} The formatted amount including the "$" sign.
 */
function dollarFormat(amount) {
  // Threshold is 999.5 rather than 1000 so a value that would round up to
  // "$1000" in the whole-dollar branch is shown as "$1k" instead.
  if (amount >= 999.5) return '$' + (amount / 1000).toFixed(1).replace(/\.0$/, '') + 'k';
  if (amount >= 100) return '$' + Math.round(amount);

  // Per-exchange costs can be fractions of a cent; two decimals would show
  // them all as "$0.00" and make the comparisons they appear in meaningless.
  if (amount > 0 && amount < 0.01) {
    if (amount < 0.0001) return '<$0.0001';
    return '$' + amount.toFixed(4).replace(/0+$/, '');
  }

  return '$' + amount.toFixed(2);
}
|
|
486
|
+
|
|
487
|
+
/**
 * Print at most one "↳ Cost:" line for a scored dimension, converting the
 * dimension's average-token evidence into dollars with a blended per-token
 * rate (estimatedCostTotal / grandTotal).
 *
 * Prints nothing when token data is absent, when no positive rate can be
 * derived, when the dimension name is not one of 'thinking', 'debugging',
 * 'ai leverage' or 'workflow', or when the needed evidence is missing.
 *
 * @param {string} dimensionName - Lower-case dimension title.
 * @param {object} metrics - Reads `<dimension>.details.tokenEvidence`.
 * @param {object|null|undefined} tokenEfficiency - Token analytics; reads
 *   hasData, estimatedCostTotal and grandTotal.
 * @returns {void}
 */
function displayDimensionCostLine(dimensionName, metrics, tokenEfficiency) {
  if (!tokenEfficiency || !tokenEfficiency.hasData) return;

  const pricing = tokenEfficiency.estimatedCostTotal / Math.max(1, tokenEfficiency.grandTotal);
  // With no positive rate every amount would print as "$0.00"; stay silent,
  // matching the `pricing > 0` guard in displayTokenEvidence. Written as
  // !(pricing > 0) so a NaN rate is rejected as well.
  if (!(pricing > 0)) return;

  if (dimensionName === 'thinking') {
    const de = metrics.decomposition.details.tokenEvidence;
    if (de && de.avgTokensPerExchangeSingleShot && de.avgTokensPerExchangeMultiStep) {
      const ssCost = de.avgTokensPerExchangeSingleShot * pricing;
      const msCost = de.avgTokensPerExchangeMultiStep * pricing;
      // Only worth a line when single-shot is more than 10% dearer.
      if (ssCost > msCost * 1.1) {
        console.log(` ${dim('\u21B3 Cost:')} ${dim('single-shot')} ${orange(dollarFormat(ssCost) + '/exchange')} ${dim('vs multi-step')} ${green(dollarFormat(msCost) + '/exchange')}`);
      }
    }
    return;
  }

  if (dimensionName === 'debugging') {
    const dbe = metrics.debugCycles.details.tokenEvidence;
    if (dbe && dbe.avgTokensQuickFix && dbe.avgTokensLongLoop) {
      const qfCost = dbe.avgTokensQuickFix * pricing;
      const llCost = dbe.avgTokensLongLoop * pricing;
      console.log(` ${dim('\u21B3 Cost:')} ${dim('quick fix')} ${green(dollarFormat(qfCost))} ${dim('vs debug spiral')} ${orange(dollarFormat(llCost))}`);
    } else if (dbe && dbe.avgTokensSpecificDebug) {
      // Fallback when there is no quick-fix / long-loop pair to compare.
      const spCost = dbe.avgTokensSpecificDebug * pricing;
      console.log(` ${dim('\u21B3 Cost:')} ${dim('avg debug cycle')} ${dim(dollarFormat(spCost))}`);
    }
    return;
  }

  if (dimensionName === 'ai leverage') {
    const aie = metrics.aiLeverage.details.tokenEvidence;
    if (aie && aie.avgTokensArchitectural && aie.avgTokensBoilerplate) {
      const archCost = aie.avgTokensArchitectural * pricing;
      const boilCost = aie.avgTokensBoilerplate * pricing;
      console.log(` ${dim('\u21B3 Cost:')} ${dim('architectural prompt')} ${dim(dollarFormat(archCost))} ${dim('vs boilerplate')} ${dim(dollarFormat(boilCost))}`);
    } else if (aie && aie.avgTokensTrivialPrompt && aie.avgTokensComplexPrompt) {
      const trivCost = aie.avgTokensTrivialPrompt * pricing;
      const compCost = aie.avgTokensComplexPrompt * pricing;
      console.log(` ${dim('\u21B3 Cost:')} ${dim('trivial prompt')} ${dim(dollarFormat(trivCost))} ${dim('vs detailed')} ${dim(dollarFormat(compCost))}`);
    }
    return;
  }

  if (dimensionName === 'workflow') {
    const sse = metrics.sessionStructure.details.tokenEvidence;
    if (sse && sse.avgTokensPerExchangeMarathon && sse.avgTokensPerExchangeFocused) {
      const marCost = sse.avgTokensPerExchangeMarathon * pricing;
      const focCost = sse.avgTokensPerExchangeFocused * pricing;
      // Only worth a line when marathon sessions are more than 10% dearer.
      if (marCost > focCost * 1.1) {
        console.log(` ${dim('\u21B3 Cost:')} ${dim('marathon')} ${orange(dollarFormat(marCost) + '/exchange')} ${dim('vs focused')} ${green(dollarFormat(focCost) + '/exchange')}`);
      }
    }
  }
}
|
|
539
|
+
|
|
540
|
+
/**
 * Print the "WHAT YOUR HABITS ACTUALLY COST" list: one bullet per habit
 * comparison for which token evidence exists and the difference clears that
 * comparison's threshold. When a blended per-token rate is available
 * (estimatedCostTotal / grandTotal > 0) bullets show dollar amounts; otherwise
 * they fall back to token ratios. Prints nothing if no bullet qualifies.
 *
 * @param {object} metrics - Reads `tokenEvidence` from the details of
 *   decomposition, debugCycles, aiLeverage and sessionStructure.
 * @param {object|null|undefined} tokenEfficiency - Token analytics; reads
 *   hasData, estimatedCostTotal, grandTotal and sessionLengthAnalysis.
 * @returns {void}
 */
function displayTokenEvidence(metrics, tokenEfficiency) {
  const evidenceLines = [];
  const pricing = tokenEfficiency && tokenEfficiency.hasData
    ? tokenEfficiency.estimatedCostTotal / Math.max(1, tokenEfficiency.grandTotal) : 0;

  // Decomposition: single-shot vs multi-step cost
  const de = metrics.decomposition.details.tokenEvidence;
  if (de && de.avgTokensPerExchangeSingleShot && de.avgTokensPerExchangeMultiStep) {
    const ratio = (de.avgTokensPerExchangeSingleShot / de.avgTokensPerExchangeMultiStep).toFixed(1);
    // The threshold is applied to the rounded ratio that is displayed.
    if (Number.parseFloat(ratio) > 1.2) {
      if (pricing > 0) {
        const ssCost = dollarFormat(de.avgTokensPerExchangeSingleShot * pricing);
        const msCost = dollarFormat(de.avgTokensPerExchangeMultiStep * pricing);
        evidenceLines.push(
          ` ${dim('\u2022 Single-shot prompts cost')} ${orange(ssCost + '/exchange')} ${dim('vs')} ${green(msCost)} ${dim('in multi-step sessions')} ${dim('(' + ratio + 'x)')}`
        );
      } else {
        evidenceLines.push(
          ` ${dim('\u2022 Single-shot prompts cost')} ${orange(ratio + 'x')} ${dim('more tokens per exchange than multi-step sessions')}`
        );
      }
    }
  }

  // Debug cycles: vague vs specific cost
  const dbe = metrics.debugCycles.details.tokenEvidence;
  if (dbe && dbe.avgTokensVagueDebug && dbe.avgTokensSpecificDebug) {
    const ratio = (dbe.avgTokensVagueDebug / dbe.avgTokensSpecificDebug).toFixed(1);
    if (Number.parseFloat(ratio) > 1.2) {
      if (pricing > 0) {
        const vCost = dollarFormat(dbe.avgTokensVagueDebug * pricing);
        const sCost = dollarFormat(dbe.avgTokensSpecificDebug * pricing);
        evidenceLines.push(
          ` ${dim('\u2022 Vague debug prompts cost')} ${orange(vCost)} ${dim('vs')} ${green(sCost)} ${dim('for specific reports')} ${dim('(' + ratio + 'x)')}`
        );
      } else {
        evidenceLines.push(
          ` ${dim('\u2022 Vague debug prompts cost')} ${orange(ratio + 'x')} ${dim('more than specific error reports')}`
        );
      }
    }
  }

  // AI Leverage: trivial prompts vs detailed ones
  const aie = metrics.aiLeverage.details.tokenEvidence;
  if (aie && aie.avgTokensTrivialPrompt && aie.avgTokensComplexPrompt) {
    const savingsPct = Math.round((1 - aie.avgTokensTrivialPrompt / aie.avgTokensComplexPrompt) * 100);
    // The bullet claims short prompts save "only N%". That claim holds only
    // for a small, non-negative saving, so the same threshold applies whether
    // or not a dollar rate is available.
    if (savingsPct >= 0 && savingsPct < 40) {
      if (pricing > 0) {
        const trivCost = dollarFormat(aie.avgTokensTrivialPrompt * pricing);
        const compCost = dollarFormat(aie.avgTokensComplexPrompt * pricing);
        evidenceLines.push(
          ` ${dim('\u2022 Short vague prompts (<50 chars) cost')} ${dim(trivCost)} ${dim('— only ' + savingsPct + '% less than detailed ones at')} ${dim(compCost)}`
        );
      } else {
        evidenceLines.push(
          ` ${dim('\u2022 Short vague prompts (<50 chars) cost')} ${dim(numberFormat(aie.avgTokensTrivialPrompt) + ' tokens')} ${dim('— only ' + savingsPct + '% less than detailed ones')}`
        );
      }
    }
  }

  // AI Leverage: architectural vs boilerplate cost (dollar form only)
  if (aie && aie.avgTokensArchitectural && aie.avgTokensBoilerplate && pricing > 0) {
    const archCost = dollarFormat(aie.avgTokensArchitectural * pricing);
    const boilCost = dollarFormat(aie.avgTokensBoilerplate * pricing);
    evidenceLines.push(
      ` ${dim('\u2022 Architectural prompts cost')} ${dim(archCost + '/exchange')} ${dim('vs boilerplate at')} ${dim(boilCost + '/exchange')}`
    );
  }

  // Session structure: marathon vs focused cost
  const sse = metrics.sessionStructure.details.tokenEvidence;
  if (sse && sse.avgTokensPerExchangeMarathon && sse.avgTokensPerExchangeFocused) {
    const ratio = (sse.avgTokensPerExchangeMarathon / sse.avgTokensPerExchangeFocused).toFixed(1);
    if (Number.parseFloat(ratio) > 1.1) {
      if (pricing > 0) {
        const marCost = dollarFormat(sse.avgTokensPerExchangeMarathon * pricing);
        const focCost = dollarFormat(sse.avgTokensPerExchangeFocused * pricing);
        evidenceLines.push(
          ` ${dim('\u2022 Marathon sessions (>60m) cost')} ${orange(marCost + '/exchange')} ${dim('vs focused at')} ${green(focCost)} ${dim('(' + ratio + 'x)')}`
        );
      } else {
        evidenceLines.push(
          ` ${dim('\u2022 Marathon sessions (>60m) cost')} ${orange(ratio + 'x')} ${dim('more per exchange than focused ones (10-45m)')}`
        );
      }
    }
  }

  // Context-setting vs no context
  if (sse && sse.avgTokensPerExchangeNoContext && sse.avgTokensPerExchangeWithContext) {
    const ratio = (sse.avgTokensPerExchangeNoContext / sse.avgTokensPerExchangeWithContext).toFixed(1);
    if (Number.parseFloat(ratio) > 1.1) {
      if (pricing > 0) {
        const noCost = dollarFormat(sse.avgTokensPerExchangeNoContext * pricing);
        const wCost = dollarFormat(sse.avgTokensPerExchangeWithContext * pricing);
        evidenceLines.push(
          ` ${dim('\u2022 Sessions without upfront context cost')} ${orange(noCost + '/exchange')} ${dim('vs')} ${green(wCost)} ${dim('with context')} ${dim('(' + ratio + 'x)')}`
        );
      } else {
        evidenceLines.push(
          ` ${dim('\u2022 Sessions without upfront context cost')} ${orange(ratio + 'x')} ${dim('more per exchange')}`
        );
      }
    }
  }

  // Session length cost curve from token efficiency
  if (tokenEfficiency && tokenEfficiency.hasData && tokenEfficiency.sessionLengthAnalysis) {
    const sla = tokenEfficiency.sessionLengthAnalysis;
    if (sla.length >= 2 && pricing > 0) {
      // Sort a copy so the caller's array order is left untouched.
      const sorted = [...sla].sort((a, b) => a.avgTokensPerExchange - b.avgTokensPerExchange);
      const cheapest = sorted[0];
      const costliest = sorted[sorted.length - 1];
      // Only reported when the dearest bucket costs over 1.5x the cheapest.
      if (costliest.avgTokensPerExchange > cheapest.avgTokensPerExchange * 1.5) {
        evidenceLines.push(
          ` ${dim('\u2022 Cost curve:')} ${dim(cheapest.label)} ${green(dollarFormat(cheapest.avgTokensPerExchange * pricing) + '/exchange')} ${dim('\u2192')} ${dim(costliest.label)} ${orange(dollarFormat(costliest.avgTokensPerExchange * pricing) + '/exchange')}`
        );
      }
    }
  }

  if (evidenceLines.length > 0) {
    console.log(` ${dim('WHAT YOUR HABITS ACTUALLY COST')}`);
    console.log(` ${dim('\u2500'.repeat(53))}`);
    for (const line of evidenceLines) {
      console.log(line);
    }
    console.log();
  }
}
|
|
670
|
+
|
|
356
671
|
// ══════════════════════════════════════════════
|
|
357
672
|
// CROSS-PLATFORM
|
|
358
673
|
// ══════════════════════════════════════════════
|
|
@@ -384,10 +699,18 @@ function displayCrossPlatform(perToolScores) {
|
|
|
384
699
|
// PROJECT ANALYSIS
|
|
385
700
|
// ══════════════════════════════════════════════
|
|
386
701
|
|
|
387
|
-
function displayProjects(insights) {
|
|
702
|
+
function displayProjects(insights, tokenEfficiency = null) {
|
|
388
703
|
const projects = insights?.projectComplexity;
|
|
389
704
|
if (!projects || projects.length === 0) return;
|
|
390
705
|
|
|
706
|
+
// Build a lookup from token efficiency per-project data
|
|
707
|
+
const projectCostMap = {};
|
|
708
|
+
if (tokenEfficiency && tokenEfficiency.hasData && tokenEfficiency.perProject) {
|
|
709
|
+
for (const pp of tokenEfficiency.perProject) {
|
|
710
|
+
projectCostMap[pp.fullName] = pp;
|
|
711
|
+
}
|
|
712
|
+
}
|
|
713
|
+
|
|
391
714
|
console.log(dim(' PROJECT ANALYSIS\n'));
|
|
392
715
|
for (const p of projects) {
|
|
393
716
|
const compColor = p.complexity === 'HIGH' ? green : p.complexity === 'MEDIUM' ? yellow : dim;
|
|
@@ -396,7 +719,17 @@ function displayProjects(insights) {
|
|
|
396
719
|
if (signals) {
|
|
397
720
|
console.log(` ${dim(signals)}`);
|
|
398
721
|
}
|
|
399
|
-
|
|
722
|
+
// Look up cost data — match by full project name or short name
|
|
723
|
+
const costData = Object.values(projectCostMap).find(pp =>
|
|
724
|
+
p.name === pp.name || pp.fullName?.endsWith(p.name.replace('...', ''))
|
|
725
|
+
);
|
|
726
|
+
if (costData && costData.exchanges > 0) {
|
|
727
|
+
const costPerExchange = costData.estimatedCost / costData.exchanges;
|
|
728
|
+
const totalCost = costData.estimatedCost;
|
|
729
|
+
console.log(` ${dim(`${p.sessions} sessions \u00B7 ${numberFormat(p.exchanges)} exchanges \u00B7 ${p.daysActive} days active`)} ${dim('|')} ${dim('$' + totalCost.toFixed(0) + ' total \u00B7 $' + costPerExchange.toFixed(2) + '/exchange')}`);
|
|
730
|
+
} else {
|
|
731
|
+
console.log(` ${dim(`${p.sessions} sessions \u00B7 ${numberFormat(p.exchanges)} exchanges \u00B7 ${p.daysActive} days active`)}`);
|
|
732
|
+
}
|
|
400
733
|
console.log();
|
|
401
734
|
}
|
|
402
735
|
}
|
|
@@ -405,7 +738,7 @@ function displayProjects(insights) {
|
|
|
405
738
|
// DIMENSION NARRATIVES — Clinical, benchmarked
|
|
406
739
|
// ══════════════════════════════════════════════
|
|
407
740
|
|
|
408
|
-
export function displayNarratives(metrics, prose) {
|
|
741
|
+
export function displayNarratives(metrics, prose, tokenEfficiency = null) {
|
|
409
742
|
const shownSnippets = new Set();
|
|
410
743
|
function showLabeledSnippet(label, prompt) {
|
|
411
744
|
if (!prompt) return;
|
|
@@ -429,17 +762,19 @@ export function displayNarratives(metrics, prose) {
|
|
|
429
762
|
for (const line of lines) {
|
|
430
763
|
console.log(` ${dim(line.trim())}`);
|
|
431
764
|
}
|
|
765
|
+
// Add token cost line for this dimension
|
|
432
766
|
const titleLower = section.title.toLowerCase();
|
|
767
|
+
displayDimensionCostLine(titleLower, metrics, tokenEfficiency);
|
|
433
768
|
const matched = sectionSnippetMap[titleLower];
|
|
434
769
|
if (matched && matched.prompt) showLabeledSnippet(matched.label, matched.prompt);
|
|
435
770
|
console.log();
|
|
436
771
|
}
|
|
437
772
|
} else {
|
|
438
|
-
displayDataNarratives(metrics, shownSnippets);
|
|
773
|
+
displayDataNarratives(metrics, shownSnippets, tokenEfficiency);
|
|
439
774
|
}
|
|
440
775
|
}
|
|
441
776
|
|
|
442
|
-
function displayDataNarratives(metrics, shownSnippets) {
|
|
777
|
+
function displayDataNarratives(metrics, shownSnippets, tokenEfficiency = null) {
|
|
443
778
|
const d = metrics.decomposition.details;
|
|
444
779
|
const db = metrics.debugCycles.details;
|
|
445
780
|
const ai = metrics.aiLeverage.details;
|
|
@@ -458,6 +793,7 @@ function displayDataNarratives(metrics, shownSnippets) {
|
|
|
458
793
|
console.log(` ${dim(`Avg session depth: ${d.avgExchangesPerSession} exchanges (benchmark: ${BENCHMARKS.avgExchangesPerSession})`)}`);
|
|
459
794
|
console.log(` ${dim(`Avg prompt length: ${numberFormat(d.avgPromptLength)} chars (benchmark: ${BENCHMARKS.avgPromptLength})`)}`);
|
|
460
795
|
console.log(` ${dim(`Multi-step ratio: ${d.multiStepSessions}/${d.totalSessions} sessions (${Math.round(d.multiStepSessions / Math.max(1, d.totalSessions) * 100)}%)`)}`);
|
|
796
|
+
displayDimensionCostLine('thinking', metrics, tokenEfficiency);
|
|
461
797
|
showSnippet('Evidence', pickExample(metrics.decomposition.examples, 'decomposition'));
|
|
462
798
|
console.log();
|
|
463
799
|
|
|
@@ -466,6 +802,7 @@ function displayDataNarratives(metrics, shownSnippets) {
|
|
|
466
802
|
console.log(` ${dim(`Avg resolution: ${db.avgTurnsToResolve} turns (benchmark: ${BENCHMARKS.avgTurnsToResolve})`)}`);
|
|
467
803
|
console.log(` ${dim(`Specific reports: ${db.specificReportRatio}% (benchmark: ${BENCHMARKS.specificReportRatio}%)`)}`);
|
|
468
804
|
console.log(` ${dim(`Extended loops: ${db.longLoops} | Quick fixes: ${db.quickFixes}/${db.totalDebugSequences}`)}`);
|
|
805
|
+
displayDimensionCostLine('debugging', metrics, tokenEfficiency);
|
|
469
806
|
showSnippet('Evidence', pickExample(metrics.debugCycles.examples, 'specific_report') || pickExample(metrics.debugCycles.examples, 'quick_fix'));
|
|
470
807
|
console.log();
|
|
471
808
|
|
|
@@ -474,6 +811,7 @@ function displayDataNarratives(metrics, shownSnippets) {
|
|
|
474
811
|
console.log(` ${dim(`High-level ratio: ${ai.highLevelRatio}% (benchmark: ${BENCHMARKS.highLevelRatio}%)`)}`);
|
|
475
812
|
console.log(` ${dim(`Breakdown: ${ai.architecturalPrompts} architectural \u00B7 ${ai.planningPrompts} planning \u00B7 ${ai.exploratoryPrompts} exploratory`)}`);
|
|
476
813
|
console.log(` ${dim(`Boilerplate: ${ai.boilerplatePrompts} (${Math.round(ai.boilerplatePrompts / Math.max(1, ai.totalPrompts) * 100)}%) \u00B7 Testing: ${ai.testingPrompts}`)}`);
|
|
814
|
+
displayDimensionCostLine('ai leverage', metrics, tokenEfficiency);
|
|
477
815
|
showSnippet('Evidence', pickExample(metrics.aiLeverage.examples, 'architectural') || pickExample(metrics.aiLeverage.examples, 'planning'));
|
|
478
816
|
console.log();
|
|
479
817
|
|
|
@@ -482,6 +820,7 @@ function displayDataNarratives(metrics, shownSnippets) {
|
|
|
482
820
|
console.log(` ${dim(`Context-setting: ${ss.contextSetRatio}% (benchmark: ${BENCHMARKS.contextSetRatio}%)`)}`);
|
|
483
821
|
console.log(` ${dim(`Review at end: ${ss.reviewEndRatio}% (benchmark: ${BENCHMARKS.reviewEndRatio}%)`)}`);
|
|
484
822
|
console.log(` ${dim(`Refinement rate: ${ss.refinementRatio}% (benchmark: ${BENCHMARKS.refinementRatio}%)`)}`);
|
|
823
|
+
displayDimensionCostLine('workflow', metrics, tokenEfficiency);
|
|
485
824
|
showSnippet('Evidence', pickExample(metrics.sessionStructure.examples, 'context_setting') || pickExample(metrics.sessionStructure.examples, 'refinement'));
|
|
486
825
|
console.log();
|
|
487
826
|
}
|
|
@@ -650,7 +989,7 @@ function displayNextSteps(result) {
|
|
|
650
989
|
// VERBOSE: DETAILED BREAKDOWN
|
|
651
990
|
// ══════════════════════════════════════════════
|
|
652
991
|
|
|
653
|
-
export function displayVerbose(metrics, sessions) {
|
|
992
|
+
export function displayVerbose(metrics, sessions, tokenEfficiency = null) {
|
|
654
993
|
console.log(doubleRule());
|
|
655
994
|
console.log(dim('\n DETAILED BREAKDOWN\n'));
|
|
656
995
|
|
|
@@ -706,6 +1045,7 @@ export function displayVerbose(metrics, sessions) {
|
|
|
706
1045
|
console.log(` ${dim(pad('Multi-step sessions', 28))} ${white(d.multiStepSessions + '/' + d.totalSessions)} ${dim('(' + Math.round(d.multiStepSessions / Math.max(1, d.totalSessions) * 100) + '%)')}`);
|
|
707
1046
|
console.log(` ${dim(pad('Single-shot sessions', 28))} ${white(String(d.singleShotSessions))}`);
|
|
708
1047
|
console.log(` ${dim(pad('Contextual followups', 28))} ${white(d.contextualFollowupRatio + '%')}${d.contextualFollowupRatio > 20 ? cyan(' builds on context well') : orange(' could reference prior work more')}`);
|
|
1048
|
+
displayDimensionCostLine('thinking', metrics, tokenEfficiency);
|
|
709
1049
|
console.log();
|
|
710
1050
|
|
|
711
1051
|
// ── Debugging ──
|
|
@@ -718,6 +1058,7 @@ export function displayVerbose(metrics, sessions) {
|
|
|
718
1058
|
console.log(` ${dim(pad('Extended loops (>5 turns)', 28))} ${db.longLoops === 0 ? green('0 -- zero spirals') : orange(String(db.longLoops))}`);
|
|
719
1059
|
metricRow('Specific report ratio', db.specificReportRatio, BENCHMARKS.specificReportRatio, '%');
|
|
720
1060
|
console.log(` ${dim(pad('Vague reports', 28))} ${db.vagueReports === 0 ? green('0 -- never vague') : orange(String(db.vagueReports))}`);
|
|
1061
|
+
displayDimensionCostLine('debugging', metrics, tokenEfficiency);
|
|
721
1062
|
console.log();
|
|
722
1063
|
|
|
723
1064
|
// ── AI Leverage ──
|
|
@@ -735,6 +1076,7 @@ export function displayVerbose(metrics, sessions) {
|
|
|
735
1076
|
console.log(` ${dim(pad(' Testing', 28))} ${white(String(ai.testingPrompts))} ${dim('(' + Math.round(ai.testingPrompts / total * 100) + '%)')}`);
|
|
736
1077
|
console.log();
|
|
737
1078
|
metricRow('High-level ratio', ai.highLevelRatio, BENCHMARKS.highLevelRatio, '%');
|
|
1079
|
+
displayDimensionCostLine('ai leverage', metrics, tokenEfficiency);
|
|
738
1080
|
console.log();
|
|
739
1081
|
|
|
740
1082
|
// ── Session Structure ──
|
|
@@ -755,6 +1097,7 @@ export function displayVerbose(metrics, sessions) {
|
|
|
755
1097
|
console.log(` ${dim(pad(' Medium (5-60m)', 28))} ${white(String(dur.medium))}`);
|
|
756
1098
|
console.log(` ${dim(pad(' Long (>60m)', 28))} ${white(String(dur.long))}`);
|
|
757
1099
|
}
|
|
1100
|
+
displayDimensionCostLine('workflow', metrics, tokenEfficiency);
|
|
758
1101
|
console.log();
|
|
759
1102
|
}
|
|
760
1103
|
|
|
@@ -767,9 +1110,10 @@ export function displayOffline(result, metrics, extra = {}) {
|
|
|
767
1110
|
displaySummary(result, extra);
|
|
768
1111
|
displayArchetype(result);
|
|
769
1112
|
displayDimensions(result);
|
|
1113
|
+
displayTokenEfficiency(extra.tokenEfficiency, metrics);
|
|
770
1114
|
displayCrossPlatform(extra.perToolScores);
|
|
771
|
-
displayDataNarratives(metrics, new Set());
|
|
772
|
-
displayProjects(extra.insights);
|
|
1115
|
+
displayDataNarratives(metrics, new Set(), extra.tokenEfficiency);
|
|
1116
|
+
displayProjects(extra.insights, extra.tokenEfficiency);
|
|
773
1117
|
displaySignatures(extra.insights);
|
|
774
1118
|
displayWatchPoints(extra.insights);
|
|
775
1119
|
displayTrajectory(extra.insights);
|
|
@@ -789,9 +1133,10 @@ export function displayFull(result, metrics, prose, extra = {}) {
|
|
|
789
1133
|
displaySummary(result, extra);
|
|
790
1134
|
displayArchetype(result);
|
|
791
1135
|
displayDimensions(result);
|
|
1136
|
+
displayTokenEfficiency(extra.tokenEfficiency, metrics);
|
|
792
1137
|
displayCrossPlatform(extra.perToolScores);
|
|
793
|
-
displayNarratives(metrics, prose);
|
|
794
|
-
displayProjects(extra.insights);
|
|
1138
|
+
displayNarratives(metrics, prose, extra.tokenEfficiency);
|
|
1139
|
+
displayProjects(extra.insights, extra.tokenEfficiency);
|
|
795
1140
|
displaySignatures(extra.insights);
|
|
796
1141
|
displayWatchPoints(extra.insights);
|
|
797
1142
|
displayTrajectory(extra.insights);
|
package/src/index.js
CHANGED
|
@@ -11,6 +11,7 @@ import { computeDebugCycles } from './metrics/debug-cycles.js';
|
|
|
11
11
|
import { computeAILeverage } from './metrics/ai-leverage.js';
|
|
12
12
|
import { computeSessionStructure } from './metrics/session-structure.js';
|
|
13
13
|
import { computeOverallScore } from './scorer.js';
|
|
14
|
+
import { computeTokenEfficiency } from './metrics/token-efficiency.js';
|
|
14
15
|
import {
|
|
15
16
|
computeSignatures,
|
|
16
17
|
computeWatchPoints,
|
|
@@ -142,6 +143,9 @@ export async function run(options = {}) {
|
|
|
142
143
|
|
|
143
144
|
const result = computeOverallScore(metrics);
|
|
144
145
|
|
|
146
|
+
// ── Step 3a: Compute token efficiency analytics ──
|
|
147
|
+
const tokenEfficiency = computeTokenEfficiency(allSessions);
|
|
148
|
+
|
|
145
149
|
// ── Cross-platform scores ──
|
|
146
150
|
const perToolScores = tools.length > 1 ? computePerToolScores(allSessions) : null;
|
|
147
151
|
|
|
@@ -172,8 +176,8 @@ export async function run(options = {}) {
|
|
|
172
176
|
};
|
|
173
177
|
|
|
174
178
|
// ── Step 3b: Compute insights ──
|
|
175
|
-
const signatures = computeSignatures(allSessions, metrics);
|
|
176
|
-
const watchPoints = computeWatchPoints(allSessions, metrics);
|
|
179
|
+
const signatures = computeSignatures(allSessions, metrics, tokenEfficiency);
|
|
180
|
+
const watchPoints = computeWatchPoints(allSessions, metrics, tokenEfficiency);
|
|
177
181
|
const trajectory = computeTrajectory(allSessions);
|
|
178
182
|
const projectComplexity = computeProjectComplexity(allSessions);
|
|
179
183
|
const assessment = generateAssessment(result, metrics, signatures, watchPoints);
|
|
@@ -183,7 +187,7 @@ export async function run(options = {}) {
|
|
|
183
187
|
|
|
184
188
|
// ── JSON output ──
|
|
185
189
|
if (options.json) {
|
|
186
|
-
console.log(JSON.stringify({ metrics, result, sessionStats, perToolScores, scoreDelta, insights }, null, 2));
|
|
190
|
+
console.log(JSON.stringify({ metrics, result, sessionStats, perToolScores, scoreDelta, insights, tokenEfficiency }, null, 2));
|
|
187
191
|
return;
|
|
188
192
|
}
|
|
189
193
|
|
|
@@ -192,7 +196,7 @@ export async function run(options = {}) {
|
|
|
192
196
|
if (!options.offline) {
|
|
193
197
|
const [, proseResult] = await Promise.all([
|
|
194
198
|
displayProgressBar(1500),
|
|
195
|
-
generateProse(metrics, result, sessionStats).catch(() => null),
|
|
199
|
+
generateProse(metrics, result, sessionStats, tokenEfficiency).catch(() => null),
|
|
196
200
|
]);
|
|
197
201
|
prose = proseResult;
|
|
198
202
|
} else {
|
|
@@ -200,7 +204,7 @@ export async function run(options = {}) {
|
|
|
200
204
|
}
|
|
201
205
|
|
|
202
206
|
// ── Step 5: Display results ──
|
|
203
|
-
const extra = { scoreDelta, perToolScores, insights, sessionStats };
|
|
207
|
+
const extra = { scoreDelta, perToolScores, insights, sessionStats, tokenEfficiency };
|
|
204
208
|
if (options.offline) {
|
|
205
209
|
displayOffline(result, metrics, extra);
|
|
206
210
|
} else {
|
|
@@ -209,13 +213,13 @@ export async function run(options = {}) {
|
|
|
209
213
|
|
|
210
214
|
// ── Step 6: Verbose prompt (interactive) ──
|
|
211
215
|
if (options.verbose) {
|
|
212
|
-
displayVerbose(metrics, allSessions);
|
|
216
|
+
displayVerbose(metrics, allSessions, tokenEfficiency);
|
|
213
217
|
} else {
|
|
214
218
|
try {
|
|
215
219
|
const wantsVerbose = await askVerbose();
|
|
216
220
|
if (wantsVerbose) {
|
|
217
221
|
console.log();
|
|
218
|
-
displayVerbose(metrics, allSessions);
|
|
222
|
+
displayVerbose(metrics, allSessions, tokenEfficiency);
|
|
219
223
|
} else {
|
|
220
224
|
console.log();
|
|
221
225
|
}
|
package/src/insights.js
CHANGED
|
@@ -45,6 +45,13 @@ const preflightPatterns = /^(before (we|you|i)|don'?t code|review (first|this|my
|
|
|
45
45
|
const testFirstPatterns = /\b(write (the )?tests? (first|before)|test.?driven|TDD|spec first|start with (tests?|specs?))\b/i;
|
|
46
46
|
const negativeConstraintPatterns = /\b(don'?t|do not|never|avoid|must not|shouldn'?t)\b.*\b(add|create|use|include|change|modify|touch|remove)\b/i;
|
|
47
47
|
|
|
48
|
+
// Number formatting for insights text
|
|
49
|
+
/**
 * Format a count compactly for insight text: 1500 -> "1.5k", 2500000 -> "2.5M".
 *
 * One decimal place is kept and a trailing ".0" is dropped. Values below
 * 1000 are returned unchanged as a string.
 *
 * @param {number} n - The count to format.
 * @returns {string} Compact representation of `n`.
 */
function numberFormatInsight(n) {
  const dropTrailingZero = (text) => text.replace(/\.0$/, '');

  if (n >= 1_000_000) {
    return `${dropTrailingZero((n / 1_000_000).toFixed(1))}M`;
  }

  if (n >= 1000) {
    const thousands = (n / 1000).toFixed(1);
    // Values just under one million round up to "1000.0"; promote them to the
    // next unit so the output never reads "1000k".
    if (thousands === '1000.0') return '1M';
    return `${dropTrailingZero(thousands)}k`;
  }

  return String(n);
}
|
|
54
|
+
|
|
48
55
|
// Evidence quality filter (same rules as metric parsers)
|
|
49
56
|
const noisePatterns = /^This session is being continued|^\[?[0-9T:.Z-]{20,}|^\S+@\S+.*[%$#>]|^\s*\$\s|^\s*>\s/;
|
|
50
57
|
function isGoodEvidence(prompt) {
|
|
@@ -55,7 +62,7 @@ function isGoodEvidence(prompt) {
|
|
|
55
62
|
return true;
|
|
56
63
|
}
|
|
57
64
|
|
|
58
|
-
export function computeSignatures(allSessions, metrics) {
|
|
65
|
+
export function computeSignatures(allSessions, metrics, tokenEfficiency = null) {
|
|
59
66
|
const signatures = [];
|
|
60
67
|
const d = metrics.decomposition.details;
|
|
61
68
|
const db = metrics.debugCycles.details;
|
|
@@ -190,6 +197,19 @@ export function computeSignatures(allSessions, metrics) {
|
|
|
190
197
|
});
|
|
191
198
|
}
|
|
192
199
|
|
|
200
|
+
// ── Token-backed signature: efficient token usage ──
|
|
201
|
+
if (tokenEfficiency && tokenEfficiency.hasData) {
|
|
202
|
+
const te = tokenEfficiency;
|
|
203
|
+
// If context re-read ratio is below 90%, that's notably efficient
|
|
204
|
+
if (te.contextRereadRatio < 0.90 && te.sessionsAnalyzed >= 5) {
|
|
205
|
+
signatures.push({
|
|
206
|
+
name: 'Token-efficient prompting',
|
|
207
|
+
detail: `Only ${Math.round(te.contextRereadRatio * 100)}% of your tokens are context re-reads (typical: 95%+). Your focused sessions and clear prompts minimize wasted tokens. Estimated spend: $${te.estimatedCostTotal.toFixed(2)}.`,
|
|
208
|
+
evidence: null,
|
|
209
|
+
});
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
|
|
193
213
|
return signatures.slice(0, 4); // Max 4 signatures
|
|
194
214
|
}
|
|
195
215
|
|
|
@@ -197,7 +217,7 @@ export function computeSignatures(allSessions, metrics) {
|
|
|
197
217
|
// WATCH POINTS — Anti-patterns
|
|
198
218
|
// ══════════════════════════════════════════════
|
|
199
219
|
|
|
200
|
-
export function computeWatchPoints(allSessions, metrics) {
|
|
220
|
+
export function computeWatchPoints(allSessions, metrics, tokenEfficiency = null) {
|
|
201
221
|
const watchPoints = [];
|
|
202
222
|
const d = metrics.decomposition.details;
|
|
203
223
|
const db = metrics.debugCycles.details;
|
|
@@ -230,9 +250,21 @@ export function computeWatchPoints(allSessions, metrics) {
|
|
|
230
250
|
}
|
|
231
251
|
const totalFollowupSessions = Object.values(projectSessions).reduce((sum, s) => sum + Math.max(0, s.length - 1), 0);
|
|
232
252
|
if (totalFollowupSessions > 3 && contextRestarts / totalFollowupSessions > 0.5) {
|
|
253
|
+
// Estimate cost of context restarts — each restart forces full context re-read
|
|
254
|
+
let costNote = '';
|
|
255
|
+
if (tokenEfficiency && tokenEfficiency.hasData) {
|
|
256
|
+
const avgCostPerSession = tokenEfficiency.estimatedCostTotal / Math.max(1, tokenEfficiency.sessionsAnalyzed);
|
|
257
|
+
// The re-read cost of each restart scales with context size — estimate as % of avg session cost
|
|
258
|
+
const rereadPct = tokenEfficiency.contextRereadRatio || 0.94;
|
|
259
|
+
const wastedPerRestart = avgCostPerSession * rereadPct * 0.3; // ~30% of re-read is avoidable with context continuity
|
|
260
|
+
const totalWasted = wastedPerRestart * contextRestarts;
|
|
261
|
+
if (totalWasted > 1) {
|
|
262
|
+
costNote = ` Estimated wasted re-read cost: ~$${totalWasted.toFixed(0)}.`;
|
|
263
|
+
}
|
|
264
|
+
}
|
|
233
265
|
watchPoints.push({
|
|
234
266
|
name: 'Context amnesia',
|
|
235
|
-
detail: `You restart context from scratch in ${Math.round(contextRestarts / totalFollowupSessions * 100)}% of follow-up sessions on the same project. Engineers who maintain context across sessions are more efficient
|
|
267
|
+
detail: `You restart context from scratch in ${Math.round(contextRestarts / totalFollowupSessions * 100)}% of follow-up sessions on the same project. Engineers who maintain context across sessions are more efficient.${costNote}`,
|
|
236
268
|
evidence: bestContextRestartPrompt,
|
|
237
269
|
});
|
|
238
270
|
}
|
|
@@ -270,32 +302,96 @@ export function computeWatchPoints(allSessions, metrics) {
|
|
|
270
302
|
}
|
|
271
303
|
}
|
|
272
304
|
}
|
|
305
|
+
// Dollar annotation: long prompts trigger large context re-reads each time
|
|
306
|
+
let monologueCostNote = '';
|
|
307
|
+
if (tokenEfficiency && tokenEfficiency.hasData) {
|
|
308
|
+
// Compare avg tokens for long vs short prompts from the prompt length analysis
|
|
309
|
+
const longBucket = tokenEfficiency.promptLengthAnalysis.find(b => b.label === '500+ chars');
|
|
310
|
+
const shortBucket = tokenEfficiency.promptLengthAnalysis.find(b => b.label === '20-100 chars');
|
|
311
|
+
if (longBucket && shortBucket && longBucket.avgCost && shortBucket.avgCost) {
|
|
312
|
+
const ratio = (longBucket.avgCost / shortBucket.avgCost).toFixed(1);
|
|
313
|
+
monologueCostNote = ` Long prompts cost ${ratio}x more per exchange ($${longBucket.avgCost.toFixed(2)} vs $${shortBucket.avgCost.toFixed(2)}).`;
|
|
314
|
+
}
|
|
315
|
+
}
|
|
273
316
|
watchPoints.push({
|
|
274
317
|
name: 'Monologue prompting',
|
|
275
|
-
detail: `Avg prompt length of ${d.avgPromptLength} chars is ${Math.round(d.avgPromptLength / BENCHMARKS.avgPromptLength)}x the benchmark. Breaking complex requests into 2-3 shorter prompts typically yields better AI output
|
|
318
|
+
detail: `Avg prompt length of ${d.avgPromptLength} chars is ${Math.round(d.avgPromptLength / BENCHMARKS.avgPromptLength)}x the benchmark. Breaking complex requests into 2-3 shorter prompts typically yields better AI output.${monologueCostNote}`,
|
|
276
319
|
evidence: bestLongPrompt,
|
|
277
320
|
});
|
|
278
321
|
}
|
|
279
322
|
|
|
280
323
|
// Low context-setting
|
|
281
324
|
if (ss.contextSetRatio < 20) {
|
|
325
|
+
let contextCostNote = '';
|
|
326
|
+
if (ss.tokenEvidence?.avgTokensPerExchangeWithContext && ss.tokenEvidence?.avgTokensPerExchangeNoContext) {
|
|
327
|
+
const pricing = tokenEfficiency && tokenEfficiency.hasData
|
|
328
|
+
? tokenEfficiency.estimatedCostTotal / Math.max(1, tokenEfficiency.grandTotal) : 0;
|
|
329
|
+
if (pricing > 0) {
|
|
330
|
+
const withCtx = ss.tokenEvidence.avgTokensPerExchangeWithContext * pricing;
|
|
331
|
+
const noCtx = ss.tokenEvidence.avgTokensPerExchangeNoContext * pricing;
|
|
332
|
+
if (noCtx > withCtx * 1.1) {
|
|
333
|
+
contextCostNote = ` Sessions without context cost $${noCtx.toFixed(2)}/exchange vs $${withCtx.toFixed(2)} with context.`;
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
}
|
|
282
337
|
watchPoints.push({
|
|
283
338
|
name: 'Missing context',
|
|
284
|
-
detail: `Only ${ss.contextSetRatio}% of sessions start with context-setting (benchmark: ${BENCHMARKS.contextSetRatio}%). Upfront context leads to better first responses and fewer corrections
|
|
339
|
+
detail: `Only ${ss.contextSetRatio}% of sessions start with context-setting (benchmark: ${BENCHMARKS.contextSetRatio}%). Upfront context leads to better first responses and fewer corrections.${contextCostNote}`,
|
|
285
340
|
evidence: null,
|
|
286
341
|
});
|
|
287
342
|
}
|
|
288
343
|
|
|
289
344
|
// Extended debug spirals
|
|
290
345
|
if (db.longLoops > 2) {
|
|
346
|
+
let loopCostStr = '';
|
|
347
|
+
if (db.tokenEvidence?.avgTokensLongLoop) {
|
|
348
|
+
const pricing = tokenEfficiency && tokenEfficiency.hasData
|
|
349
|
+
? tokenEfficiency.estimatedCostTotal / Math.max(1, tokenEfficiency.grandTotal) : 0;
|
|
350
|
+
if (pricing > 0) {
|
|
351
|
+
const costPerLoop = db.tokenEvidence.avgTokensLongLoop * pricing;
|
|
352
|
+
loopCostStr = ` Each spiral costs ~$${costPerLoop.toFixed(2)}.`;
|
|
353
|
+
} else {
|
|
354
|
+
loopCostStr = ` Each spiral averages ${numberFormatInsight(db.tokenEvidence.avgTokensLongLoop)} tokens.`;
|
|
355
|
+
}
|
|
356
|
+
}
|
|
291
357
|
watchPoints.push({
|
|
292
358
|
name: 'Debug spirals',
|
|
293
|
-
detail: `${db.longLoops} extended debug loops (>5 turns) detected
|
|
359
|
+
detail: `${db.longLoops} extended debug loops (>5 turns) detected.${loopCostStr} When stuck, try providing more specific error context or breaking the problem differently.`,
|
|
294
360
|
evidence: null,
|
|
295
361
|
});
|
|
296
362
|
}
|
|
297
363
|
|
|
298
|
-
|
|
364
|
+
// ── Token-backed watch points ──
|
|
365
|
+
if (tokenEfficiency && tokenEfficiency.hasData) {
|
|
366
|
+
const te = tokenEfficiency;
|
|
367
|
+
|
|
368
|
+
// Marathon sessions burning disproportionate tokens
|
|
369
|
+
const marathonSessions = te.costliestSessions.filter(s => s.exchanges > 50);
|
|
370
|
+
if (marathonSessions.length >= 2) {
|
|
371
|
+
const marathonCost = marathonSessions.reduce((s, m) => s + m.estimatedCost, 0);
|
|
372
|
+
const marathonPct = te.estimatedCostTotal > 0 ? Math.round(marathonCost / te.estimatedCostTotal * 100) : 0;
|
|
373
|
+
if (marathonPct > 40) {
|
|
374
|
+
watchPoints.push({
|
|
375
|
+
name: 'Marathon session tax',
|
|
376
|
+
detail: `${marathonSessions.length} marathon sessions (50+ exchanges) consumed ~${marathonPct}% of your total spend (~$${marathonCost.toFixed(2)}). Context compounds — splitting into focused sessions would reduce token waste.`,
|
|
377
|
+
evidence: null,
|
|
378
|
+
});
|
|
379
|
+
}
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
// Vague prompts costing more than specific ones
|
|
383
|
+
const vagueAvg = db.tokenEvidence?.avgTokensVagueDebug;
|
|
384
|
+
const specificAvg = db.tokenEvidence?.avgTokensSpecificDebug;
|
|
385
|
+
if (vagueAvg && specificAvg && vagueAvg > specificAvg * 1.5 && db.vagueReports > 3) {
|
|
386
|
+
watchPoints.push({
|
|
387
|
+
name: 'Vague prompts are expensive',
|
|
388
|
+
detail: `Your vague debug prompts average ${numberFormatInsight(vagueAvg)} tokens vs ${numberFormatInsight(specificAvg)} for specific ones — ${(vagueAvg / specificAvg).toFixed(1)}x more expensive. Adding error details upfront saves real money.`,
|
|
389
|
+
evidence: null,
|
|
390
|
+
});
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
return watchPoints.slice(0, 4); // Max 4 watch points (was 3, expanded for token insights)
|
|
299
395
|
}
|
|
300
396
|
|
|
301
397
|
// ══════════════════════════════════════════════
|
|
@@ -136,6 +136,28 @@ export function computeAILeverage(sessions) {
|
|
|
136
136
|
if (bestPlanPrompt) examples.push({ type: 'planning', prompt: bestPlanPrompt });
|
|
137
137
|
if (bestExplorePrompt) examples.push({ type: 'exploratory', prompt: bestExplorePrompt });
|
|
138
138
|
|
|
139
|
+
// ── Token cost evidence ──
|
|
140
|
+
// Compare cost of trivial prompts vs complex/architectural prompts
|
|
141
|
+
let trivialTokens = 0, trivialTokenCount = 0;
|
|
142
|
+
let complexTokensTotal = 0, complexTokensCount = 0;
|
|
143
|
+
let boilerplateTokens = 0, boilerplateTokenCount = 0;
|
|
144
|
+
let archTokens = 0, archTokenCount = 0;
|
|
145
|
+
|
|
146
|
+
for (const session of sessions) {
|
|
147
|
+
for (const exchange of session.exchanges) {
|
|
148
|
+
const prompt = exchange.userPrompt || '';
|
|
149
|
+
const t = exchange.tokenUsage;
|
|
150
|
+
const tokens = t ? (t.inputTokens + t.outputTokens + t.cacheReadTokens + t.cacheCreationTokens) : 0;
|
|
151
|
+
if (tokens === 0) continue;
|
|
152
|
+
|
|
153
|
+
if (prompt.length < 50) { trivialTokens += tokens; trivialTokenCount++; }
|
|
154
|
+
const sentences = prompt.split(/[.!?]+/).filter(s => s.trim().length > 10);
|
|
155
|
+
if (prompt.length > 200 && sentences.length >= 2) { complexTokensTotal += tokens; complexTokensCount++; }
|
|
156
|
+
if (boilerplatePatterns.test(prompt)) { boilerplateTokens += tokens; boilerplateTokenCount++; }
|
|
157
|
+
if (architecturalPatterns.test(prompt) || planningPatterns.test(prompt)) { archTokens += tokens; archTokenCount++; }
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
|
|
139
161
|
return {
|
|
140
162
|
score: Math.max(0, Math.min(100, score)),
|
|
141
163
|
details: {
|
|
@@ -152,6 +174,12 @@ export function computeAILeverage(sessions) {
|
|
|
152
174
|
research: highLeverageToolUses,
|
|
153
175
|
coding: codingToolUses,
|
|
154
176
|
},
|
|
177
|
+
tokenEvidence: {
|
|
178
|
+
avgTokensTrivialPrompt: trivialTokenCount > 0 ? Math.round(trivialTokens / trivialTokenCount) : null,
|
|
179
|
+
avgTokensComplexPrompt: complexTokensCount > 0 ? Math.round(complexTokensTotal / complexTokensCount) : null,
|
|
180
|
+
avgTokensBoilerplate: boilerplateTokenCount > 0 ? Math.round(boilerplateTokens / boilerplateTokenCount) : null,
|
|
181
|
+
avgTokensArchitectural: archTokenCount > 0 ? Math.round(archTokens / archTokenCount) : null,
|
|
182
|
+
},
|
|
155
183
|
},
|
|
156
184
|
examples,
|
|
157
185
|
};
|
|
@@ -145,6 +145,43 @@ export function computeDebugCycles(sessions) {
|
|
|
145
145
|
if (bestSpecificReport) examples.push({ type: 'specific_report', prompt: bestSpecificReport });
|
|
146
146
|
if (bestQuickFix) examples.push({ type: 'quick_fix', prompt: bestQuickFix });
|
|
147
147
|
|
|
148
|
+
// ── Token cost evidence ──
|
|
149
|
+
// Compare cost of vague vs specific debug exchanges
|
|
150
|
+
let vagueTokens = 0, vagueTokenCount = 0;
|
|
151
|
+
let specificTokens = 0, specificTokenCount = 0;
|
|
152
|
+
let longLoopTokens = 0, longLoopTokenCount = 0;
|
|
153
|
+
let quickFixTokens = 0, quickFixTokenCount = 0;
|
|
154
|
+
|
|
155
|
+
for (const session of sessions) {
|
|
156
|
+
const { exchanges } = session;
|
|
157
|
+
let debugExchanges = [];
|
|
158
|
+
let inDebug = false;
|
|
159
|
+
|
|
160
|
+
for (let i = 0; i < exchanges.length; i++) {
|
|
161
|
+
const prompt = exchanges[i].userPrompt || '';
|
|
162
|
+
const t = exchanges[i].tokenUsage;
|
|
163
|
+
const tokens = t ? (t.inputTokens + t.outputTokens + t.cacheReadTokens + t.cacheCreationTokens) : 0;
|
|
164
|
+
|
|
165
|
+
if (errorPatterns.test(prompt) && tokens > 0) {
|
|
166
|
+
if (!inDebug) { inDebug = true; debugExchanges = []; }
|
|
167
|
+
debugExchanges.push({ prompt, tokens });
|
|
168
|
+
|
|
169
|
+
if (vaguePhrases.test(prompt)) { vagueTokens += tokens; vagueTokenCount++; }
|
|
170
|
+
if (specificDebugPatterns.test(prompt) || prompt.length > 200) { specificTokens += tokens; specificTokenCount++; }
|
|
171
|
+
} else if (inDebug) {
|
|
172
|
+
if (debugExchanges.length <= 2) {
|
|
173
|
+
const total = debugExchanges.reduce((s, e) => s + e.tokens, 0);
|
|
174
|
+
quickFixTokens += total; quickFixTokenCount++;
|
|
175
|
+
} else if (debugExchanges.length > 5) {
|
|
176
|
+
const total = debugExchanges.reduce((s, e) => s + e.tokens, 0);
|
|
177
|
+
longLoopTokens += total; longLoopTokenCount++;
|
|
178
|
+
}
|
|
179
|
+
inDebug = false;
|
|
180
|
+
debugExchanges = [];
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
|
|
148
185
|
return {
|
|
149
186
|
score: Math.max(0, Math.min(100, score)),
|
|
150
187
|
details: {
|
|
@@ -155,6 +192,12 @@ export function computeDebugCycles(sessions) {
|
|
|
155
192
|
specificReportRatio: Math.round(specificRatio * 100),
|
|
156
193
|
vagueReports,
|
|
157
194
|
specificReports,
|
|
195
|
+
tokenEvidence: {
|
|
196
|
+
avgTokensVagueDebug: vagueTokenCount > 0 ? Math.round(vagueTokens / vagueTokenCount) : null,
|
|
197
|
+
avgTokensSpecificDebug: specificTokenCount > 0 ? Math.round(specificTokens / specificTokenCount) : null,
|
|
198
|
+
avgTokensQuickFix: quickFixTokenCount > 0 ? Math.round(quickFixTokens / quickFixTokenCount) : null,
|
|
199
|
+
avgTokensLongLoop: longLoopTokenCount > 0 ? Math.round(longLoopTokens / longLoopTokenCount) : null,
|
|
200
|
+
},
|
|
158
201
|
},
|
|
159
202
|
examples,
|
|
160
203
|
};
|
|
@@ -117,6 +117,27 @@ export function computeDecomposition(sessions) {
|
|
|
117
117
|
}
|
|
118
118
|
if (bestFollowupPrompt) examples.push({ type: 'followup', prompt: bestFollowupPrompt });
|
|
119
119
|
|
|
120
|
+
// ── Token cost evidence ──
|
|
121
|
+
// Compare token cost of single-shot sessions vs multi-step sessions
|
|
122
|
+
// to prove decomposition saves tokens
|
|
123
|
+
let singleShotTokens = 0, singleShotCount = 0;
|
|
124
|
+
let multiStepTokens = 0, multiStepCount = 0;
|
|
125
|
+
for (const session of sessions) {
|
|
126
|
+
const t = session.tokenUsage;
|
|
127
|
+
if (!t || (t.inputTokens + t.outputTokens + t.cacheReadTokens + t.cacheCreationTokens) === 0) continue;
|
|
128
|
+
const total = t.inputTokens + t.outputTokens + t.cacheReadTokens + t.cacheCreationTokens;
|
|
129
|
+
const perExchange = session.exchangeCount > 0 ? total / session.exchangeCount : total;
|
|
130
|
+
if (session.exchangeCount === 1) {
|
|
131
|
+
singleShotTokens += perExchange;
|
|
132
|
+
singleShotCount++;
|
|
133
|
+
} else if (session.exchangeCount >= 4) {
|
|
134
|
+
multiStepTokens += perExchange;
|
|
135
|
+
multiStepCount++;
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
const avgTokensSingleShot = singleShotCount > 0 ? Math.round(singleShotTokens / singleShotCount) : null;
|
|
139
|
+
const avgTokensMultiStep = multiStepCount > 0 ? Math.round(multiStepTokens / multiStepCount) : null;
|
|
140
|
+
|
|
120
141
|
return {
|
|
121
142
|
score: Math.max(0, Math.min(100, score)),
|
|
122
143
|
details: {
|
|
@@ -127,6 +148,10 @@ export function computeDecomposition(sessions) {
|
|
|
127
148
|
avgPromptLength: Math.round(avgPromptLength),
|
|
128
149
|
longPromptRatio: promptCount > 0 ? Math.round(longPromptCount / promptCount * 100) : 0,
|
|
129
150
|
contextualFollowupRatio: promptCount > 0 ? Math.round(followupRatio * 100) : 0,
|
|
151
|
+
tokenEvidence: {
|
|
152
|
+
avgTokensPerExchangeSingleShot: avgTokensSingleShot,
|
|
153
|
+
avgTokensPerExchangeMultiStep: avgTokensMultiStep,
|
|
154
|
+
},
|
|
130
155
|
},
|
|
131
156
|
examples,
|
|
132
157
|
};
|
|
@@ -144,6 +144,35 @@ export function computeSessionStructure(sessions) {
|
|
|
144
144
|
if (bestContextPrompt) examples.push({ type: 'context_setting', prompt: bestContextPrompt });
|
|
145
145
|
if (bestRefinementPrompt) examples.push({ type: 'refinement', prompt: bestRefinementPrompt });
|
|
146
146
|
|
|
147
|
+
// ── Token cost evidence ──
|
|
148
|
+
// Compare token cost of focused sessions vs marathon sessions
|
|
149
|
+
let focusedTokens = 0, focusedTokenCount = 0;
|
|
150
|
+
let marathonTokens = 0, marathonTokenCount = 0;
|
|
151
|
+
let contextSetTokens = 0, contextSetCount = 0;
|
|
152
|
+
let noContextTokens = 0, noContextCount = 0;
|
|
153
|
+
|
|
154
|
+
for (const session of sessions) {
|
|
155
|
+
const t = session.tokenUsage;
|
|
156
|
+
if (!t || (t.inputTokens + t.outputTokens + t.cacheReadTokens + t.cacheCreationTokens) === 0) continue;
|
|
157
|
+
const total = t.inputTokens + t.outputTokens + t.cacheReadTokens + t.cacheCreationTokens;
|
|
158
|
+
const perExchange = session.exchangeCount > 0 ? total / session.exchangeCount : total;
|
|
159
|
+
|
|
160
|
+
// Duration-based
|
|
161
|
+
if (session.durationMinutes >= 10 && session.durationMinutes <= 45) {
|
|
162
|
+
focusedTokens += perExchange; focusedTokenCount++;
|
|
163
|
+
} else if (session.durationMinutes > 60) {
|
|
164
|
+
marathonTokens += perExchange; marathonTokenCount++;
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
// Context-setting vs not
|
|
168
|
+
const firstPrompt = session.exchanges[0]?.userPrompt || '';
|
|
169
|
+
if (contextSettingPatterns.test(firstPrompt) || firstPrompt.length > 200) {
|
|
170
|
+
contextSetTokens += perExchange; contextSetCount++;
|
|
171
|
+
} else if (session.exchanges.length > 0) {
|
|
172
|
+
noContextTokens += perExchange; noContextCount++;
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
|
|
147
176
|
return {
|
|
148
177
|
score: Math.max(0, Math.min(100, score)),
|
|
149
178
|
details: {
|
|
@@ -158,6 +187,12 @@ export function computeSessionStructure(sessions) {
|
|
|
158
187
|
long: longSessions,
|
|
159
188
|
focused: focusedSessions,
|
|
160
189
|
},
|
|
190
|
+
tokenEvidence: {
|
|
191
|
+
avgTokensPerExchangeFocused: focusedTokenCount > 0 ? Math.round(focusedTokens / focusedTokenCount) : null,
|
|
192
|
+
avgTokensPerExchangeMarathon: marathonTokenCount > 0 ? Math.round(marathonTokens / marathonTokenCount) : null,
|
|
193
|
+
avgTokensPerExchangeWithContext: contextSetCount > 0 ? Math.round(contextSetTokens / contextSetCount) : null,
|
|
194
|
+
avgTokensPerExchangeNoContext: noContextCount > 0 ? Math.round(noContextTokens / noContextCount) : null,
|
|
195
|
+
},
|
|
161
196
|
},
|
|
162
197
|
examples,
|
|
163
198
|
};
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token Efficiency Analytics
|
|
3
|
+
*
|
|
4
|
+
* Computes token spend statistics from Claude Code session data.
|
|
5
|
+
* This is NOT a scored dimension — it provides concrete evidence
|
|
6
|
+
* that enriches the existing 4 metrics with cost data.
|
|
7
|
+
*
|
|
8
|
+
* Outputs:
|
|
9
|
+
* - Total token breakdown (input, output, cache read, cache creation)
|
|
10
|
+
* - Estimated cost using Anthropic pricing
|
|
11
|
+
* - Per-project token breakdown
|
|
12
|
+
* - Costliest sessions and prompts
|
|
13
|
+
* - Cache efficiency ratio (how much context is re-read vs new)
|
|
14
|
+
* - Prompt-type cost analysis (vague vs specific, short vs long)
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
// ── Anthropic pricing per million tokens (as of early 2025) ──
|
|
18
|
+
// Claude Code uses a mix of models; we estimate with Sonnet pricing
|
|
19
|
+
// which is the most common model in Claude Code sessions.
|
|
20
|
+
/**
 * Price table in USD per million tokens, keyed by model id.
 *
 * Each entry lists one rate per token class: fresh input, output, cache read
 * and cache creation. `default` is the fallback for unknown models and carries
 * the same rates as the Sonnet entry.
 *
 * The table and every entry are frozen so a stray write cannot silently skew
 * every cost estimate derived from it.
 *
 * NOTE(review): the comment above says "early 2025" while the ids below carry
 * later date stamps (20250929, 20251001) — confirm these rates against the
 * current published price list.
 */
const PRICING = Object.freeze({
  'claude-sonnet-4-5-20250929': Object.freeze({ input: 3.00, output: 15.00, cacheRead: 0.30, cacheCreation: 3.75 }),
  'claude-opus-4-6': Object.freeze({ input: 15.00, output: 75.00, cacheRead: 1.50, cacheCreation: 18.75 }),
  'claude-haiku-4-5-20251001': Object.freeze({ input: 0.80, output: 4.00, cacheRead: 0.08, cacheCreation: 1.00 }),
  // Fallback for unknown models — use Sonnet pricing as default
  default: Object.freeze({ input: 3.00, output: 15.00, cacheRead: 0.30, cacheCreation: 3.75 }),
});
|
|
27
|
+
|
|
28
|
+
/**
 * Look up the per-million-token rates for a model id.
 *
 * Resolution order:
 *   1. a PRICING key (with any trailing 8-digit date stamp removed) that is a
 *      substring of the model id;
 *   2. a model-family keyword ("opus", "haiku", "sonnet") in the id;
 *   3. PRICING.default.
 *
 * Matching is case-insensitive. A missing, empty or non-string `model`
 * resolves to PRICING.default instead of throwing.
 *
 * @param {string} [model] - Model identifier as recorded in session data.
 * @returns {Object} Rates object with input, output, cacheRead, cacheCreation.
 */
function getPricing(model) {
  if (typeof model !== 'string' || model === '') return PRICING.default;

  const id = model.toLowerCase();

  for (const [key, prices] of Object.entries(PRICING)) {
    if (key === 'default') continue;
    // Strip only a date stamp: a looser /-\d+$/ would also cut the minor
    // version off undated ids such as "claude-opus-4-6".
    const undatedKey = key.replace(/-\d{8}$/, '');
    if (id.includes(undatedKey)) return prices;
  }

  // No table key matched — fall back to the model family named in the id.
  if (id.includes('opus')) return PRICING['claude-opus-4-6'];
  if (id.includes('haiku')) return PRICING['claude-haiku-4-5-20251001'];
  if (id.includes('sonnet')) return PRICING['claude-sonnet-4-5-20250929'];

  return PRICING.default;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Estimate the dollar cost of a token-usage record.
 *
 * Each counter is converted to millions of tokens and multiplied by the
 * matching rate. A missing counter is treated as 0, as addTokens and
 * totalTokens do, so a partial usage record does not turn the estimate
 * into NaN.
 *
 * @param {Object} tokens - Usage counters: inputTokens, outputTokens,
 *   cacheReadTokens, cacheCreationTokens.
 * @param {Object} pricing - Rates per million tokens: input, output,
 *   cacheRead, cacheCreation.
 * @returns {number} Estimated cost in the currency of `pricing`.
 */
function estimateCost(tokens, pricing) {
  const costOf = (count, ratePerMillion) => ((count || 0) / 1_000_000) * ratePerMillion;

  return (
    costOf(tokens.inputTokens, pricing.input) +
    costOf(tokens.outputTokens, pricing.output) +
    costOf(tokens.cacheReadTokens, pricing.cacheRead) +
    costOf(tokens.cacheCreationTokens, pricing.cacheCreation)
  );
}
|
|
48
|
+
|
|
49
|
+
/**
 * Accumulate one usage record into a running total, in place.
 *
 * The four counters (input, output, cache read, cache creation) of `source`
 * are added onto `target`; a counter that is absent or falsy on `source`
 * contributes 0. Nothing is returned — `target` is mutated.
 *
 * @param {Object} target - Running totals; must already hold all four counters.
 * @param {Object} source - Usage record to add.
 */
function addTokens(target, source) {
  const counters = ['inputTokens', 'outputTokens', 'cacheReadTokens', 'cacheCreationTokens'];
  for (const counter of counters) {
    target[counter] += source[counter] || 0;
  }
}
|
|
55
|
+
|
|
56
|
+
/**
 * Sum the four counters of a usage record.
 *
 * @param {Object} t - Usage record; any of inputTokens, outputTokens,
 *   cacheReadTokens, cacheCreationTokens may be absent and then counts as 0.
 * @returns {number} Combined token count across all four counters.
 */
function totalTokens(t) {
  const { inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens } = t;
  const fresh = (inputTokens || 0) + (outputTokens || 0);
  const withCacheReads = fresh + (cacheReadTokens || 0);
  return withCacheReads + (cacheCreationTokens || 0);
}
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* Compute comprehensive token efficiency analytics.
|
|
62
|
+
*
|
|
63
|
+
* @param {Array} sessions - Parsed sessions with tokenUsage on each exchange
|
|
64
|
+
* @returns {Object} Token analytics data (not a score)
|
|
65
|
+
*/
|
|
66
|
+
export function computeTokenEfficiency(sessions) {
|
|
67
|
+
if (sessions.length === 0) {
|
|
68
|
+
return { hasData: false };
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
// Check if any session has token data (only Claude Code currently provides this)
|
|
72
|
+
const sessionsWithTokens = sessions.filter(s =>
|
|
73
|
+
s.tokenUsage && totalTokens(s.tokenUsage) > 0
|
|
74
|
+
);
|
|
75
|
+
|
|
76
|
+
if (sessionsWithTokens.length === 0) {
|
|
77
|
+
return { hasData: false };
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// ── Aggregate totals ──
|
|
81
|
+
const totals = { inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheCreationTokens: 0 };
|
|
82
|
+
for (const s of sessionsWithTokens) {
|
|
83
|
+
addTokens(totals, s.tokenUsage);
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
const grandTotal = totalTokens(totals);
|
|
87
|
+
|
|
88
|
+
// ── Cost estimation ──
|
|
89
|
+
// For now use default pricing; could be refined per-message if model data is on exchanges
|
|
90
|
+
const pricing = PRICING.default;
|
|
91
|
+
const estimatedCostTotal = estimateCost(totals, pricing);
|
|
92
|
+
|
|
93
|
+
// ── Token composition ──
|
|
94
|
+
const composition = {
|
|
95
|
+
inputPct: grandTotal > 0 ? (totals.inputTokens / grandTotal * 100) : 0,
|
|
96
|
+
outputPct: grandTotal > 0 ? (totals.outputTokens / grandTotal * 100) : 0,
|
|
97
|
+
cacheReadPct: grandTotal > 0 ? (totals.cacheReadTokens / grandTotal * 100) : 0,
|
|
98
|
+
cacheCreationPct: grandTotal > 0 ? (totals.cacheCreationTokens / grandTotal * 100) : 0,
|
|
99
|
+
};
|
|
100
|
+
|
|
101
|
+
// The "context re-reading" ratio: cache_read / (cache_read + output)
|
|
102
|
+
// This shows how much of Claude's work is re-reading vs producing new output
|
|
103
|
+
const contextRereadRatio = (totals.cacheReadTokens + totals.outputTokens) > 0
|
|
104
|
+
? totals.cacheReadTokens / (totals.cacheReadTokens + totals.outputTokens)
|
|
105
|
+
: 0;
|
|
106
|
+
|
|
107
|
+
// ── Per-project breakdown ──
|
|
108
|
+
const projectTokens = {};
|
|
109
|
+
for (const s of sessionsWithTokens) {
|
|
110
|
+
const p = s.project || 'unknown';
|
|
111
|
+
if (!projectTokens[p]) {
|
|
112
|
+
projectTokens[p] = {
|
|
113
|
+
tokens: { inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheCreationTokens: 0 },
|
|
114
|
+
sessions: 0,
|
|
115
|
+
exchanges: 0,
|
|
116
|
+
};
|
|
117
|
+
}
|
|
118
|
+
addTokens(projectTokens[p].tokens, s.tokenUsage);
|
|
119
|
+
projectTokens[p].sessions++;
|
|
120
|
+
projectTokens[p].exchanges += s.exchangeCount;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
const perProject = Object.entries(projectTokens)
|
|
124
|
+
.map(([name, data]) => ({
|
|
125
|
+
name: name.length > 30 ? '...' + name.slice(-27) : name,
|
|
126
|
+
fullName: name,
|
|
127
|
+
totalTokens: totalTokens(data.tokens),
|
|
128
|
+
estimatedCost: estimateCost(data.tokens, pricing),
|
|
129
|
+
sessions: data.sessions,
|
|
130
|
+
exchanges: data.exchanges,
|
|
131
|
+
tokensPerExchange: data.exchanges > 0 ? Math.round(totalTokens(data.tokens) / data.exchanges) : 0,
|
|
132
|
+
...data.tokens,
|
|
133
|
+
}))
|
|
134
|
+
.sort((a, b) => b.totalTokens - a.totalTokens);
|
|
135
|
+
|
|
136
|
+
// ── Costliest sessions ──
|
|
137
|
+
const costliestSessions = sessionsWithTokens
|
|
138
|
+
.map(s => ({
|
|
139
|
+
id: s.id,
|
|
140
|
+
project: s.project || 'unknown',
|
|
141
|
+
totalTokens: totalTokens(s.tokenUsage),
|
|
142
|
+
estimatedCost: estimateCost(s.tokenUsage, pricing),
|
|
143
|
+
exchanges: s.exchangeCount,
|
|
144
|
+
durationMinutes: s.durationMinutes,
|
|
145
|
+
cacheReadRatio: totalTokens(s.tokenUsage) > 0
|
|
146
|
+
? s.tokenUsage.cacheReadTokens / totalTokens(s.tokenUsage)
|
|
147
|
+
: 0,
|
|
148
|
+
firstPrompt: s.exchanges[0]?.userPrompt?.slice(0, 80) || '',
|
|
149
|
+
}))
|
|
150
|
+
.sort((a, b) => b.totalTokens - a.totalTokens)
|
|
151
|
+
.slice(0, 5);
|
|
152
|
+
|
|
153
|
+
// ── Costliest exchanges (individual prompts) ──
|
|
154
|
+
const allExchanges = [];
|
|
155
|
+
for (const s of sessionsWithTokens) {
|
|
156
|
+
for (let i = 0; i < s.exchanges.length; i++) {
|
|
157
|
+
const ex = s.exchanges[i];
|
|
158
|
+
if (!ex.tokenUsage || totalTokens(ex.tokenUsage) === 0) continue;
|
|
159
|
+
allExchanges.push({
|
|
160
|
+
prompt: ex.userPrompt || '',
|
|
161
|
+
totalTokens: totalTokens(ex.tokenUsage),
|
|
162
|
+
estimatedCost: estimateCost(ex.tokenUsage, pricing),
|
|
163
|
+
cacheReadTokens: ex.tokenUsage.cacheReadTokens,
|
|
164
|
+
outputTokens: ex.tokenUsage.outputTokens,
|
|
165
|
+
sessionId: s.id,
|
|
166
|
+
project: s.project || 'unknown',
|
|
167
|
+
exchangeIndex: i,
|
|
168
|
+
});
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
allExchanges.sort((a, b) => b.totalTokens - a.totalTokens);
|
|
173
|
+
const costliestExchanges = allExchanges.slice(0, 5);
|
|
174
|
+
|
|
175
|
+
// ── Prompt length vs token cost correlation ──
|
|
176
|
+
// Group exchanges by prompt length buckets and compute avg token cost
|
|
177
|
+
const buckets = {
|
|
178
|
+
veryShort: { label: '< 20 chars', prompts: 0, totalTokens: 0, totalCost: 0 },
|
|
179
|
+
short: { label: '20-100 chars', prompts: 0, totalTokens: 0, totalCost: 0 },
|
|
180
|
+
medium: { label: '100-500 chars', prompts: 0, totalTokens: 0, totalCost: 0 },
|
|
181
|
+
long: { label: '500+ chars', prompts: 0, totalTokens: 0, totalCost: 0 },
|
|
182
|
+
};
|
|
183
|
+
|
|
184
|
+
for (const ex of allExchanges) {
|
|
185
|
+
const len = ex.prompt.length;
|
|
186
|
+
let bucket;
|
|
187
|
+
if (len < 20) bucket = buckets.veryShort;
|
|
188
|
+
else if (len < 100) bucket = buckets.short;
|
|
189
|
+
else if (len < 500) bucket = buckets.medium;
|
|
190
|
+
else bucket = buckets.long;
|
|
191
|
+
|
|
192
|
+
bucket.prompts++;
|
|
193
|
+
bucket.totalTokens += ex.totalTokens;
|
|
194
|
+
bucket.totalCost += ex.estimatedCost;
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
const promptLengthAnalysis = Object.values(buckets)
|
|
198
|
+
.filter(b => b.prompts > 0)
|
|
199
|
+
.map(b => ({
|
|
200
|
+
...b,
|
|
201
|
+
avgTokens: Math.round(b.totalTokens / b.prompts),
|
|
202
|
+
avgCost: b.totalCost / b.prompts,
|
|
203
|
+
}));
|
|
204
|
+
|
|
205
|
+
// ── Session length vs token efficiency ──
|
|
206
|
+
// Marathon sessions compound context, so later exchanges cost more
|
|
207
|
+
const sessionLengthAnalysis = {
|
|
208
|
+
short: { label: '1-5 exchanges', sessions: 0, avgTokensPerExchange: 0, totalTokens: 0, totalExchanges: 0 },
|
|
209
|
+
medium: { label: '6-20 exchanges', sessions: 0, avgTokensPerExchange: 0, totalTokens: 0, totalExchanges: 0 },
|
|
210
|
+
long: { label: '21-50 exchanges', sessions: 0, avgTokensPerExchange: 0, totalTokens: 0, totalExchanges: 0 },
|
|
211
|
+
marathon: { label: '50+ exchanges', sessions: 0, avgTokensPerExchange: 0, totalTokens: 0, totalExchanges: 0 },
|
|
212
|
+
};
|
|
213
|
+
|
|
214
|
+
for (const s of sessionsWithTokens) {
|
|
215
|
+
const ec = s.exchangeCount;
|
|
216
|
+
const t = totalTokens(s.tokenUsage);
|
|
217
|
+
let bucket;
|
|
218
|
+
if (ec <= 5) bucket = sessionLengthAnalysis.short;
|
|
219
|
+
else if (ec <= 20) bucket = sessionLengthAnalysis.medium;
|
|
220
|
+
else if (ec <= 50) bucket = sessionLengthAnalysis.long;
|
|
221
|
+
else bucket = sessionLengthAnalysis.marathon;
|
|
222
|
+
|
|
223
|
+
bucket.sessions++;
|
|
224
|
+
bucket.totalTokens += t;
|
|
225
|
+
bucket.totalExchanges += ec;
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
for (const bucket of Object.values(sessionLengthAnalysis)) {
|
|
229
|
+
bucket.avgTokensPerExchange = bucket.totalExchanges > 0
|
|
230
|
+
? Math.round(bucket.totalTokens / bucket.totalExchanges)
|
|
231
|
+
: 0;
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
// ── Top-level stats ──
|
|
235
|
+
const avgTokensPerSession = sessionsWithTokens.length > 0
|
|
236
|
+
? Math.round(grandTotal / sessionsWithTokens.length)
|
|
237
|
+
: 0;
|
|
238
|
+
const avgTokensPerExchange = allExchanges.length > 0
|
|
239
|
+
? Math.round(grandTotal / allExchanges.length)
|
|
240
|
+
: 0;
|
|
241
|
+
|
|
242
|
+
return {
|
|
243
|
+
hasData: true,
|
|
244
|
+
sessionsAnalyzed: sessionsWithTokens.length,
|
|
245
|
+
totals,
|
|
246
|
+
grandTotal,
|
|
247
|
+
estimatedCostTotal,
|
|
248
|
+
composition,
|
|
249
|
+
contextRereadRatio,
|
|
250
|
+
avgTokensPerSession,
|
|
251
|
+
avgTokensPerExchange,
|
|
252
|
+
perProject,
|
|
253
|
+
costliestSessions,
|
|
254
|
+
costliestExchanges,
|
|
255
|
+
promptLengthAnalysis,
|
|
256
|
+
sessionLengthAnalysis: Object.values(sessionLengthAnalysis).filter(b => b.sessions > 0),
|
|
257
|
+
};
|
|
258
|
+
}
|
|
@@ -81,6 +81,14 @@ function parseSessionFile(filePath) {
|
|
|
81
81
|
// Skip tool result messages (these are system-injected responses to tool calls)
|
|
82
82
|
if (role === 'user' && hasToolResults(entry.message.content)) continue;
|
|
83
83
|
|
|
84
|
+
// Extract token usage from assistant messages
|
|
85
|
+
const usage = (role === 'assistant' && entry.message.usage) ? {
|
|
86
|
+
inputTokens: entry.message.usage.input_tokens || 0,
|
|
87
|
+
outputTokens: entry.message.usage.output_tokens || 0,
|
|
88
|
+
cacheReadTokens: entry.message.usage.cache_read_input_tokens || 0,
|
|
89
|
+
cacheCreationTokens: entry.message.usage.cache_creation_input_tokens || 0,
|
|
90
|
+
} : null;
|
|
91
|
+
|
|
84
92
|
const turn = {
|
|
85
93
|
role,
|
|
86
94
|
text: extractTextContent(entry.message.content),
|
|
@@ -90,6 +98,7 @@ function parseSessionFile(filePath) {
|
|
|
90
98
|
uuid: entry.uuid || null,
|
|
91
99
|
parentUuid: entry.parentUuid || null,
|
|
92
100
|
model: entry.message.model || null,
|
|
101
|
+
usage,
|
|
93
102
|
};
|
|
94
103
|
|
|
95
104
|
// Skip empty assistant messages that are just tool call continuations
|
|
@@ -120,11 +129,19 @@ function groupIntoExchanges(turns) {
|
|
|
120
129
|
assistantResponses: [],
|
|
121
130
|
toolCalls: [],
|
|
122
131
|
thinkingContent: [],
|
|
132
|
+
tokenUsage: { inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheCreationTokens: 0 },
|
|
123
133
|
};
|
|
124
134
|
} else if (turn.role === 'assistant' && current) {
|
|
125
135
|
if (turn.text) current.assistantResponses.push(turn.text);
|
|
126
136
|
if (turn.thinking) current.thinkingContent.push(turn.thinking);
|
|
127
137
|
current.toolCalls.push(...turn.toolCalls);
|
|
138
|
+
// Accumulate token usage across all assistant turns in this exchange
|
|
139
|
+
if (turn.usage) {
|
|
140
|
+
current.tokenUsage.inputTokens += turn.usage.inputTokens;
|
|
141
|
+
current.tokenUsage.outputTokens += turn.usage.outputTokens;
|
|
142
|
+
current.tokenUsage.cacheReadTokens += turn.usage.cacheReadTokens;
|
|
143
|
+
current.tokenUsage.cacheCreationTokens += turn.usage.cacheCreationTokens;
|
|
144
|
+
}
|
|
128
145
|
}
|
|
129
146
|
}
|
|
130
147
|
|
|
@@ -159,6 +176,15 @@ export function parseProject(projectPath) {
|
|
|
159
176
|
.map(t => new Date(t).getTime())
|
|
160
177
|
.sort();
|
|
161
178
|
|
|
179
|
+
// Aggregate token usage across all exchanges in this session
|
|
180
|
+
const sessionTokens = { inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheCreationTokens: 0 };
|
|
181
|
+
for (const ex of exchanges) {
|
|
182
|
+
sessionTokens.inputTokens += ex.tokenUsage.inputTokens;
|
|
183
|
+
sessionTokens.outputTokens += ex.tokenUsage.outputTokens;
|
|
184
|
+
sessionTokens.cacheReadTokens += ex.tokenUsage.cacheReadTokens;
|
|
185
|
+
sessionTokens.cacheCreationTokens += ex.tokenUsage.cacheCreationTokens;
|
|
186
|
+
}
|
|
187
|
+
|
|
162
188
|
sessions.push({
|
|
163
189
|
id: file.replace('.jsonl', ''),
|
|
164
190
|
file,
|
|
@@ -170,6 +196,7 @@ export function parseProject(projectPath) {
|
|
|
170
196
|
durationMinutes: timestamps.length >= 2
|
|
171
197
|
? Math.round((timestamps[timestamps.length - 1] - timestamps[0]) / 60000)
|
|
172
198
|
: 0,
|
|
199
|
+
tokenUsage: sessionTokens,
|
|
173
200
|
});
|
|
174
201
|
}
|
|
175
202
|
|
package/src/upload.js
CHANGED
|
@@ -15,7 +15,7 @@ function truncateExamples(examples, maxLen = 120) {
|
|
|
15
15
|
}));
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
-
export async function generateProse(metrics, result, sessionStats) {
|
|
18
|
+
export async function generateProse(metrics, result, sessionStats, tokenEfficiency = null) {
|
|
19
19
|
const payload = {
|
|
20
20
|
metrics: {
|
|
21
21
|
decomposition: {
|
|
@@ -46,6 +46,15 @@ export async function generateProse(metrics, result, sessionStats) {
|
|
|
46
46
|
tier: result.tier,
|
|
47
47
|
},
|
|
48
48
|
sessionStats,
|
|
49
|
+
// Include token analytics summary for richer prose generation
|
|
50
|
+
tokenEfficiency: tokenEfficiency && tokenEfficiency.hasData ? {
|
|
51
|
+
grandTotal: tokenEfficiency.grandTotal,
|
|
52
|
+
estimatedCostTotal: tokenEfficiency.estimatedCostTotal,
|
|
53
|
+
contextRereadRatio: tokenEfficiency.contextRereadRatio,
|
|
54
|
+
composition: tokenEfficiency.composition,
|
|
55
|
+
avgTokensPerExchange: tokenEfficiency.avgTokensPerExchange,
|
|
56
|
+
sessionsAnalyzed: tokenEfficiency.sessionsAnalyzed,
|
|
57
|
+
} : null,
|
|
49
58
|
};
|
|
50
59
|
|
|
51
60
|
const response = await fetch(`${API_BASE}/public/cli/analyze`, {
|