@ekkos/cli 1.5.0 → 1.5.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -73,13 +73,21 @@ const MAX_OUTPUT_200K_OPUS_SONNET = 16384;
73
73
  // ── Pricing ──
74
74
  // Pricing per MTok from https://docs.anthropic.com/en/docs/about-claude/pricing
75
75
  // Includes prompt caching rates (5m cache write=1.25x input, cache read=0.1x input).
76
+ // Pricing per MTok — source: https://platform.claude.com/docs/en/about-claude/pricing
77
+ // Cache write = 5-minute TTL (1.25x input). 1-hour TTL (2x input) not tracked separately.
78
+ // Fast mode (Opus 4.6 only, 6x standard) detected via x-ekkos-fast-mode header.
79
+ // Data residency (US-only, 1.1x all categories) detected via inference_geo header.
76
80
  const MODEL_PRICING = {
81
+ // ── Opus family ──
77
82
  'claude-opus-4-6-20260514': { input: 5, output: 25, cacheWrite: 6.25, cacheRead: 0.50 },
78
83
  'claude-opus-4-6': { input: 5, output: 25, cacheWrite: 6.25, cacheRead: 0.50 },
84
+ 'claude-opus-4-6-fast': { input: 30, output: 150, cacheWrite: 37.50, cacheRead: 3.00 }, // Fast mode (6x)
79
85
  'claude-opus-4-5-20251101': { input: 5, output: 25, cacheWrite: 6.25, cacheRead: 0.50 },
80
86
  'claude-opus-4-5': { input: 5, output: 25, cacheWrite: 6.25, cacheRead: 0.50 },
81
- 'claude-opus-4-20250514': { input: 5, output: 25, cacheWrite: 6.25, cacheRead: 0.50 },
82
- 'claude-opus-4': { input: 5, output: 25, cacheWrite: 6.25, cacheRead: 0.50 },
87
+ 'claude-opus-4-1': { input: 15, output: 75, cacheWrite: 18.75, cacheRead: 1.50 },
88
+ 'claude-opus-4-20250514': { input: 15, output: 75, cacheWrite: 18.75, cacheRead: 1.50 },
89
+ 'claude-opus-4': { input: 15, output: 75, cacheWrite: 18.75, cacheRead: 1.50 },
90
+ // ── Sonnet family ──
83
91
  'claude-sonnet-4-6-20260514': { input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.30 },
84
92
  'claude-sonnet-4-6': { input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.30 },
85
93
  'claude-sonnet-4-5-20250929': { input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.30 },
@@ -87,8 +95,12 @@ const MODEL_PRICING = {
87
95
  'claude-sonnet-4-5-20250514': { input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.30 },
88
96
  'claude-sonnet-4-20250514': { input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.30 },
89
97
  'claude-sonnet-4': { input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.30 },
98
+ // ── Haiku family ──
90
99
  'claude-haiku-4-5-20251001': { input: 1, output: 5, cacheWrite: 1.25, cacheRead: 0.10 },
91
100
  'claude-haiku-4-5': { input: 1, output: 5, cacheWrite: 1.25, cacheRead: 0.10 },
101
+ 'claude-haiku-3-5': { input: 0.80, output: 4, cacheWrite: 1.00, cacheRead: 0.08 },
102
+ 'claude-haiku-3': { input: 0.25, output: 1.25, cacheWrite: 0.30, cacheRead: 0.03 },
103
+ // ── Gemini family ──
92
104
  'gemini-3.1-flash-lite-preview': { input: 0.05, output: 0.20, cacheWrite: 0, cacheRead: 0.05 },
93
105
  'gemini-3.1-pro-preview': { input: 1.25, output: 5.00, cacheWrite: 0, cacheRead: 1.25 },
94
106
  'gemini-3-pro-preview': { input: 1.25, output: 5.00, cacheWrite: 0, cacheRead: 1.25 },
@@ -100,25 +112,42 @@ const MODEL_PRICING = {
100
112
  function getModelPricing(modelId) {
101
113
  if (MODEL_PRICING[modelId])
102
114
  return MODEL_PRICING[modelId];
103
- if (modelId.includes('gemini-3.1-pro'))
115
+ const lower = (modelId || '').toLowerCase();
116
+ // Gemini variants
117
+ if (lower.includes('gemini-3.1-pro'))
104
118
  return MODEL_PRICING['gemini-3.1-pro-preview'];
105
- if (modelId.includes('gemini-3-pro'))
119
+ if (lower.includes('gemini-3-pro'))
106
120
  return MODEL_PRICING['gemini-3-pro-preview'];
107
- if (modelId.includes('gemini-3.1-flash-lite'))
121
+ if (lower.includes('gemini-3.1-flash-lite'))
108
122
  return MODEL_PRICING['gemini-3.1-flash-lite-preview'];
109
- if (modelId.includes('gemini-3-flash'))
123
+ if (lower.includes('gemini-3-flash'))
110
124
  return MODEL_PRICING['gemini-3-flash-preview'];
111
- if (modelId.includes('gemini-2.5-pro'))
125
+ if (lower.includes('gemini-2.5-pro'))
112
126
  return MODEL_PRICING['gemini-2.5-pro'];
113
- if (modelId.includes('gemini-2.5-flash-lite'))
127
+ if (lower.includes('gemini-2.5-flash-lite'))
114
128
  return MODEL_PRICING['gemini-2.5-flash-lite'];
115
- if (modelId.includes('gemini-2.5-flash'))
129
+ if (lower.includes('gemini-2.5-flash'))
116
130
  return MODEL_PRICING['gemini-2.5-flash'];
117
- if (modelId.includes('opus'))
131
+ // Claude Opus — version-aware (4.6/4.5 = $5, 4.1/4 = $15)
132
+ if (lower.includes('opus-4-6') || lower.includes('opus-4-5'))
118
133
  return MODEL_PRICING['claude-opus-4-6'];
119
- if (modelId.includes('sonnet'))
134
+ if (lower.includes('opus-4-1') || lower.includes('opus-4-2'))
135
+ return MODEL_PRICING['claude-opus-4-1'];
136
+ if (lower.includes('opus-4'))
137
+ return MODEL_PRICING['claude-opus-4'];
138
+ if (lower.includes('opus'))
139
+ return MODEL_PRICING['claude-opus-4-6']; // Default to latest
140
+ // Claude Sonnet
141
+ if (lower.includes('sonnet'))
120
142
  return MODEL_PRICING['claude-sonnet-4-6'];
121
- if (modelId.includes('haiku'))
143
+ // Claude Haiku — version-aware
144
+ if (lower.includes('haiku-4-5') || lower.includes('haiku-4'))
145
+ return MODEL_PRICING['claude-haiku-4-5'];
146
+ if (lower.includes('haiku-3-5'))
147
+ return MODEL_PRICING['claude-haiku-3-5'];
148
+ if (lower.includes('haiku-3'))
149
+ return MODEL_PRICING['claude-haiku-3'];
150
+ if (lower.includes('haiku'))
122
151
  return MODEL_PRICING['claude-haiku-4-5-20251001'];
123
152
  return MODEL_PRICING['claude-sonnet-4-6'];
124
153
  }
@@ -599,7 +628,8 @@ function buildDashboardData(params) {
599
628
  model: params.model,
600
629
  turnCount: turns.length,
601
630
  totalCost,
602
- totalTokens: totalInput + totalCacheRead + totalCacheCreate + totalOutput,
631
+ // Unique content tokens: excludes cache_read (same prefix re-billed per turn)
632
+ totalTokens: totalInput + totalCacheCreate + totalOutput,
603
633
  totalInput,
604
634
  totalCacheRead,
605
635
  totalCacheCreate,
@@ -1915,7 +1945,12 @@ async function launchDashboard(initialSessionName, initialJsonlPath, refreshMs,
1915
1945
  if (!d)
1916
1946
  return;
1917
1947
  try {
1918
- const totalTokensM = ((d.totalInput + d.totalCacheRead + d.totalCacheCreate + d.totalOutput) / 1000000).toFixed(2);
1948
+ // Show unique content tokens: output (model-generated) + cache_create (new content cached)
1949
+ // + uncached input. Excludes cache_read (same prefix re-billed each turn — NOT unique content).
1950
+ // Previously showed the cumulative API total (input+read+create+output), which inflated the count by ~10-20x
1951
+ // because the same prefix was counted on every tool round-trip.
1952
+ const uniqueTokens = d.totalOutput + d.totalCacheCreate + d.totalInput;
1953
+ const totalTokensM = (uniqueTokens / 1000000).toFixed(2);
1919
1954
  const totalSavings = d.turns.reduce((s, t) => s + t.savings, 0);
1920
1955
  // Model routing breakdown (uses routedModel for actual model counts)
1921
1956
  const opusCount = d.turns.filter(t => t.routedModel.includes('opus')).length;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ekkos/cli",
3
- "version": "1.5.0",
3
+ "version": "1.5.2",
4
4
  "description": "ekkOS memory CLI — persistent memory for AI coding assistants (Claude Code, Gemini, Cursor, Windsurf)",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",