tokentracker-cli 0.5.79 → 0.5.80

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tokentracker-cli",
3
- "version": "0.5.79",
3
+ "version": "0.5.80",
4
4
  "description": "Token usage tracker for AI agent CLIs (Claude Code, Codex, Cursor, Kiro, Gemini, OpenCode, OpenClaw, Hermes, GitHub Copilot)",
5
5
  "main": "src/cli.js",
6
6
  "bin": {
@@ -106,12 +106,22 @@ function getModelPricing(model) {
106
106
 
107
107
  function computeRowCost(row) {
108
108
  const pricing = getModelPricing(row.model);
109
+ // For OpenAI/Codex-family rollouts, `output_tokens` already includes any
110
+ // reasoning tokens (the OpenAI API's `completion_tokens` is inclusive),
111
+ // so adding a separate `reasoning_output_tokens * output_rate` term
112
+ // double-charges that slice. ccusage models this the same way. For other
113
+ // sources we keep the explicit reasoning term because `reasoning` is not
114
+ // guaranteed to be folded into `output_tokens`.
115
+ const reasoningIncludedInOutput = row.source === "codex" || row.source === "every-code";
116
+ const reasoningCost = reasoningIncludedInOutput
117
+ ? 0
118
+ : (row.reasoning_output_tokens || 0) * (pricing.output || 0);
109
119
  return (
110
120
  ((row.input_tokens || 0) * (pricing.input || 0) +
111
121
  (row.output_tokens || 0) * (pricing.output || 0) +
112
122
  (row.cached_input_tokens || 0) * (pricing.cache_read || 0) +
113
123
  (row.cache_creation_input_tokens || 0) * (pricing.cache_write || 0) +
114
- (row.reasoning_output_tokens || 0) * (pricing.output || 0)) /
124
+ reasoningCost) /
115
125
  1_000_000
116
126
  );
117
127
  }
@@ -2150,12 +2150,12 @@ function pickDelta(lastUsage, totalUsage, prevTotals) {
2150
2150
  const hasTotal = isNonEmptyObject(totalUsage);
2151
2151
  const hasPrevTotals = isNonEmptyObject(prevTotals);
2152
2152
 
2153
- // Codex rollout logs sometimes emit duplicate token_count records where total_token_usage does not
2154
- // change between adjacent entries. Counting last_token_usage in those cases will double-count.
2155
- if (hasTotal && hasPrevTotals && sameUsage(totalUsage, prevTotals)) {
2156
- return null;
2157
- }
2158
-
2153
+ // NOTE: We used to guard against "duplicate token_count records where
2154
+ // total_token_usage is unchanged" by returning null here. We removed that
2155
+ // guard to align token counts with ccusage exactly (audited against 10 days
2156
+ // of real rollouts). When last_token_usage is present we trust it as the
2157
+ // per-turn delta; when it's absent the cumulative-subtract path naturally
2158
+ // yields an all-zero delta on duplicates and is still filtered below.
2159
2159
  if (!hasLast && hasTotal && hasPrevTotals && totalsReset(totalUsage, prevTotals)) {
2160
2160
  const normalized = normalizeUsage(totalUsage);
2161
2161
  return isAllZeroUsage(normalized) ? null : normalized;
@@ -2203,6 +2203,19 @@ function normalizeUsage(u) {
2203
2203
  const n = Number(u[k] || 0);
2204
2204
  out[k] = Number.isFinite(n) && n >= 0 ? Math.floor(n) : 0;
2205
2205
  }
2206
+ // Codex rollouts (and Every Code, which shares the format) report
2207
+ // `input_tokens` as the TOTAL prompt, with `cached_input_tokens` as the
2208
+ // cached subset — i.e. the cached slice is INSIDE the input count. Our
2209
+ // queue schema (CLAUDE.md → Token Normalization Convention) stores
2210
+ // `input_tokens` as pure non-cached input and `cached_input_tokens`
2211
+ // separately. Without this subtraction the cost formula bills the cached
2212
+ // bytes twice: once at the full input rate and again at the cache_read
2213
+ // rate, producing ~6–7x cost inflation on cache-heavy Codex sessions
2214
+ // (verified against ccusage's per-day numbers on the same rollouts).
2215
+ // We intentionally leave `total_tokens` unchanged: Codex reports
2216
+ // total = input(inclusive of cached) + output, which numerically equals
2217
+ // our schema's non_cached + cached + output + 0 (cache_creation=0 here).
2218
+ out.input_tokens = Math.max(0, out.input_tokens - out.cached_input_tokens);
2206
2219
  return out;
2207
2220
  }
2208
2221
 
@@ -2241,20 +2254,6 @@ function isAllZeroUsage(u) {
2241
2254
  return true;
2242
2255
  }
2243
2256
 
2244
- function sameUsage(a, b) {
2245
- for (const k of [
2246
- "input_tokens",
2247
- "cached_input_tokens",
2248
- "cache_creation_input_tokens",
2249
- "output_tokens",
2250
- "reasoning_output_tokens",
2251
- "total_tokens",
2252
- ]) {
2253
- if (toNonNegativeInt(a?.[k]) !== toNonNegativeInt(b?.[k])) return false;
2254
- }
2255
- return true;
2256
- }
2257
-
2258
2257
  function totalsReset(curr, prev) {
2259
2258
  const currTotal = curr?.total_tokens;
2260
2259
  const prevTotal = prev?.total_tokens;