tokentracker-cli 0.5.79 → 0.5.80
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/lib/local-api.js +11 -1
- package/src/lib/rollout.js +19 -20
package/package.json
CHANGED
package/src/lib/local-api.js
CHANGED
|
@@ -106,12 +106,22 @@ function getModelPricing(model) {
|
|
|
106
106
|
|
|
107
107
|
/**
 * Estimate the cost of a single usage row.
 *
 * Each token bucket on the row is multiplied by the matching rate returned by
 * `getModelPricing(row.model)`, the products are summed, and the sum is divided
 * by 1,000,000 (which suggests the rates are quoted per million tokens —
 * confirm against the pricing table). Missing or falsy token counts and rates
 * are treated as 0.
 *
 * @param {object} row - Usage row. Reads `model`, `source`, `input_tokens`,
 *   `output_tokens`, `cached_input_tokens`, `cache_creation_input_tokens`
 *   and `reasoning_output_tokens`.
 * @returns {number} The summed token cost divided by 1,000,000.
 */
function computeRowCost(row) {
  const rates = getModelPricing(row.model);
  const outputRate = rates.output || 0;

  // Codex-family rollouts ("codex" and "every-code") already count reasoning
  // tokens inside `output_tokens`, so billing `reasoning_output_tokens` again
  // at the output rate would charge that slice twice. Every other source keeps
  // the explicit reasoning term, because reasoning is not guaranteed to be
  // folded into `output_tokens` there.
  let reasoningCost = 0;
  if (row.source !== "codex" && row.source !== "every-code") {
    reasoningCost = (row.reasoning_output_tokens || 0) * outputRate;
  }

  const inputCost = (row.input_tokens || 0) * (rates.input || 0);
  const outputCost = (row.output_tokens || 0) * outputRate;
  const cacheReadCost = (row.cached_input_tokens || 0) * (rates.cache_read || 0);
  const cacheWriteCost = (row.cache_creation_input_tokens || 0) * (rates.cache_write || 0);

  // Summation order is kept left-to-right so floating-point results are stable.
  const totalBeforeScaling =
    inputCost + outputCost + cacheReadCost + cacheWriteCost + reasoningCost;
  return totalBeforeScaling / 1_000_000;
}
|
package/src/lib/rollout.js
CHANGED
|
@@ -2150,12 +2150,12 @@ function pickDelta(lastUsage, totalUsage, prevTotals) {
|
|
|
2150
2150
|
const hasTotal = isNonEmptyObject(totalUsage);
|
|
2151
2151
|
const hasPrevTotals = isNonEmptyObject(prevTotals);
|
|
2152
2152
|
|
|
2153
|
-
//
|
|
2154
|
-
//
|
|
2155
|
-
|
|
2156
|
-
|
|
2157
|
-
|
|
2158
|
-
|
|
2153
|
+
// NOTE: We used to guard against "duplicate token_count records where
|
|
2154
|
+
// total_token_usage is unchanged" by returning null here. We removed that
|
|
2155
|
+
// guard to align token counts with ccusage exactly (audited against 10 days
|
|
2156
|
+
// of real rollouts). When last_token_usage is present we trust it as the
|
|
2157
|
+
// per-turn delta; when it's absent the cumulative-subtract path naturally
|
|
2158
|
+
// yields an all-zero delta on duplicates and is still filtered below.
|
|
2159
2159
|
if (!hasLast && hasTotal && hasPrevTotals && totalsReset(totalUsage, prevTotals)) {
|
|
2160
2160
|
const normalized = normalizeUsage(totalUsage);
|
|
2161
2161
|
return isAllZeroUsage(normalized) ? null : normalized;
|
|
@@ -2203,6 +2203,19 @@ function normalizeUsage(u) {
|
|
|
2203
2203
|
const n = Number(u[k] || 0);
|
|
2204
2204
|
out[k] = Number.isFinite(n) && n >= 0 ? Math.floor(n) : 0;
|
|
2205
2205
|
}
|
|
2206
|
+
// Codex rollouts (and Every Code, which shares the format) report
|
|
2207
|
+
// `input_tokens` as the TOTAL prompt, with `cached_input_tokens` as the
|
|
2208
|
+
// cached subset — i.e. the cached slice is INSIDE the input count. Our
|
|
2209
|
+
// queue schema (CLAUDE.md → Token Normalization Convention) stores
|
|
2210
|
+
// `input_tokens` as pure non-cached input and `cached_input_tokens`
|
|
2211
|
+
// separately. Without this subtraction the cost formula bills the cached
|
|
2212
|
+
// bytes twice: once at the full input rate and again at the cache_read
|
|
2213
|
+
// rate, producing ~6–7x cost inflation on cache-heavy Codex sessions
|
|
2214
|
+
// (verified against ccusage's per-day numbers on the same rollouts).
|
|
2215
|
+
// We intentionally leave `total_tokens` unchanged: Codex reports
|
|
2216
|
+
// total = input(inclusive of cached) + output, which numerically equals
|
|
2217
|
+
// our schema's non_cached + cached + output + 0 (cache_creation=0 here).
|
|
2218
|
+
out.input_tokens = Math.max(0, out.input_tokens - out.cached_input_tokens);
|
|
2206
2219
|
return out;
|
|
2207
2220
|
}
|
|
2208
2221
|
|
|
@@ -2241,20 +2254,6 @@ function isAllZeroUsage(u) {
|
|
|
2241
2254
|
return true;
|
|
2242
2255
|
}
|
|
2243
2256
|
|
|
2244
|
-
function sameUsage(a, b) {
|
|
2245
|
-
for (const k of [
|
|
2246
|
-
"input_tokens",
|
|
2247
|
-
"cached_input_tokens",
|
|
2248
|
-
"cache_creation_input_tokens",
|
|
2249
|
-
"output_tokens",
|
|
2250
|
-
"reasoning_output_tokens",
|
|
2251
|
-
"total_tokens",
|
|
2252
|
-
]) {
|
|
2253
|
-
if (toNonNegativeInt(a?.[k]) !== toNonNegativeInt(b?.[k])) return false;
|
|
2254
|
-
}
|
|
2255
|
-
return true;
|
|
2256
|
-
}
|
|
2257
|
-
|
|
2258
2257
|
function totalsReset(curr, prev) {
|
|
2259
2258
|
const currTotal = curr?.total_tokens;
|
|
2260
2259
|
const prevTotal = prev?.total_tokens;
|