npm - tokentracker-cli - Versions diffs - 0.5.92 → 0.5.94 - Mend

tokentracker-cli 0.5.92 → 0.5.94

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/README.md +7 -6
package/dashboard/dist/assets/{main-CHSJgtKj.js → main-BvlEZHQ6.js} +179 -178
package/dashboard/dist/index.html +1 -1
package/dashboard/dist/share.html +1 -1
package/package.json +3 -1
package/src/commands/init.js +41 -2
package/src/commands/serve.js +11 -0
package/src/commands/status.js +31 -2
package/src/commands/sync.js +28 -0
package/src/commands/uninstall.js +18 -1
package/src/lib/claude-config.js +16 -2
package/src/lib/local-api.js +11 -116
package/src/lib/pricing/curated-overrides.json +33 -0
package/src/lib/pricing/index.js +135 -0
package/src/lib/pricing/litellm-fetcher.js +172 -0
package/src/lib/pricing/matcher.js +149 -0
package/src/lib/pricing/seed-snapshot.json +1 -0
package/src/lib/rollout.js +284 -0
package/src/lib/tracker-paths.js +1 -0

package/src/lib/pricing/index.js ADDED Viewed

@@ -0,0 +1,135 @@
+// Public pricing API. Replaces the hard-coded MODEL_PRICING table that used
+// to live in src/lib/local-api.js. Keeps the same synchronous shape so all
+// existing callers (computeRowCost, /functions/* handlers, tests) work
+// unchanged once `ensurePricingLoaded()` has been awaited at startup.
+const fs = require("node:fs");
+const path = require("node:path");
+const os = require("node:os");
+const curatedOverrides = require("./curated-overrides.json");
+const {
+  lookupPricing,
+  buildLitellmPerMillionMap,
+} = require("./matcher");
+const { loadLitellmData } = require("./litellm-fetcher");
+const ZERO_PRICING = { input: 0, output: 0, cache_read: 0, cache_write: 0 };
+const SEED_SNAPSHOT_PATH = path.resolve(__dirname, "seed-snapshot.json");
+// Sync seed load. Done at require-time so callers that haven't awaited
+// ensurePricingLoaded() (e.g. tests, vite mock startup, edge functions) still
+// get LiteLLM-backed pricing instead of all-zero. ensurePricingLoaded() will
+// later upgrade this to fresh disk cache or upstream data.
+function loadSeedSync() {
+  try {
+    const raw = fs.readFileSync(SEED_SNAPSHOT_PATH, "utf8");
+    const parsed = JSON.parse(raw);
+    delete parsed._meta;
+    return parsed;
+  } catch (e) {
+    return {};
+  }
+}
+const seedRaw = loadSeedSync();
+const state = {
+  loaded: false, // set to true by ensurePricingLoaded() after a successful load
+  loadingPromise: null, // in-flight load shared by concurrent ensurePricingLoaded() callers
+  litellmRawMap: seedRaw, // raw per-token; field shape from LiteLLM JSON
+  litellmPerMillionMap: buildLitellmPerMillionMap(seedRaw), // USD/MTok
+  source: Object.keys(seedRaw).length ? "seed-snapshot:sync" : null, // origin of the current data; null when the seed is empty
+  // negativeCache prevents re-walking the LiteLLM map for models we've already
+  // determined are unknown. Cleared on every reload.
+  negativeCache: new Set(),
+};
+function defaultCachePath() {
+  return path.join(os.homedir(), ".tokentracker", "cache", "pricing.json");
+}
+async function ensurePricingLoaded(opts = {}) {
+  if (state.loaded) return state;
+  if (state.loadingPromise) return state.loadingPromise;
+  state.loadingPromise = (async () => {
+    try {
+      const cachePath = opts.cachePath || defaultCachePath();
+      const { data, source } = await loadLitellmData({ ...opts, cachePath });
+      state.litellmRawMap = data || {};
+      state.litellmPerMillionMap = buildLitellmPerMillionMap(state.litellmRawMap);
+      state.source = source;
+      state.loaded = true;
+      state.negativeCache.clear();
+      return state;
+    } finally {
+      state.loadingPromise = null;
+    }
+  })();
+  return state.loadingPromise;
+}
+// For tests: drop loaded state so a fresh call can re-load. Seeds with the
+// bundled snapshot so getModelPricing() still works without ensurePricingLoaded.
+function resetPricingForTests() {
+  state.loaded = false;
+  state.loadingPromise = null;
+  state.litellmRawMap = seedRaw;
+  state.litellmPerMillionMap = buildLitellmPerMillionMap(seedRaw);
+  state.source = Object.keys(seedRaw).length ? "seed-snapshot:sync" : null;
+  state.negativeCache.clear();
+}
+function getModelPricing(model) {
+  if (!model) return ZERO_PRICING;
+  if (state.negativeCache.has(model)) return ZERO_PRICING;
+  const result = lookupPricing(model, {
+    curated: curatedOverrides,
+    litellm: state.litellmPerMillionMap,
+  });
+  if (result.hit) return result.value;
+  state.negativeCache.add(model);
+  return ZERO_PRICING;
+}
+// Same formula and Codex/every-code reasoning-folding rule as the previous
+// computeRowCost in src/lib/local-api.js. Moved here so vite mock + local
+// server share one source of truth.
+function computeRowCost(row) {
+  const pricing = getModelPricing(row.model);
+  const reasoningIncludedInOutput = row.source === "codex" || row.source === "every-code";
+  const reasoningCost = reasoningIncludedInOutput
+    ? 0
+    : (row.reasoning_output_tokens || 0) * (pricing.output || 0);
+  return (
+    ((row.input_tokens || 0) * (pricing.input || 0) +
+      (row.output_tokens || 0) * (pricing.output || 0) +
+      (row.cached_input_tokens || 0) * (pricing.cache_read || 0) +
+      (row.cache_creation_input_tokens || 0) * (pricing.cache_write || 0) +
+      reasoningCost) /
+    1_000_000
+  );
+}
+// Backwards-compatible MODEL_PRICING export. Test at
+// test/model-breakdown.test.js:236 reads `localApi.MODEL_PRICING["kiro-agent"]`
+// and expects { input, output, cache_read, cache_write } shape. We expose the
+// CURATED.exact map (which contains the kiro entries by design); LiteLLM
+// entries are NOT included here because they're keyed dynamically and the old
+// table was authoritative for what is now CURATED.
+const MODEL_PRICING = curatedOverrides.exact;
+module.exports = {
+  ensurePricingLoaded,
+  getModelPricing,
+  computeRowCost,
+  resetPricingForTests,
+  MODEL_PRICING,
+  ZERO_PRICING,
+  // Internal hooks for tests.
+  __getStateForTests: () => state,
+};

package/src/lib/pricing/litellm-fetcher.js ADDED Viewed

@@ -0,0 +1,172 @@
+// LiteLLM data loader: 24h disk cache + bundled seed snapshot fallback.
+// Fetches from upstream when the cache is missing or stale and hands the
+// data back to the caller. loadLitellmData() is async; subsequent reads
+// (lookupPricing in matcher.js) operate on the caller's in-memory map
+// synchronously.
+const fs = require("node:fs");
+const fsp = require("node:fs/promises");
+const path = require("node:path");
+const LITELLM_PRICING_URL =
+  "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json";
+const DEFAULT_TTL_MS = 24 * 60 * 60 * 1000; // 24h
+const DEFAULT_FETCH_TIMEOUT_MS = 10_000;
+const SEED_SNAPSHOT_PATH = path.resolve(__dirname, "seed-snapshot.json");
+function readJsonSync(p) {
+  const raw = fs.readFileSync(p, "utf8");
+  return JSON.parse(raw);
+}
+async function readJsonAsync(p) {
+  const raw = await fsp.readFile(p, "utf8");
+  return JSON.parse(raw);
+}
+function isFresh(stat, ttlMs) {
+  if (!stat) return false;
+  return Date.now() - stat.mtimeMs < ttlMs;
+}
+async function statSafe(p) {
+  try {
+    return await fsp.stat(p);
+  } catch (e) {
+    if (e?.code === "ENOENT") return null;
+    throw e;
+  }
+}
+async function loadSeedSnapshot() {
+  // Sync read is fine — file is bundled and small (~250KB). Falling back to
+  // sync avoids a race with caller's synchronous lookup if seed must answer
+  // immediately.
+  try {
+    return readJsonSync(SEED_SNAPSHOT_PATH);
+  } catch (e) {
+    // Tolerate missing seed in dev environments where the build script
+    // hasn't run yet. Empty data = LiteLLM lookup miss = falls back to
+    // CURATED only.
+    return {};
+  }
+}
+async function fetchUpstream({ url = LITELLM_PRICING_URL, timeoutMs = DEFAULT_FETCH_TIMEOUT_MS } = {}) {
+  const ctrl = new AbortController();
+  const timer = setTimeout(() => ctrl.abort(), timeoutMs);
+  try {
+    const res = await fetch(url, { signal: ctrl.signal });
+    if (!res.ok) {
+      throw new Error(`LiteLLM fetch failed: HTTP ${res.status} ${res.statusText}`);
+    }
+    return await res.json();
+  } finally {
+    clearTimeout(timer);
+  }
+}
+async function writeCache(cachePath, data) {
+  await fsp.mkdir(path.dirname(cachePath), { recursive: true });
+  // Persist only the slimmed shape (4 cost fields) to keep disk small and
+  // make the cache file easy to inspect/edit.
+  const slim = {};
+  let kept = 0;
+  for (const [name, entry] of Object.entries(data)) {
+    if (!entry || typeof entry !== "object" || name.startsWith("_")) continue;
+    const out = {};
+    let hasAny = false;
+    for (const f of [
+      "input_cost_per_token",
+      "output_cost_per_token",
+      "cache_read_input_token_cost",
+      "cache_creation_input_token_cost",
+    ]) {
+      const v = entry[f];
+      if (typeof v === "number" && Number.isFinite(v)) {
+        out[f] = v;
+        hasAny = true;
+      }
+    }
+    if (hasAny) {
+      slim[name] = out;
+      kept++;
+    }
+  }
+  const payload = {
+    _meta: {
+      source: LITELLM_PRICING_URL,
+      cached_at: new Date().toISOString(),
+      kept_models: kept,
+    },
+    ...slim,
+  };
+  await fsp.writeFile(cachePath, JSON.stringify(payload) + "\n");
+  return slim;
+}
+// Public: load LiteLLM data into memory. Resolution chain:
+//   1. disk cache (if mtime < ttl)
+//   2. fetch upstream + write disk cache
+//   3. stale disk cache (network failed)
+//   4. bundled seed snapshot (fresh install / offline)
+async function loadLitellmData({
+  cachePath,
+  ttlMs = DEFAULT_TTL_MS,
+  fetchTimeoutMs = DEFAULT_FETCH_TIMEOUT_MS,
+  fetchImpl = fetchUpstream,
+  url = LITELLM_PRICING_URL,
+  logger = null,
+} = {}) {
+  if (!cachePath) {
+    throw new Error("loadLitellmData: cachePath is required");
+  }
+  const log = (level, msg) => {
+    if (logger && typeof logger[level] === "function") logger[level](msg);
+  };
+  // 1. Fresh disk cache
+  const stat = await statSafe(cachePath);
+  if (isFresh(stat, ttlMs)) {
+    try {
+      const data = await readJsonAsync(cachePath);
+      delete data._meta;
+      return { data, source: "disk-cache" };
+    } catch (e) {
+      log("warn", `[pricing] disk cache unreadable: ${e?.message || e}`);
+    }
+  }
+  // 2. Fetch upstream
+  try {
+    const upstream = await fetchImpl({ url, timeoutMs: fetchTimeoutMs });
+    const slim = await writeCache(cachePath, upstream);
+    return { data: slim, source: "upstream" };
+  } catch (e) {
+    log("warn", `[pricing] upstream fetch failed: ${e?.message || e}`);
+  }
+  // 3. Stale disk cache (better than seed)
+  if (stat) {
+    try {
+      const data = await readJsonAsync(cachePath);
+      delete data._meta;
+      log("warn", "[pricing] using stale disk cache");
+      return { data, source: "stale-cache" };
+    } catch (e) {
+      log("warn", `[pricing] stale cache unreadable: ${e?.message || e}`);
+    }
+  }
+  // 4. Bundled seed snapshot
+  const seed = await loadSeedSnapshot();
+  delete seed._meta;
+  return { data: seed, source: "seed-snapshot" };
+}
+module.exports = {
+  LITELLM_PRICING_URL,
+  DEFAULT_TTL_MS,
+  loadLitellmData,
+  loadSeedSnapshot,
+  fetchUpstream,
+};

package/src/lib/pricing/matcher.js ADDED Viewed

@@ -0,0 +1,149 @@
+// Pure pricing-lookup logic. No I/O, no async. Tested in isolation.
+//
+// Resolve order:
+//   1. CURATED exact match (self-defined aliases like kiro-*, hy3-*)
+//   2. LiteLLM exact match (mainstream claude/gpt-5/gemini)
+//   3. CURATED alias (e.g. "auto" -> "composer-1")
+//   4. CURATED fuzzy substring (e.g. "kiro-future-xyz" matches via "kiro")
+//   5. LiteLLM suffix-strip (gpt-5-codex-high-fast -> gpt-5-codex)
+//   6. LiteLLM reverse substring (longest-key first)
+//   7. miss  ({ hit: false, value: null } — caller decides what to do, typically zero-pricing + negative cache)
+const SUFFIX_STRIP_PATTERNS = [
+  /-xhigh-fast$/,
+  /-high-fast$/,
+  /-medium-fast$/,
+  /-low-fast$/,
+  /-xhigh$/,
+  /-high$/,
+  /-medium$/,
+  /-low$/,
+  /-fast$/,
+];
+function stripReasoningSuffix(model) {
+  for (const re of SUFFIX_STRIP_PATTERNS) {
+    if (re.test(model)) return model.replace(re, "");
+  }
+  return model;
+}
+// Memoise the sorted-by-length LiteLLM key list. Reverse-substring scan walks
+// this once per uncached model; ~2k keys × negligible per-iteration cost, but
+// computing the sort on every call would add up across a sync.
+const sortedKeysCache = new WeakMap();
+function getSortedKeys(litellm) {
+  let cached = sortedKeysCache.get(litellm);
+  if (!cached) {
+    cached = Object.keys(litellm).sort((a, b) => b.length - a.length);
+    sortedKeysCache.set(litellm, cached);
+  }
+  return cached;
+}
+function lookupPricing(model, { curated, litellm }) {
+  if (!model || typeof model !== "string") {
+    return { hit: false, source: "empty", value: null };
+  }
+  const lower = model.toLowerCase();
+  // 1. CURATED exact
+  if (curated.exact && curated.exact[model]) {
+    return { hit: true, source: "curated:exact", value: curated.exact[model] };
+  }
+  // 2. LiteLLM exact
+  if (litellm && litellm[model]) {
+    return { hit: true, source: "litellm:exact", value: litellm[model] };
+  }
+  // 3. CURATED alias (literal mapping like "auto" -> "composer-1")
+  if (curated.alias && curated.alias[model] && curated.exact[curated.alias[model]]) {
+    return {
+      hit: true,
+      source: "curated:alias",
+      value: curated.exact[curated.alias[model]],
+    };
+  }
+  // 4. CURATED fuzzy substring
+  if (Array.isArray(curated.fuzzy)) {
+    for (const { match, ref } of curated.fuzzy) {
+      if (!match || !ref) continue;
+      if (lower.includes(match.toLowerCase()) && curated.exact[ref]) {
+        return { hit: true, source: "curated:fuzzy", value: curated.exact[ref] };
+      }
+    }
+  }
+  // 5. LiteLLM suffix-strip
+  if (litellm) {
+    const stripped = stripReasoningSuffix(model);
+    if (stripped !== model && litellm[stripped]) {
+      return { hit: true, source: "litellm:strip", value: litellm[stripped] };
+    }
+  }
+  // 6. LiteLLM reverse substring (longest-key first)
+  if (litellm) {
+    const sorted = getSortedKeys(litellm);
+    for (const key of sorted) {
+      const keyLower = key.toLowerCase();
+      // Only accept if model is a superset of key (model contains key), to
+      // avoid e.g. "gpt-5" matching "gpt-5-pro" in the wrong direction.
+      if (lower.includes(keyLower)) {
+        return { hit: true, source: "litellm:fuzzy", value: litellm[key] };
+      }
+    }
+  }
+  return { hit: false, source: "miss", value: null };
+}
+// Convert one LiteLLM entry (per-token) to internal per-million USD shape.
+// Missing fields stay missing — callers default with `(pricing.x || 0)`.
+//
+// Why the round: floating-point math means 1e-7 * 1e6 = 0.09999999999999999.
+// Rounding to 10 significant decimals ($0.0000000001 / MTok) is well below
+// any realistic price step but cleans up the printed/asserted numbers.
+function roundToTenDecimals(n) {
+  return Math.round(n * 1e10) / 1e10;
+}
+function convertLitellmEntry(entry) {
+  if (!entry || typeof entry !== "object") return null;
+  const out = {};
+  if (typeof entry.input_cost_per_token === "number") {
+    out.input = roundToTenDecimals(entry.input_cost_per_token * 1_000_000);
+  }
+  if (typeof entry.output_cost_per_token === "number") {
+    out.output = roundToTenDecimals(entry.output_cost_per_token * 1_000_000);
+  }
+  if (typeof entry.cache_read_input_token_cost === "number") {
+    out.cache_read = roundToTenDecimals(entry.cache_read_input_token_cost * 1_000_000);
+  }
+  if (typeof entry.cache_creation_input_token_cost === "number") {
+    out.cache_write = roundToTenDecimals(entry.cache_creation_input_token_cost * 1_000_000);
+  }
+  return Object.keys(out).length ? out : null;
+}
+// Build a per-million-USD map from a LiteLLM raw map (or seed snapshot which
+// uses the same field names). Skips meta keys starting with "_".
+function buildLitellmPerMillionMap(rawData) {
+  if (!rawData || typeof rawData !== "object") return {};
+  const out = {};
+  for (const [name, entry] of Object.entries(rawData)) {
+    if (name.startsWith("_")) continue;
+    const converted = convertLitellmEntry(entry);
+    if (converted) out[name] = converted;
+  }
+  return out;
+}
+module.exports = {
+  lookupPricing,
+  stripReasoningSuffix,
+  convertLitellmEntry,
+  buildLitellmPerMillionMap,
+};