npm - modelmeter-collect - Versions diffs - 0.3.0 → 0.5.0 - Mend

modelmeter-collect 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/README.md CHANGED Viewed

@@ -28,6 +28,16 @@ MODELMETER_DRYRUN=1 npx modelmeter-collect
 `init` writes `~/.modelmeter/config.json` (chmod 600) with your token and the ingest URL.
 Prefer env vars? Set `MODELMETER_TOKEN` and `MODELMETER_INGEST_URL` and skip `init`.
+## Check your setup
+```bash
+npx modelmeter-collect doctor            # which logs were found, last activity, config
+npx modelmeter-collect doctor --payload  # + the exact JSON that would be sent
+```
+`doctor` confirms it found your Claude Code and Codex logs and shows precisely what leaves
+your machine: model names, token counts, and tool/MCP names only. Never prompts or keys.
 ## Keep it live (per prompt)
 **Claude Code** — add a `Stop` hook (fires after every response). It passes the session

package/cli.mjs CHANGED Viewed

@@ -6,10 +6,19 @@
 //   npx modelmeter-collect init <mm_live_token>   # one-time: save the token
 //   npx modelmeter-collect                          # scan local logs and report
 //   MODELMETER_DRYRUN=1 npx modelmeter-collect      # preview without sending
-import { readFileSync, writeFileSync, mkdirSync, existsSync, chmodSync } from 'node:fs'
+import {
+  readFileSync,
+  writeFileSync,
+  mkdirSync,
+  existsSync,
+  chmodSync,
+  readdirSync,
+  statSync,
+} from 'node:fs'
 import { homedir } from 'node:os'
 import { join, dirname } from 'node:path'
 import { fileURLToPath } from 'node:url'
+import { formatDoctorReport } from './lib.mjs'
 const HOME = homedir()
 const MM_DIR = join(HOME, '.modelmeter')
@@ -47,12 +56,16 @@ function printHelp() {
 Usage:
   npx modelmeter-collect init <token> [--url <ingest-url>]
+  npx modelmeter-collect doctor [--payload]
   npx modelmeter-collect                 scan local logs and report
   npx modelmeter-collect --help
 Commands:
   init    Save your ingest token to ~/.modelmeter/config.json (chmod 600).
           Pass the token as an argument or via MODELMETER_TOKEN.
+  doctor  Check your setup: which logs were found, last activity, config
+          status, and exactly what would be sent. Add --payload for the raw
+          JSON (token counts only, never transcript text).
   (none)  Scan Claude Code + Codex logs and report token counts. Deduped,
           so it is safe to run repeatedly. MODELMETER_DRYRUN=1 previews only.
@@ -93,5 +106,70 @@ if (cmd === 'init' || cmd === 'setup') {
   process.exit(0)
 }
// Summarize the .jsonl session files found anywhere under a logs directory.
//   dir      - root directory to walk
//   cutoffMs - epoch milliseconds; a file modified at or after this counts as recent
// Returns { dir, found: false } when `dir` cannot be stat'ed. Otherwise returns
// { dir, found: true, recentCount, lastWriteMs }, where lastWriteMs is the newest
// mtime seen (0 when no .jsonl file exists). Directories that cannot be listed and
// files that cannot be stat'ed are skipped, never reported as errors.
function discoverLogs(dir, cutoffMs) {
  try {
    statSync(dir)
  } catch {
    return { dir, found: false }
  }

  const tally = { recentCount: 0, lastWriteMs: 0 }

  // Visit one directory, recursing into its subdirectories.
  const walk = (folder) => {
    let children
    try {
      children = readdirSync(folder, { withFileTypes: true })
    } catch {
      return
    }
    for (const child of children) {
      const fullPath = join(folder, child.name)
      if (child.isDirectory()) {
        walk(fullPath)
        continue
      }
      if (!child.isFile() || !fullPath.endsWith('.jsonl')) continue
      let modifiedMs
      try {
        modifiedMs = statSync(fullPath).mtimeMs
      } catch {
        continue
      }
      if (modifiedMs > tally.lastWriteMs) tally.lastWriteMs = modifiedMs
      if (modifiedMs >= cutoffMs) tally.recentCount += 1
    }
  }

  walk(dir)
  return { dir, found: true, recentCount: tally.recentCount, lastWriteMs: tally.lastWriteMs }
}
// `doctor` command: print a setup report (config status, discovered Claude Code and
// Codex logs, privacy summary). With --payload, also run the collector in dry-run
// mode so the exact JSON that would be sent is printed.
if (cmd === 'doctor') {
  const cfg = readConfig()
  // Apply the same MODELMETER_LOOKBACK_DAYS rule the collector uses (positive,
  // capped at 90, default 14) so the window reported here matches the one the
  // collector is configured with, instead of always claiming 14 days.
  const rawLookback = Number(process.env.MODELMETER_LOOKBACK_DAYS)
  const lookbackDays =
    Number.isFinite(rawLookback) && rawLookback > 0 ? Math.min(rawLookback, 90) : 14
  const nowMs = Date.now()
  const cutoffMs = nowMs - lookbackDays * 86_400_000
  console.log(
    formatDoctorReport({
      configPath: CONFIG_PATH,
      configFound: existsSync(CONFIG_PATH),
      // Environment variables take precedence over the saved config file.
      token: process.env.MODELMETER_TOKEN || cfg.token,
      ingestUrl: process.env.MODELMETER_INGEST_URL || cfg.ingestUrl,
      lookbackDays,
      nowMs,
      claude: discoverLogs(join(HOME, '.claude', 'projects'), cutoffMs),
      codex: discoverLogs(join(HOME, '.codex', 'sessions'), cutoffMs),
    }),
  )
  if (args.includes('--payload')) {
    console.log('\nNext batch (dry run, nothing is sent):')
    process.env.MODELMETER_DRYRUN = '1'
    await runCollector() // prints the exact payload (counts only), then exits
    // Guard: should the collector return instead of exiting, stop here so control
    // never falls through to the default scan that follows this block.
    process.exit(0)
  } else {
    console.log(
      '\nRun `npx modelmeter-collect doctor --payload` to preview the exact JSON that would be sent.',
    )
    process.exit(0)
  }
}
 // Default: scan and report.
 await runCollector()

package/collect.mjs CHANGED Viewed

@@ -9,15 +9,48 @@
 //
 // Config: MODELMETER_TOKEN + MODELMETER_INGEST_URL from env, or ~/.modelmeter/config.json
 //   { "token": "mm_live_...", "ingestUrl": "https://<ref>.supabase.co/functions/v1/ingest" }
-import { readFileSync, writeFileSync, mkdirSync, existsSync, statSync, readdirSync } from 'node:fs'
+import {
+  readFileSync,
+  writeFileSync,
+  mkdirSync,
+  existsSync,
+  statSync,
+  readdirSync,
+  chmodSync,
+} from 'node:fs'
 import { homedir } from 'node:os'
 import { join } from 'node:path'
+import {
+  findLastTokenCount,
+  codexToolFromEvent,
+  deriveHourlyUrl,
+  claudeEventFromLine,
+  codexDelta,
+  aggregateDaily,
+  aggregateHourly,
+  aggregateTools,
+} from './lib.mjs'
 const HOME = homedir()
 const MM_DIR = join(HOME, '.modelmeter')
 const STATE_PATH = join(MM_DIR, 'collector-state.json')
 const CONFIG_PATH = join(MM_DIR, 'config.json')
-const LOOKBACK_DAYS = Number(process.env.MODELMETER_LOOKBACK_DAYS) || 14
+// Clamp the lookback to a sane range so a bad env var cannot scan nothing
+// (negative) or traverse months of logs (huge).
+const RAW_LOOKBACK = Number(process.env.MODELMETER_LOOKBACK_DAYS)
+const LOOKBACK_DAYS =
+  Number.isFinite(RAW_LOOKBACK) && RAW_LOOKBACK > 0 ? Math.min(RAW_LOOKBACK, 90) : 14
+if (
+  process.env.MODELMETER_LOOKBACK_DAYS !== undefined &&
+  (!Number.isFinite(RAW_LOOKBACK) || RAW_LOOKBACK <= 0 || RAW_LOOKBACK > 90)
+) {
+  console.error(`modelmeter: MODELMETER_LOOKBACK_DAYS out of range, using ${LOOKBACK_DAYS}`)
+}
+const FETCH_TIMEOUT_MS = 8000
+// Cap the per-message dedup set so the state file cannot grow without bound. Older
+// entries fall out of the lookback window, so dropping them is safe.
+const CLAUDE_STATE_CAP = 200_000
 let cfg = {}
 try {
@@ -29,6 +62,23 @@ const TOKEN = process.env.MODELMETER_TOKEN || cfg.token
 const INGEST_URL = process.env.MODELMETER_INGEST_URL || cfg.ingestUrl
 if (!TOKEN || !INGEST_URL) process.exit(0) // not configured: do nothing, never block
// Send `body` as a JSON POST to `url` with the bearer token, aborting the request
// after FETCH_TIMEOUT_MS so a stalled connection cannot hang a Stop hook or let
// scheduled collectors pile up. Resolves with the fetch Response; an abort or
// network failure rejects and is left for the caller to handle.
async function postJson(url, body) {
  const abort = new AbortController()
  const deadline = setTimeout(() => abort.abort(), FETCH_TIMEOUT_MS)
  try {
    const request = {
      method: 'POST',
      headers: { Authorization: `Bearer ${TOKEN}`, 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
      signal: abort.signal,
    }
    const response = await fetch(url, request)
    return response
  } finally {
    // Always release the timer, whether the request succeeded, failed, or aborted.
    clearTimeout(deadline)
  }
}
 let state = { claude: {}, codex: {} }
 try {
   state = { claude: {}, codex: {}, ...JSON.parse(readFileSync(STATE_PATH, 'utf8')) }
@@ -86,6 +136,9 @@ function recentFiles(dir, limit = Infinity) {
 }
 const events = []
+// Codex tool attribution is computed per session (its token accounting is periodic,
+// not per-turn), so it is collected here and folded into the tool aggregation later.
+const codexToolDeltas = []
 // --- Claude Code: assistant turns carry message.usage; dedup by message uuid.
 function scanClaude(files) {
@@ -104,53 +157,25 @@ function scanClaude(files) {
       } catch {
         continue
       }
-      const msg = o.message
-      if (!msg || msg.role !== 'assistant' || !msg.usage) continue
-      const id = o.uuid || `${o.timestamp ?? ''}:${msg.id ?? ''}`
-      if (!id || state.claude[id]) continue
-      state.claude[id] = 1
-      const u = msg.usage
-      const toolNames = Array.isArray(msg.content)
-        ? msg.content.filter((b) => b && b.type === 'tool_use').map((b) => b.name).filter(Boolean)
-        : []
-      events.push({
-        provider: 'anthropic',
-        model: msg.model || 'claude-unknown',
-        occurredOn: (o.timestamp || '').slice(0, 10) || undefined,
-        occurredAt: o.timestamp || undefined,
-        tools: toolNames,
-        uncachedInputTokens: u.input_tokens || 0,
-        cacheReadInputTokens: u.cache_read_input_tokens || 0,
-        cacheCreationInputTokens: u.cache_creation_input_tokens || 0,
-        outputTokens: u.output_tokens || 0,
-        numRequests: 1,
-      })
+      const ev = claudeEventFromLine(o)
+      if (!ev || !ev.id || state.claude[ev.id]) continue
+      state.claude[ev.id] = 1
+      events.push(ev)
     }
   }
 }
 // --- Codex: cumulative token_count events; report per-session delta.
-function findLastTokenCount(obj) {
-  let last = null
-  const stack = [obj]
-  while (stack.length) {
-    const d = stack.pop()
-    if (Array.isArray(d)) stack.push(...d)
-    else if (d && typeof d === 'object') {
-      if (d.type === 'token_count' && d.info?.total_token_usage) last = d.info.total_token_usage
-      for (const v of Object.values(d)) stack.push(v)
-    }
-  }
-  return last
-}
 function scanCodex(files) {
   for (const file of files) {
     const m = file.match(/rollout-(\d{4}-\d{2}-\d{2})T[\d-]+-([0-9a-f-]+)\.jsonl$/)
     if (!m) continue
-    const date = m[1]
     const sessionId = m[2]
     let totals = null
+    let totalsTs = null // timestamp of the latest token_count event, for the 5-hour window
+    let maxTs = ''
     let model = 'gpt-5'
+    const toolCalls = [] // { ts, group }
     let text = ''
     try {
       text = readFileSync(file, 'utf8')
@@ -165,34 +190,28 @@ function scanCodex(files) {
       } catch {
         continue
       }
-      const t = findLastTokenCount(o)
-      if (t) totals = t
+      const ts = typeof o.timestamp === 'string' ? o.timestamp : null
+      if (ts && ts > maxTs) maxTs = ts
+      const p = o.payload || o
+      const ptype = p.type || o.type
       if (typeof o.model === 'string') model = o.model
-      else if (typeof o.payload?.model === 'string') model = o.payload.model
+      else if (typeof p.model === 'string') model = p.model
+      const tc = findLastTokenCount(o)
+      if (tc) {
+        totals = tc
+        if (ts) totalsTs = ts
+      }
+      const g = codexToolFromEvent(p, ptype)
+      if (g) toolCalls.push({ ts: ts || totalsTs || '', group: g })
     }
-    if (!totals) continue
-    const prev = state.codex[sessionId] || {
-      input_tokens: 0,
-      cached_input_tokens: 0,
-      output_tokens: 0,
-      reasoning_output_tokens: 0,
-    }
-    const dInput = Math.max(0, (totals.input_tokens || 0) - prev.input_tokens)
-    const dCached = Math.max(0, (totals.cached_input_tokens || 0) - prev.cached_input_tokens)
-    const dOut = Math.max(0, (totals.output_tokens || 0) - prev.output_tokens)
-    const dReason = Math.max(0, (totals.reasoning_output_tokens || 0) - prev.reasoning_output_tokens)
-    if (dInput + dCached + dOut + dReason > 0) {
-      events.push({
-        provider: 'openai',
-        model,
-        occurredOn: date,
-        uncachedInputTokens: Math.max(0, dInput - dCached),
-        cacheReadInputTokens: dCached,
-        cacheCreationInputTokens: 0,
-        outputTokens: dOut + dReason, // reasoning tokens bill as output
-        numRequests: 1,
-      })
-      state.codex[sessionId] = totals
+    const { event, toolDeltas, state: nextState } = codexDelta(
+      { totals, totalsTs, maxTs, model, fileDate: m[1], toolCalls },
+      state.codex[sessionId],
+    )
+    if (event) {
+      events.push(event)
+      for (const d of toolDeltas) codexToolDeltas.push(d)
+      state.codex[sessionId] = nextState
     }
   }
 }
@@ -207,99 +226,17 @@ if (hookInput?.transcript_path) {
 }
 // Collapse to one row per (provider, model, day) so the request stays small.
-const byKey = new Map()
-for (const e of events) {
-  const date = e.occurredOn || new Date().toISOString().slice(0, 10)
-  const key = `${e.provider}|${e.model}|${date}`
-  const cur = byKey.get(key) || {
-    provider: e.provider,
-    model: e.model,
-    occurredOn: date,
-    uncachedInputTokens: 0,
-    cacheReadInputTokens: 0,
-    cacheCreationInputTokens: 0,
-    outputTokens: 0,
-    numRequests: 0,
-  }
-  cur.uncachedInputTokens += e.uncachedInputTokens || 0
-  cur.cacheReadInputTokens += e.cacheReadInputTokens || 0
-  cur.cacheCreationInputTokens += e.cacheCreationInputTokens || 0
-  cur.outputTokens += e.outputTokens || 0
-  cur.numRequests += e.numRequests || 1
-  byKey.set(key, cur)
-}
-const payload = [...byKey.values()]
+const today = new Date().toISOString().slice(0, 10)
+const payload = aggregateDaily(events, today)
-// Recent hourly buckets feed the 5-hour rolling window via a separate, additive
-// endpoint. Only events with a real timestamp in the last 8 hours qualify, so a
-// backfill scan never pollutes the recent window. (Codex deltas lack per-event
-// timestamps, so the 5-hour window is Claude Code for now.)
-const HOUR_MS = 3_600_000
-const recentCutoff = Date.now() - 8 * HOUR_MS
-const byHour = new Map()
-for (const e of events) {
-  if (!e.occurredAt) continue
-  const t = new Date(e.occurredAt).getTime()
-  if (Number.isNaN(t) || t < recentCutoff) continue
-  const d = new Date(t)
-  d.setMinutes(0, 0, 0)
-  const hourIso = d.toISOString()
-  const key = `${e.provider}|${e.model}|${hourIso}`
-  const cur = byHour.get(key) || {
-    provider: e.provider,
-    model: e.model,
-    bucketHour: hourIso,
-    uncachedInputTokens: 0,
-    cacheReadInputTokens: 0,
-    cacheCreationInputTokens: 0,
-    outputTokens: 0,
-    numRequests: 0,
-  }
-  cur.uncachedInputTokens += e.uncachedInputTokens || 0
-  cur.cacheReadInputTokens += e.cacheReadInputTokens || 0
-  cur.cacheCreationInputTokens += e.cacheCreationInputTokens || 0
-  cur.outputTokens += e.outputTokens || 0
-  cur.numRequests += e.numRequests || 1
-  byHour.set(key, cur)
-}
-const hourly = [...byHour.values()]
-const HOURLY_URL = INGEST_URL.replace(/\/ingest$/, '/ingest-hourly')
+// Recent hourly buckets (last 8h, timestamped events only) feed the 5-hour rolling
+// window via a separate, additive endpoint, so a backfill cannot pollute it.
+const hourly = aggregateHourly(events, Date.now())
+const HOURLY_URL = deriveHourlyUrl(INGEST_URL, process.env.MODELMETER_HOURLY_INGEST_URL)
-// Per-tool / per-MCP attribution. Group MCP tools by server (mcp__server__tool ->
-// mcp:server) and keep built-ins by name. Calls are exact; tokens are an even
-// split of each turn's usage across the distinct tool groups it called.
-function toolGroup(name) {
-  if (typeof name !== 'string' || !name) return 'unknown'
-  if (name.startsWith('mcp__')) {
-    const parts = name.split('__')
-    return parts[1] ? `mcp:${parts[1]}` : 'mcp:unknown'
-  }
-  return name
-}
-const byTool = new Map()
-for (const e of events) {
-  if (!Array.isArray(e.tools) || e.tools.length === 0) continue
-  const date = e.occurredOn || new Date().toISOString().slice(0, 10)
-  const callsByGroup = new Map()
-  for (const name of e.tools) {
-    const g = toolGroup(name)
-    callsByGroup.set(g, (callsByGroup.get(g) || 0) + 1)
-  }
-  const eventTokens =
-    (e.uncachedInputTokens || 0) +
-    (e.cacheReadInputTokens || 0) +
-    (e.cacheCreationInputTokens || 0) +
-    (e.outputTokens || 0)
-  const tokenShare = Math.round(eventTokens / callsByGroup.size)
-  for (const [g, calls] of callsByGroup) {
-    const key = `${g}|${date}`
-    const cur = byTool.get(key) || { tool: g, bucketDate: date, calls: 0, tokens: 0 }
-    cur.calls += calls
-    cur.tokens += tokenShare
-    byTool.set(key, cur)
-  }
-}
-const toolsPayload = [...byTool.values()]
+// Per-tool / per-MCP attribution. Claude even-splits each turn's tokens across the
+// tools it called; Codex contributes precomputed deltas. Calls are exact.
+const toolsPayload = aggregateTools(events, codexToolDeltas, today)
 if (payload.length === 0) {
   process.exit(0)
@@ -317,14 +254,21 @@ if (process.env.MODELMETER_DRYRUN) {
 let committed = false
 try {
-  const res = await fetch(INGEST_URL, {
-    method: 'POST',
-    headers: { Authorization: `Bearer ${TOKEN}`, 'Content-Type': 'application/json' },
-    body: JSON.stringify({ source: 'collector', events: payload }),
-  })
+  const res = await postJson(INGEST_URL, { source: 'collector', events: payload })
   if (res.ok) {
     if (!existsSync(MM_DIR)) mkdirSync(MM_DIR, { recursive: true })
+    const claudeIds = Object.keys(state.claude)
+    if (claudeIds.length > CLAUDE_STATE_CAP) {
+      const next = {}
+      for (const id of claudeIds.slice(-CLAUDE_STATE_CAP)) next[id] = 1
+      state.claude = next
+    }
     writeFileSync(STATE_PATH, JSON.stringify(state))
+    try {
+      chmodSync(STATE_PATH, 0o600) // usage metadata is not secret, but keep it owner-only
+    } catch {
+      // best effort on platforms without POSIX perms
+    }
     committed = true
     console.error(`modelmeter: reported ${payload.length} usage rows`)
   } else {
@@ -335,16 +279,21 @@ try {
 }
 // Additive + best-effort: only after the daily batch is committed (state written),
-// so a retry cannot double-count into the hourly window.
+// so a retry cannot double-count into the hourly window. The daily state is already
+// committed, so these detail rows will not be resent; retry once to cover a transient
+// failure, then give up (the window self-heals as new data flows).
 if (committed && (hourly.length > 0 || toolsPayload.length > 0)) {
-  try {
-    await fetch(HOURLY_URL, {
-      method: 'POST',
-      headers: { Authorization: `Bearer ${TOKEN}`, 'Content-Type': 'application/json' },
-      body: JSON.stringify({ source: 'collector', hours: hourly, tools: toolsPayload }),
-    })
-  } catch {
-    // detail (hourly + per-tool) is best-effort; never block the collector on it
+  for (let attempt = 0; attempt < 2; attempt++) {
+    try {
+      const res = await postJson(HOURLY_URL, {
+        source: 'collector',
+        hours: hourly,
+        tools: toolsPayload,
+      })
+      if (res.ok) break
+    } catch {
+      // fall through to one retry, then give up
+    }
   }
 }
 process.exit(0)

package/lib.mjs ADDED Viewed

@@ -0,0 +1,272 @@
+// Pure, unit-tested core of the collector. All file I/O, networking, and state
+// persistence live in collect.mjs; everything here is deterministic given its
+// inputs, so it can be fixture-tested (see lib.test.mjs).
// Map a Claude tool name to its reporting group. MCP tools are named
// mcp__<server>__<tool> and collapse to `mcp:<server>` (or `mcp:unknown` when the
// server segment is empty); built-in tools keep their own name. Anything that is
// not a non-empty string becomes 'unknown'.
export function toolGroup(name) {
  const usable = typeof name === 'string' && name.length > 0
  if (!usable) return 'unknown'
  if (!name.startsWith('mcp__')) return name
  const [, server] = name.split('__')
  return server ? `mcp:${server}` : 'mcp:unknown'
}
// Group a Codex tool event.
//   payload - the event payload (any value; non-objects are treated as empty)
//   ptype   - the event type string
// Built-in calls (`function_call`, `custom_tool_call`) return their plain string
// name. MCP calls (`mcp_tool_call_begin` / `mcp_tool_call_end`) return
// `mcp:<server>` using the invocation's `server` or `server_name`, or plain 'mcp'
// when no usable server name is present. Returns null for anything that is not a
// tool call.
export function codexToolFromEvent(payload, ptype) {
  // Tolerate null / primitive payloads from malformed log lines instead of throwing.
  const data = payload !== null && typeof payload === 'object' ? payload : {}
  if (ptype === 'function_call' || ptype === 'custom_tool_call') {
    return typeof data.name === 'string' && data.name ? data.name : null
  }
  if (ptype === 'mcp_tool_call_end' || ptype === 'mcp_tool_call_begin') {
    const inv = data.invocation || {}
    // Only a non-empty string is a usable server name; an object here would
    // otherwise be reported as "mcp:[object Object]".
    const server = [inv.server, inv.server_name].find((v) => typeof v === 'string' && v)
    return server ? `mcp:${server}` : 'mcp'
  }
  return null
}
// Depth-first search of a parsed Codex log line for `token_count` blocks.
// Returns the `info.total_token_usage` object of the LAST such block in document
// order, or null when the line contains none. Children are pushed in reverse so the
// stack pops them in document order; this makes the final match the latest one
// rather than the earliest. Pushing one element at a time also avoids spreading a
// very large array into a single call.
export function findLastTokenCount(obj) {
  let last = null
  const stack = [obj]
  while (stack.length) {
    const node = stack.pop()
    if (node === null || typeof node !== 'object') continue
    const isArray = Array.isArray(node)
    if (!isArray && node.type === 'token_count' && node.info?.total_token_usage) {
      last = node.info.total_token_usage
    }
    const children = isArray ? node : Object.values(node)
    for (let i = children.length - 1; i >= 0; i--) stack.push(children[i])
  }
  return last
}
// Work out the detail (hourly + per-tool) endpoint.
//   ingestUrl - the configured ingest URL
//   override  - explicit detail URL; returned unchanged when truthy
// Otherwise a trailing `/ingest` (with or without a final slash) in the URL's path
// becomes `/ingest-hourly`. If `ingestUrl` does not parse as a URL, the same
// substitution is applied to the raw string instead.
export function deriveHourlyUrl(ingestUrl, override) {
  if (override) return override
  const toHourly = (path) => path.replace(/\/ingest\/?$/, '/ingest-hourly')
  let parsed
  try {
    parsed = new URL(ingestUrl)
  } catch {
    return toHourly(ingestUrl)
  }
  parsed.pathname = toHourly(parsed.pathname)
  return parsed.toString()
}
// Build a usage event from one parsed Claude transcript line.
//   o - the parsed JSON value of the line (any value)
// Returns null unless the line is an assistant message carrying `usage`. Otherwise
// returns an event with an `id` for the caller to dedup on (the line's uuid, else
// "<timestamp>:<message id>"), the model, the names of any tool_use content blocks,
// and the four token counters (missing counters become 0).
// `occurredOn` / `occurredAt` are only set from a non-empty string timestamp; any
// other timestamp type is ignored so a malformed line cannot throw here.
export function claudeEventFromLine(o) {
  const msg = o && o.message
  if (!msg || msg.role !== 'assistant' || !msg.usage) return null
  const u = msg.usage
  const timestamp = typeof o.timestamp === 'string' && o.timestamp ? o.timestamp : undefined
  const tools = Array.isArray(msg.content)
    ? msg.content
        .filter((b) => b && b.type === 'tool_use')
        .map((b) => b.name)
        .filter(Boolean)
    : []
  return {
    id: o.uuid || `${o.timestamp ?? ''}:${msg.id ?? ''}`,
    provider: 'anthropic',
    model: msg.model || 'claude-unknown',
    occurredOn: timestamp ? timestamp.slice(0, 10) : undefined, // YYYY-MM-DD prefix
    occurredAt: timestamp,
    tools,
    uncachedInputTokens: u.input_tokens || 0,
    cacheReadInputTokens: u.cache_read_input_tokens || 0,
    cacheCreationInputTokens: u.cache_creation_input_tokens || 0,
    outputTokens: u.output_tokens || 0,
    numRequests: 1,
  }
}
// Codex cumulative-delta + tool attribution. Codex logs carry running totals, so the
// usage to report is the difference between the latest totals and the totals stored
// for the session on the previous run.
//   { totals, totalsTs, maxTs, model, fileDate, toolCalls } - what a session walk
//       collected: latest cumulative usage, its timestamp, the newest timestamp seen
//       in the file, the model name, the date from the file name, and the tool calls
//       as { ts, group }
//   prev - previous session state: { total, lastTs }, or a bare totals object (the
//       older state shape), or undefined for a new session
// Returns { event, toolDeltas, state }. When there are no totals or no positive
// delta, `event` is null, `toolDeltas` is empty, and `state` is `prev` unchanged
// (null if absent). Otherwise `state` is { total: totals, lastTs }, with lastTs set
// to maxTs (falling back to totalsTs).
export function codexDelta({ totals, totalsTs, maxTs, model, fileDate, toolCalls }, prev) {
  if (!totals) return { event: null, toolDeltas: [], state: prev ?? null }
  const prevTotal = (prev && prev.total) || (prev && prev.input_tokens != null ? prev : null) || {}
  const prevLastTs = (prev && prev.lastTs) || ''
  // Treat a counter missing on either side as 0. Without this, a partial previous
  // state yields NaN, which slips past the `<= 0` guard below and is reported.
  const delta = (field) => Math.max(0, (totals[field] || 0) - (prevTotal[field] || 0))
  const dInput = delta('input_tokens')
  const dCached = delta('cached_input_tokens')
  const dOut = delta('output_tokens')
  const dReason = delta('reasoning_output_tokens')
  if (dInput + dCached + dOut + dReason <= 0) {
    return { event: null, toolDeltas: [], state: prev ?? null }
  }
  // Cached input is reported separately, so it is subtracted from the input delta.
  const uncachedInput = Math.max(0, dInput - dCached)
  const outputTokens = dOut + dReason // reasoning tokens are reported as output
  const newTotalTokens = uncachedInput + dCached + outputTokens
  const occurredOn = (totalsTs || '').slice(0, 10) || fileDate
  const event = {
    provider: 'openai',
    model: model || 'gpt-5',
    occurredOn,
    occurredAt: totalsTs || undefined,
    uncachedInputTokens: uncachedInput,
    cacheReadInputTokens: dCached,
    cacheCreationInputTokens: 0,
    outputTokens,
    numRequests: 1,
  }
  // Tool calls are deduped by the lastTs watermark: only calls with a timestamp
  // newer than the previous run's lastTs are counted, so a call is never recounted.
  // Calls without a timestamp cannot be placed against the watermark and are
  // skipped. Call counts are exact; the token figure is an even split of this
  // delta across the new calls, i.e. an estimate.
  const newCalls = (toolCalls || []).filter((c) => c.ts && c.ts > prevLastTs)
  const toolDeltas = []
  if (newCalls.length > 0 && newTotalTokens > 0) {
    const share = Math.round(newTotalTokens / newCalls.length)
    for (const c of newCalls) {
      toolDeltas.push({
        tool: c.group,
        bucketDate: (c.ts || '').slice(0, 10) || occurredOn,
        calls: 1,
        tokens: share,
      })
    }
  }
  return { event, toolDeltas, state: { total: totals, lastTs: maxTs || totalsTs || '' } }
}
// Render a token for display without exposing it.
//   token - the ingest token (any value; non-strings are stringified)
// Returns '(not set)' for a missing token. A token longer than 12 characters shows
// its first 12 followed by '...'. A shorter token shows at most its first 4
// characters, and never more than half of it, followed by '...', so a short token
// is never printed in full.
export function maskToken(token) {
  if (!token) return '(not set)'
  const text = String(token)
  if (text.length > 12) return `${text.slice(0, 12)}...`
  const shown = Math.min(4, Math.floor(text.length / 2))
  return `${text.slice(0, shown)}...`
}
// Format how long ago `ms` (epoch milliseconds) was, relative to `nowMs`, as a short
// string: "<n>s ago", "<n>m ago", "<n>h ago" or "<n>d ago". A falsy `ms` means there
// is no timestamp and yields 'never'; a timestamp in the future is shown as "0s ago".
// Units are floored, so 59m59s reads "59m ago" rather than rounding up to "60m ago".
function relAgo(ms, nowMs) {
  if (!ms) return 'never'
  const s = Math.max(0, Math.round((nowMs - ms) / 1000))
  if (s < 60) return `${s}s ago`
  if (s < 3600) return `${Math.floor(s / 60)}m ago`
  if (s < 86_400) return `${Math.floor(s / 3600)}h ago`
  return `${Math.floor(s / 86_400)}d ago`
}
// Render the `doctor` report as a single newline-joined string. Takes only gathered
// facts and does no I/O itself.
//   info: { configPath, configFound, token, ingestUrl, lookbackDays, nowMs,
//           claude/codex: { dir, found, recentCount, lastWriteMs } }
// The token is shown masked, a missing ingest URL is shown as '(not set)', and each
// log source reports either 'not found' or its recent file count and last write.
export function formatDoctorReport(info) {
  const lines = ['modelmeter-collect doctor', '']
  lines.push(`Config  ${info.configPath}`)
  lines.push(`  status:     ${info.configFound ? 'found' : 'not found'}`)
  lines.push(`  token:      ${maskToken(info.token)}`)
  lines.push(`  ingest URL: ${info.ingestUrl || '(not set)'}`)
  lines.push('')
  // Pluralize "day" the same way "session file" is pluralized below.
  const dayWord = info.lookbackDays === 1 ? 'day' : 'days'
  for (const [label, d] of [
    ['Claude Code', info.claude],
    ['Codex', info.codex],
  ]) {
    lines.push(`${label}  ${d.dir}`)
    if (!d.found) {
      lines.push('  logs:       not found')
    } else {
      const n = d.recentCount
      lines.push(
        `  logs:       found, ${n} session file${n === 1 ? '' : 's'} in the last ${info.lookbackDays} ${dayWord}`,
      )
      lines.push(`  last write: ${relAgo(d.lastWriteMs, info.nowMs)}`)
    }
    lines.push('')
  }
  lines.push('Privacy')
  lines.push('  sent:       model names, token counts, tool and MCP names, dates')
  lines.push('  never sent: prompts, responses, file contents, API keys')
  return lines.join('\n')
}
// Sum events into one row per (provider, model, date).
//   events - usage events; an event without `occurredOn` is filed under `today`
//   today  - fallback date string for such events
// Returns the rows in first-seen order. Missing token counters count as 0, and a
// missing (or zero) `numRequests` counts as 1.
export function aggregateDaily(events, today) {
  const rows = new Map()
  for (const ev of events) {
    const occurredOn = ev.occurredOn || today
    const bucketKey = `${ev.provider}|${ev.model}|${occurredOn}`
    let row = rows.get(bucketKey)
    if (row === undefined) {
      row = {
        provider: ev.provider,
        model: ev.model,
        occurredOn,
        uncachedInputTokens: 0,
        cacheReadInputTokens: 0,
        cacheCreationInputTokens: 0,
        outputTokens: 0,
        numRequests: 0,
      }
      rows.set(bucketKey, row)
    }
    row.uncachedInputTokens += ev.uncachedInputTokens || 0
    row.cacheReadInputTokens += ev.cacheReadInputTokens || 0
    row.cacheCreationInputTokens += ev.cacheCreationInputTokens || 0
    row.outputTokens += ev.outputTokens || 0
    row.numRequests += ev.numRequests || 1
  }
  return Array.from(rows.values())
}
// Recent hourly buckets for the 5-hour window.
//   events     - usage events; only those with a parseable `occurredAt` are used
//   nowMs      - current time in epoch milliseconds
//   lookbackMs - how far back an event may be and still qualify (default 8 hours)
// Returns one row per (provider, model, hour) in first-seen order. Events without a
// real timestamp, or older than the lookback, are ignored so a backfill cannot
// pollute the recent window.
// `bucketHour` is the start of the event's UTC hour as an ISO string. Truncation is
// done on the epoch value, not on local-time fields, so hosts in time zones with a
// 30- or 45-minute offset produce the same UTC-aligned buckets as every other host.
export function aggregateHourly(events, nowMs, lookbackMs = 8 * 3_600_000) {
  const HOUR_MS = 3_600_000
  const cutoff = nowMs - lookbackMs
  const byHour = new Map()
  for (const e of events) {
    if (!e.occurredAt) continue
    const t = new Date(e.occurredAt).getTime()
    if (Number.isNaN(t) || t < cutoff) continue
    const hourIso = new Date(Math.floor(t / HOUR_MS) * HOUR_MS).toISOString()
    const key = `${e.provider}|${e.model}|${hourIso}`
    const cur = byHour.get(key) || {
      provider: e.provider,
      model: e.model,
      bucketHour: hourIso,
      uncachedInputTokens: 0,
      cacheReadInputTokens: 0,
      cacheCreationInputTokens: 0,
      outputTokens: 0,
      numRequests: 0,
    }
    cur.uncachedInputTokens += e.uncachedInputTokens || 0
    cur.cacheReadInputTokens += e.cacheReadInputTokens || 0
    cur.cacheCreationInputTokens += e.cacheCreationInputTokens || 0
    cur.outputTokens += e.outputTokens || 0
    cur.numRequests += e.numRequests || 1
    byHour.set(key, cur)
  }
  return [...byHour.values()]
}
// Build per-tool / per-MCP rows keyed by (tool group, date).
//   events          - usage events; only those with a non-empty `tools` array count
//   codexToolDeltas - precomputed { tool, bucketDate, calls, tokens } rows for Codex
//   today           - fallback date for events without `occurredOn`
// For each event, its tool names are grouped with toolGroup and the event's total
// tokens are split evenly (rounded) across the distinct groups it called; call
// counts are exact. Codex deltas are added as given. Rows come back in first-seen
// order.
export function aggregateTools(events, codexToolDeltas = [], today) {
  const rows = new Map()

  // Add calls and tokens to the row for (tool, date), creating it on first use.
  function record(tool, date, calls, tokens) {
    const rowKey = `${tool}|${date}`
    let row = rows.get(rowKey)
    if (row === undefined) {
      row = { tool, bucketDate: date, calls: 0, tokens: 0 }
      rows.set(rowKey, row)
    }
    row.calls += calls
    row.tokens += tokens
  }

  for (const ev of events) {
    const names = ev.tools
    if (!Array.isArray(names) || names.length === 0) continue
    const date = ev.occurredOn || today

    const groupCalls = new Map()
    for (const toolName of names) {
      const group = toolGroup(toolName)
      groupCalls.set(group, (groupCalls.get(group) || 0) + 1)
    }

    const counters = [
      ev.uncachedInputTokens,
      ev.cacheReadInputTokens,
      ev.cacheCreationInputTokens,
      ev.outputTokens,
    ]
    const totalTokens = counters.reduce((sum, value) => sum + (value || 0), 0)
    const perGroup = Math.round(totalTokens / groupCalls.size)
    groupCalls.forEach((calls, group) => record(group, date, calls, perGroup))
  }

  for (const delta of codexToolDeltas) {
    record(delta.tool, delta.bucketDate, delta.calls, delta.tokens)
  }
  return Array.from(rows.values())
}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "modelmeter-collect",
-  "version": "0.3.0",
+  "version": "0.5.0",
   "description": "Report LLM token usage from local Claude Code / Codex logs to ModelMeter. Token counts only, never prompts or keys.",
   "type": "module",
   "bin": {
@@ -9,6 +9,7 @@
   "files": [
     "cli.mjs",
     "collect.mjs",
+    "lib.mjs",
     "README.md"
   ],
   "engines": {