npm - modelmeter-collect - Versions diffs - 0.1.0 → 0.3.0 - Mend

modelmeter-collect 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/collect.mjs +94 -0
package/package.json +1 -1

package/collect.mjs CHANGED Viewed

@@ -110,10 +110,15 @@ function scanClaude(files) {
       if (!id || state.claude[id]) continue
       state.claude[id] = 1
       const u = msg.usage
+      const toolNames = Array.isArray(msg.content)
+        ? msg.content.filter((b) => b && b.type === 'tool_use').map((b) => b.name).filter(Boolean)
+        : []
       events.push({
         provider: 'anthropic',
         model: msg.model || 'claude-unknown',
         occurredOn: (o.timestamp || '').slice(0, 10) || undefined,
+        occurredAt: o.timestamp || undefined,
+        tools: toolNames,
         uncachedInputTokens: u.input_tokens || 0,
         cacheReadInputTokens: u.cache_read_input_tokens || 0,
         cacheCreationInputTokens: u.cache_creation_input_tokens || 0,
@@ -225,6 +230,77 @@ for (const e of events) {
 }
 const payload = [...byKey.values()]
+// Recent hourly buckets feed the 5-hour rolling window via a separate, additive
+// endpoint. Only events with a real timestamp in the last 8 hours qualify, so a
+// backfill scan never pollutes the recent window. (Codex deltas lack per-event
+// timestamps, so the 5-hour window is Claude Code for now.)
+const HOUR_MS = 3_600_000
+const recentCutoff = Date.now() - 8 * HOUR_MS
+const byHour = new Map()
+for (const e of events) {
+  if (!e.occurredAt) continue
+  const t = new Date(e.occurredAt).getTime()
+  if (Number.isNaN(t) || t < recentCutoff) continue
+  const d = new Date(t)
+  d.setMinutes(0, 0, 0)
+  const hourIso = d.toISOString()
+  const key = `${e.provider}|${e.model}|${hourIso}`
+  const cur = byHour.get(key) || {
+    provider: e.provider,
+    model: e.model,
+    bucketHour: hourIso,
+    uncachedInputTokens: 0,
+    cacheReadInputTokens: 0,
+    cacheCreationInputTokens: 0,
+    outputTokens: 0,
+    numRequests: 0,
+  }
+  cur.uncachedInputTokens += e.uncachedInputTokens || 0
+  cur.cacheReadInputTokens += e.cacheReadInputTokens || 0
+  cur.cacheCreationInputTokens += e.cacheCreationInputTokens || 0
+  cur.outputTokens += e.outputTokens || 0
+  cur.numRequests += e.numRequests || 1
+  byHour.set(key, cur)
+}
+const hourly = [...byHour.values()]
+const HOURLY_URL = INGEST_URL.replace(/\/ingest$/, '/ingest-hourly')
+// Per-tool / per-MCP attribution. toolGroup() maps a tool name to its reporting group: MCP tools
+// collapse to their server (mcp__server__tool -> mcp:server) and built-ins keep their own name.
+// Calls are exact; tokens are an even split of each turn's usage across the distinct tool groups it called.
+function toolGroup(name) {
+  if (typeof name !== 'string' || !name) return 'unknown' // non-string or empty name
+  if (name.startsWith('mcp__')) {
+    const parts = name.split('__') // parts[0] is 'mcp', parts[1] is the server segment
+    return parts[1] ? `mcp:${parts[1]}` : 'mcp:unknown' // empty server segment (e.g. bare 'mcp__') -> mcp:unknown
+  }
+  return name // any other non-empty string is its own group
+}
+const byTool = new Map() // running totals keyed by "group|date"
+for (const e of events) {
+  if (!Array.isArray(e.tools) || e.tools.length === 0) continue // only events that carry tool names
+  const date = e.occurredOn || new Date().toISOString().slice(0, 10) // events without occurredOn fall back to today's UTC date
+  const callsByGroup = new Map() // group -> number of calls within this one event
+  for (const name of e.tools) {
+    const g = toolGroup(name)
+    callsByGroup.set(g, (callsByGroup.get(g) || 0) + 1)
+  }
+  const eventTokens =
+    (e.uncachedInputTokens || 0) +
+    (e.cacheReadInputTokens || 0) +
+    (e.cacheCreationInputTokens || 0) +
+    (e.outputTokens || 0)
+  const tokenShare = Math.round(eventTokens / callsByGroup.size) // NOTE(review): one rounded share is used for every group, so the shares may not sum exactly to eventTokens
+  for (const [g, calls] of callsByGroup) {
+    const key = `${g}|${date}`
+    const cur = byTool.get(key) || { tool: g, bucketDate: date, calls: 0, tokens: 0 }
+    cur.calls += calls // exact call count for this group
+    cur.tokens += tokenShare // split by distinct group, not weighted by call count
+    byTool.set(key, cur)
+  }
+}
+const toolsPayload = [...byTool.values()] // one row per (tool group, date)
 if (payload.length === 0) {
   process.exit(0)
 }
@@ -233,10 +309,13 @@ if (process.env.MODELMETER_DRYRUN) {
   const tally = {}
   for (const e of events) tally[e.provider] = (tally[e.provider] || 0) + 1
   console.log(`DRY RUN: ${events.length} raw events -> ${payload.length} daily rows`, tally)
+  console.log(`         + ${hourly.length} recent hourly rows, ${toolsPayload.length} tool rows -> ${HOURLY_URL}`)
   console.log(JSON.stringify(payload, null, 2))
+  if (toolsPayload.length) console.log('tools:', JSON.stringify(toolsPayload, null, 2))
   process.exit(0)
 }
+let committed = false
 try {
   const res = await fetch(INGEST_URL, {
     method: 'POST',
@@ -246,6 +325,7 @@ try {
   if (res.ok) {
     if (!existsSync(MM_DIR)) mkdirSync(MM_DIR, { recursive: true })
     writeFileSync(STATE_PATH, JSON.stringify(state))
+    committed = true
     console.error(`modelmeter: reported ${payload.length} usage rows`)
   } else {
     console.error(`modelmeter: ingest returned ${res.status}`)
@@ -253,4 +333,18 @@ try {
 } catch (err) {
   console.error(`modelmeter: ${err.message}`)
 }
+// Additive + best-effort: only after the daily batch is committed (state written),
+// so a retry cannot double-count into the hourly window.
+if (committed && (hourly.length > 0 || toolsPayload.length > 0)) {
+  try {
+    await fetch(HOURLY_URL, {
+      method: 'POST',
+      headers: { Authorization: `Bearer ${TOKEN}`, 'Content-Type': 'application/json' },
+      body: JSON.stringify({ source: 'collector', hours: hourly, tools: toolsPayload }),
+    })
+  } catch {
+    // detail (hourly + per-tool) is best-effort; never block the collector on it
+  }
+}
 process.exit(0)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "modelmeter-collect",
-  "version": "0.1.0",
+  "version": "0.3.0",
   "description": "Report LLM token usage from local Claude Code / Codex logs to ModelMeter. Token counts only, never prompts or keys.",
   "type": "module",
   "bin": {