tokentracker-cli 0.8.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,759 @@
1
+ // Claude Code "Context Breakdown" categorizer.
2
+ //
3
+ // Reads ~/.claude/projects/**/*.jsonl and splits each assistant message's
4
+ // usage into seven semantic buckets, mirroring (approximately) the Claude
5
+ // Code in-CLI /context view but as a historical aggregate. Computes on
6
+ // demand — no queue schema changes, no parser changes, no sync changes.
7
+ //
8
+ // Why these seven and not the screenshot's eight: the raw system prompt
9
+ // (which contains tools schema, skills, rules, MCP descriptions) is sent
10
+ // once per session as a 1h-ephemeral cache prefix and is never logged
11
+ // verbatim in the jsonl. So at the token-accounting layer those four are
12
+ // indistinguishable — they all collapse into `system_prefix`. UI says so.
13
+ const fssync = require("node:fs");
14
+ const os = require("node:os");
15
+ const path = require("node:path");
16
+ const readline = require("node:readline");
17
+
18
// The seven breakdown buckets. Order is significant: the `categories` arrays
// returned to callers are built by mapping over this list in order.
const CATEGORY_KEYS = [
  "system_prefix",
  "conversation_history",
  "user_input",
  "tool_calls",
  "subagents",
  "reasoning",
  "assistant_response",
];

// tool_use blocks with these names are attributed to the `subagents` bucket;
// all other tool_use blocks go to `tool_calls`.
const SUBAGENT_TOOL_NAMES = new Set(["Agent", "Task"]);
29
+
30
// Fresh zeroed token-usage accumulator; one per category bucket and one for
// the grand total. Callers mutate the returned object in place.
function emptyTotals() {
  const zeroed = {};
  for (const field of [
    "input_tokens",
    "cached_input_tokens",
    "cache_creation_input_tokens",
    "output_tokens",
    "reasoning_output_tokens",
    "total_tokens",
  ]) {
    zeroed[field] = 0;
  }
  return zeroed;
}
40
+
41
// Map of category key → fresh emptyTotals(), one entry per CATEGORY_KEYS.
function emptyCategoryMap() {
  return Object.fromEntries(CATEGORY_KEYS.map((key) => [key, emptyTotals()]));
}
46
+
47
// Accumulate `source`'s token counters onto `target`, treating missing or
// falsy fields on `source` as zero. Mutates `target`.
function addInto(target, source) {
  const fields = [
    "input_tokens",
    "cached_input_tokens",
    "cache_creation_input_tokens",
    "output_tokens",
    "reasoning_output_tokens",
    "total_tokens",
  ];
  for (const field of fields) {
    target[field] += source[field] || 0;
  }
}
55
+
56
// Default root where Claude Code stores per-project session transcripts.
function defaultClaudeProjectsDir() {
  const home = os.homedir();
  return path.join(home, ".claude", "projects");
}
59
+
60
// Depth-first (explicit LIFO stack) scan of rootDir collecting every *.jsonl
// session transcript. Unreadable directories are skipped silently.
function listSessionFiles(rootDir) {
  const found = [];
  const pending = [rootDir];
  while (pending.length > 0) {
    const current = pending.pop();
    let children;
    try {
      children = fssync.readdirSync(current, { withFileTypes: true });
    } catch (_e) {
      // Directory vanished or is unreadable — ignore and move on.
      continue;
    }
    for (const child of children) {
      const full = path.join(current, child.name);
      if (child.isDirectory()) {
        pending.push(full);
      } else if (child.isFile() && child.name.endsWith(".jsonl")) {
        found.push(full);
      }
    }
  }
  return found;
}
82
+
83
// Distribute one assistant message's output tokens across the four output
// categories by character-length ratio of its content blocks:
//   thinking → reasoning, tool_use(Agent|Task) → subagents,
//   tool_use(other) → tool_calls, text → assistant_response.
// If reasoning_output_tokens is reported explicitly, that exact figure is
// credited to `reasoning` first and only the remainder is pro-rated.
// Mutates `breakdown` in place; a zero output total is a no-op.
function splitOutputByContent(usage, content, breakdown) {
  const total = Math.max(0, Number(usage.output_tokens || 0));
  const reasoningExplicit = Math.max(0, Number(usage.reasoning_output_tokens || 0));
  if (total === 0) return;

  const blocks = Array.isArray(content) ? content : [];
  const buckets = { reasoning: 0, tool_calls: 0, subagents: 0, assistant_response: 0 };
  let totalChars = 0;

  for (const block of blocks) {
    if (!block || typeof block !== "object") continue;
    let chars = 0;
    if (block.type === "thinking") {
      // `|| 1` keeps an empty block from contributing zero weight.
      chars = String(block.thinking || block.text || "").length || 1;
      buckets.reasoning += chars;
    } else if (block.type === "text") {
      chars = String(block.text || "").length || 1;
      buckets.assistant_response += chars;
    } else if (block.type === "tool_use") {
      const inputJson = block.input ? JSON.stringify(block.input) : "";
      chars = (block.name || "").length + inputJson.length + 1;
      if (SUBAGENT_TOOL_NAMES.has(block.name)) buckets.subagents += chars;
      else buckets.tool_calls += chars;
    } else {
      continue;
    }
    totalChars += chars;
  }

  if (totalChars === 0) {
    // No recognizable content blocks: attribute everything to the plain
    // response bucket so the output total is never silently dropped.
    breakdown.assistant_response.output_tokens += total;
    breakdown.assistant_response.total_tokens += total;
    return;
  }

  // If the API reported reasoning tokens explicitly, peel them off first and
  // pro-rate the rest of the output across the remaining buckets.
  let nonReasoningOutput = total;
  if (reasoningExplicit > 0) {
    const reasoningShare = Math.min(reasoningExplicit, total);
    breakdown.reasoning.output_tokens += reasoningShare;
    breakdown.reasoning.reasoning_output_tokens += reasoningShare;
    breakdown.reasoning.total_tokens += reasoningShare;
    nonReasoningOutput = total - reasoningShare;
    // The thinking chars were just paid for exactly; drop them from pro-rating.
    totalChars -= buckets.reasoning;
    buckets.reasoning = 0;
  }

  if (nonReasoningOutput <= 0) return;
  if (totalChars <= 0) {
    // BUG FIX: content was thinking-only but the API reported fewer reasoning
    // tokens than output tokens. Previously the remainder vanished, so bucket
    // sums drifted below usage.output_tokens; credit it to assistant_response.
    breakdown.assistant_response.output_tokens += nonReasoningOutput;
    breakdown.assistant_response.total_tokens += nonReasoningOutput;
    return;
  }

  // Largest-remainder rounding so the four sub-buckets sum exactly to
  // nonReasoningOutput (no off-by-one drift across thousands of messages).
  const order = ["reasoning", "tool_calls", "subagents", "assistant_response"];
  const exact = order.map((k) => (buckets[k] / totalChars) * nonReasoningOutput);
  const floored = exact.map((x) => Math.floor(x));
  const leftover = nonReasoningOutput - floored.reduce((a, b) => a + b, 0);
  const byFrac = exact
    .map((x, i) => ({ i, frac: x - Math.floor(x) }))
    .sort((a, b) => b.frac - a.frac);
  for (let k = 0; k < leftover; k++) floored[byFrac[k % order.length].i] += 1;

  for (let i = 0; i < order.length; i++) {
    const key = order[i];
    const tok = floored[i];
    if (tok === 0) continue;
    breakdown[key].output_tokens += tok;
    breakdown[key].total_tokens += tok;
    if (key === "reasoning") breakdown[key].reasoning_output_tokens += tok;
  }
}
160
+
161
// Route one assistant message's usage counters into the category breakdown.
// Per-session state marks the FIRST cache_creation chunk as the system prompt
// prefix; every later cache_creation is incremental conversation history.
function classifyOneMessage(obj, sessionState, breakdown) {
  const usage = obj?.message?.usage;
  if (!usage || typeof usage !== "object") return;

  const asCount = (v) => Math.max(0, Number(v || 0));
  const cacheCreate = asCount(usage.cache_creation_input_tokens);
  const cacheRead = asCount(usage.cache_read_input_tokens);
  const inputNonCached = asCount(usage.input_tokens);
  const output = asCount(usage.output_tokens);

  // Pure non-cached input tokens are the user's fresh turn.
  if (inputNonCached > 0) {
    breakdown.user_input.input_tokens += inputNonCached;
    breakdown.user_input.total_tokens += inputNonCached;
  }

  // Cache reads replay earlier turns → conversation history.
  if (cacheRead > 0) {
    breakdown.conversation_history.cached_input_tokens += cacheRead;
    breakdown.conversation_history.total_tokens += cacheRead;
  }

  // First cache_creation of the session = system prefix; subsequent ones are
  // incremental history writes.
  if (cacheCreate > 0) {
    const target = sessionState.systemPrefixSeen
      ? breakdown.conversation_history
      : breakdown.system_prefix;
    target.cache_creation_input_tokens += cacheCreate;
    target.total_tokens += cacheCreate;
    sessionState.systemPrefixSeen = true;
  }

  // Output tokens get split across reasoning/tool/subagent/response buckets.
  if (output > 0) {
    splitOutputByContent(
      { output_tokens: output, reasoning_output_tokens: usage.reasoning_output_tokens },
      obj?.message?.content,
      breakdown,
    );
  }
}
207
+
208
// Stream one session .jsonl, classifying usage lines inside [fromIso, toIso]
// and deduping messages globally by `${message.id}:${requestId}`.
// Returns the number of messages classified (0 if the file is unreadable).
async function categorizeSessionFile(filePath, { fromIso, toIso, seenHashes }, breakdown) {
  let stream;
  try {
    stream = fssync.createReadStream(filePath, { encoding: "utf8" });
  } catch (_e) {
    return 0;
  }
  const rl = readline.createInterface({ input: stream, crlfDelay: Infinity });
  const sessionState = { systemPrefixSeen: false };
  let counted = 0;

  try {
    for await (const line of rl) {
      // Cheap pre-filter: only JSON lines mentioning usage can contribute.
      if (!line || !line.includes('"usage"')) continue;
      let obj;
      try {
        obj = JSON.parse(line);
      } catch (_e) {
        continue;
      }
      const ts = typeof obj?.timestamp === "string" ? obj.timestamp : null;
      if (!ts) continue;
      // ISO-8601 UTC strings compare correctly as plain strings.
      if (fromIso && ts < fromIso) continue;
      if (toIso && ts > toIso) continue;

      const msgId = obj?.message?.id;
      const reqId = obj?.requestId;
      if (msgId && reqId) {
        const hash = `${msgId}:${reqId}`;
        if (seenHashes.has(hash)) continue;
        seenHashes.add(hash);
      }

      classifyOneMessage(obj, sessionState, breakdown);
      counted += 1;
    }
  } catch (_e) {
    // BUG FIX: createReadStream only throws synchronously for bad arguments.
    // Real read errors (ENOENT for a file deleted after listing, EACCES, …)
    // surface here as async-iterator rejections; previously they rejected the
    // whole breakdown. Treat them like the sync case: skip the file.
    return counted;
  } finally {
    rl.close();
    stream.close?.();
  }
  return counted;
}
248
+
249
// Convert user-tz YYYY-MM-DD day keys into an inclusive UTC ISO range widened
// by ±14h (the maximum UTC offset spread) so timezone skew never excludes
// messages. This view is approximate by design; authoritative totals come
// from the queue's UTC rows.
function dayKeyToIsoBounds(from, to) {
  if (!from && !to) return { fromIso: null, toIso: null };

  const widen = (dayKey, timeSuffix, hourDelta) => {
    if (!dayKey) return null;
    const dt = new Date(`${dayKey}${timeSuffix}`);
    if (Number.isFinite(dt.getTime())) {
      dt.setUTCHours(dt.getUTCHours() + hourDelta);
    }
    return dt.toISOString();
  };

  return {
    fromIso: widen(from, "T00:00:00Z", -14),
    toIso: widen(to, "T23:59:59Z", 14),
  };
}
269
+
270
// Memoized breakdown results, keyed on `${root}|${from}|${to}|${fileCount}|${maxMtime}`.
// Because the max mtime is part of the key, any jsonl write naturally produces
// a fresh key; the 60s TTL is a safety net in case a change slips past the
// mtime observation. Bounded to 32 entries by the eviction in
// computeClaudeCategoryBreakdown.
const CACHE = new Map();
const CACHE_TTL_MS = 60_000;
274
+
275
// Newest modification time (ms since epoch) across `files`; files that cannot
// be statted are ignored. Returns 0 when nothing could be statted.
function maxMtimeMs(files) {
  let newest = 0;
  for (const file of files) {
    try {
      const { mtimeMs } = fssync.statSync(file);
      newest = Math.max(newest, mtimeMs);
    } catch (_e) {
      // File may have vanished between listing and statting — skip it.
    }
  }
  return newest;
}
285
+
286
// Aggregate every Claude session jsonl under rootDir into the seven-category
// breakdown, optionally restricted to the [from, to] day-key range
// (YYYY-MM-DD). Results are memoized in CACHE, keyed on root, range, file
// count, and max file mtime, with a 60s TTL.
// NOTE(review): `projectDir` is accepted but never used in this function —
// confirm whether callers expect project-scoped filtering here.
async function computeClaudeCategoryBreakdown({ from = null, to = null, rootDir = null, projectDir = null } = {}) {
  const root = rootDir || defaultClaudeProjectsDir();
  let files = [];
  try {
    files = listSessionFiles(root);
  } catch (_e) {
    // listSessionFiles swallows readdir errors internally, so this catch is
    // belt-and-braces; it returns an all-zero payload rather than throwing.
    return {
      source: "claude",
      scope: "supported",
      totals: emptyTotals(),
      categories: CATEGORY_KEYS.map((key) => ({
        key,
        totals: emptyTotals(),
        percent: 0,
      })),
      session_count: 0,
      message_count: 0,
    };
  }

  // Any jsonl write bumps maxMtimeMs and therefore produces a fresh key.
  const cacheKey = `${root}|${from || ""}|${to || ""}|${files.length}|${maxMtimeMs(files)}`;
  const cached = CACHE.get(cacheKey);
  if (cached && Date.now() - cached.at < CACHE_TTL_MS) {
    return cached.value;
  }

  const { fromIso, toIso } = dayKeyToIsoBounds(from, to);
  const breakdown = emptyCategoryMap();
  // Shared across all files so duplicate (msgId, requestId) pairs logged in
  // several session files are only counted once.
  const seenHashes = new Set();
  let messageCount = 0;
  let sessionCount = 0;

  for (const fp of files) {
    const counted = await categorizeSessionFile(
      fp,
      { fromIso, toIso, seenHashes },
      breakdown,
    );
    // A session only counts if it contributed at least one message in range.
    if (counted > 0) sessionCount += 1;
    messageCount += counted;
  }

  const totals = emptyTotals();
  for (const key of CATEGORY_KEYS) addInto(totals, breakdown[key]);

  const result = {
    source: "claude",
    scope: "supported",
    totals,
    categories: CATEGORY_KEYS.map((key) => {
      const t = breakdown[key];
      // Percent of the grand total, rounded to 2 decimal places.
      const percent = totals.total_tokens > 0
        ? Number(((t.total_tokens / totals.total_tokens) * 100).toFixed(2))
        : 0;
      return { key, totals: t, percent };
    }),
    session_count: sessionCount,
    message_count: messageCount,
  };

  CACHE.set(cacheKey, { at: Date.now(), value: result });
  // Bound cache size — categorizer is cheap to recompute, no point hoarding.
  if (CACHE.size > 32) {
    const oldest = [...CACHE.entries()].sort((a, b) => a[1].at - b[1].at)[0];
    if (oldest) CACHE.delete(oldest[0]);
  }
  return result;
}
354
+
355
// Lightweight on-disk count of static resources Claude Code's /context UI
// also surfaces (Skills, MCP servers, Memory files, Custom agents). These are
// counts of what's *installed*, not historical token usage — the same way
// /context shows "MCP tools 0 (115)" with the install count in parens. Lets
// the dashboard match that vocabulary even though token-level separation
// from the system prompt isn't possible from the rollout logs alone.

// Number of entries in `dir` matching `predicate`; 0 if dir is unreadable.
function countDirEntries(dir, predicate) {
  let entries;
  try {
    entries = fssync.readdirSync(dir, { withFileTypes: true });
  } catch (_e) {
    return 0;
  }
  let matched = 0;
  for (const entry of entries) {
    if (predicate(entry)) matched += 1;
  }
  return matched;
}
370
+
371
// True iff fp can be statted and is a regular file (directories and
// missing paths return false).
function fileExists(fp) {
  try {
    const st = fssync.statSync(fp);
    return st.isFile();
  } catch (_e) {
    return false;
  }
}
378
+
379
// Read and parse a JSON file, returning null on any read or parse failure.
function safeReadJson(fp) {
  let raw;
  try {
    raw = fssync.readFileSync(fp, "utf8");
    return JSON.parse(raw);
  } catch (_e) {
    return null;
  }
}
386
+
387
// Recursively follow Claude Code `@path/file.md` memory imports starting at
// filePath, adding every reachable file to `seen`. Cycles are broken by the
// seen-set; unreadable files simply end their branch.
function collectMemoryImports(filePath, seen) {
  if (!filePath || seen.has(filePath)) return;
  seen.add(filePath);

  let raw;
  try {
    raw = fssync.readFileSync(filePath, "utf8");
  } catch (_e) {
    return;
  }

  const baseDir = path.dirname(filePath);
  // `@path/to/file.md` is CC's import syntax; requiring a [./~]-leading,
  // .md-suffixed path keeps `user@host` / `email@host` text from matching.
  const importRe = /(?:^|\s)@([./~][^\s)]+\.md)\b/g;
  for (let match = importRe.exec(raw); match !== null; match = importRe.exec(raw)) {
    let target = match[1];
    if (target.startsWith("~")) {
      target = path.join(os.homedir(), target.slice(1).replace(/^\//, ""));
    } else if (!path.isAbsolute(target)) {
      target = path.resolve(baseDir, target);
    }
    if (fileExists(target)) collectMemoryImports(target, seen);
  }
}
410
+
411
// Path of the lexically greatest version subdirectory under pluginCacheRoot,
// or null when the root is unreadable or holds no directories. Lexical sort
// is a pragmatic semver stand-in: when several cached versions linger, the
// lexically largest is usually the latest install.
function findLatestPluginVersionDir(pluginCacheRoot) {
  let entries;
  try {
    entries = fssync.readdirSync(pluginCacheRoot, { withFileTypes: true });
  } catch (_e) {
    return null;
  }
  const versionNames = entries
    .filter((entry) => entry.isDirectory())
    .map((entry) => entry.name)
    .sort();
  const latest = versionNames.at(-1);
  return latest === undefined ? null : path.join(pluginCacheRoot, latest);
}
426
+
427
// Count skill directories (those containing SKILL.md or skill.md) under
// rootDir. A directory holding a skill file is counted and not descended
// into; all other directories are searched recursively.
function countSkillsInDir(rootDir) {
  let skills = 0;
  const pending = [rootDir];
  while (pending.length > 0) {
    const dir = pending.pop();
    let entries;
    try {
      entries = fssync.readdirSync(dir, { withFileTypes: true });
    } catch (_e) {
      continue;
    }
    for (const entry of entries) {
      if (!entry.isDirectory()) continue;
      const sub = path.join(dir, entry.name);
      const hasSkillFile =
        fileExists(path.join(sub, "SKILL.md")) || fileExists(path.join(sub, "skill.md"));
      if (hasSkillFile) skills += 1;
      else pending.push(sub);
    }
  }
  return skills;
}
451
+
452
// Recursively count every *.md file under rootDir (each one is treated as a
// custom agent definition). Unreadable directories are skipped.
function countAgentMarkdowns(rootDir) {
  let mdCount = 0;
  const pending = [rootDir];
  while (pending.length > 0) {
    const dir = pending.pop();
    let entries;
    try {
      entries = fssync.readdirSync(dir, { withFileTypes: true });
    } catch (_e) {
      continue;
    }
    for (const entry of entries) {
      const full = path.join(dir, entry.name);
      if (entry.isDirectory()) pending.push(full);
      else if (entry.isFile() && entry.name.endsWith(".md")) mdCount += 1;
    }
  }
  return mdCount;
}
471
+
472
// Plugin keys ("name@marketplace") enabled in the user's settings. Entries in
// settings.local.json override settings.json (CC's normal precedence); only
// entries explicitly set to `true` count as enabled.
function listEnabledPlugins() {
  const settingsDir = path.join(os.homedir(), ".claude");
  const base = safeReadJson(path.join(settingsDir, "settings.json"))?.enabledPlugins || {};
  const local = safeReadJson(path.join(settingsDir, "settings.local.json"))?.enabledPlugins || {};
  const merged = { ...base, ...local };
  const enabled = [];
  for (const [pluginKey, value] of Object.entries(merged)) {
    if (value === true) enabled.push(pluginKey);
  }
  return enabled;
}
482
+
483
// Count statically-configured Claude Code resources from the user's ~/.claude
// tree, the optional projectDir, and enabled plugins' caches. These are
// install counts, not token usage, so the dashboard can mirror the /context
// view's vocabulary.
// Returns { skills_count, custom_agents_count, memory_files_count, mcp_servers_count }.
function getConfiguredResources({ projectDir = null } = {}) {
  const home = os.homedir();
  const claudeRoot = path.join(home, ".claude");
  const cacheRoot = path.join(claudeRoot, "plugins", "cache");

  // --- Skills ------------------------------------------------------------
  // User-level skills plus (when a project dir is given) project-level ones.
  let skillsCount = countSkillsInDir(path.join(claudeRoot, "skills"));
  if (projectDir) {
    skillsCount += countSkillsInDir(path.join(projectDir, ".claude", "skills"));
  }

  // --- Custom agents -----------------------------------------------------
  let agentsCount = countAgentMarkdowns(path.join(claudeRoot, "agents"));
  if (projectDir) {
    agentsCount += countAgentMarkdowns(path.join(projectDir, ".claude", "agents"));
  }

  // --- MCP servers -------------------------------------------------------
  // Primary: ~/.claude.json (single dot-json — CC's main config), NOT
  // ~/.claude/settings.json (which holds GUI toggles, not MCP).
  let mcpCount = 0;
  const claudeJson = safeReadJson(path.join(home, ".claude.json"));
  if (claudeJson?.mcpServers && typeof claudeJson.mcpServers === "object") {
    mcpCount += Object.keys(claudeJson.mcpServers).length;
  }
  if (projectDir) {
    // Project-scoped servers declared in <project>/.mcp.json add to the count.
    const projectMcp = safeReadJson(path.join(projectDir, ".mcp.json"));
    if (projectMcp?.mcpServers && typeof projectMcp.mcpServers === "object") {
      mcpCount += Object.keys(projectMcp.mcpServers).length;
    }
  }

  // --- Plugin contributions (enabled plugins only) -----------------------
  // Plugin caches live at ~/.claude/plugins/cache/<marketplace>/<plugin>/<version>/
  // and contribute skills, agents, and mcpServers (declared in plugin.json).
  for (const pluginKey of listEnabledPlugins()) {
    // pluginKey is "name@marketplace" (e.g., "claude-mem@thedotmack").
    const [name, marketplace] = pluginKey.split("@");
    if (!name || !marketplace) continue;
    const pluginRoot = path.join(cacheRoot, marketplace, name);
    const versionDir = findLatestPluginVersionDir(pluginRoot);
    if (!versionDir) continue;
    skillsCount += countSkillsInDir(path.join(versionDir, "skills"));
    agentsCount += countAgentMarkdowns(path.join(versionDir, "agents"));
    const pluginManifest = safeReadJson(path.join(versionDir, ".claude-plugin", "plugin.json"));
    if (pluginManifest?.mcpServers && typeof pluginManifest.mcpServers === "object") {
      mcpCount += Object.keys(pluginManifest.mcpServers).length;
    }
  }

  // --- Memory files (CLAUDE.md + transitive @-imports) -------------------
  // `memorySeen` accumulates every reachable memory file; its size is the count.
  const memorySeen = new Set();
  const userMd = path.join(claudeRoot, "CLAUDE.md");
  const homeMd = path.join(home, "CLAUDE.md");
  if (fileExists(userMd)) collectMemoryImports(userMd, memorySeen);
  // Empty ~/CLAUDE.md is ignored (size check); presumably to skip stub files.
  if (fileExists(homeMd) && fssync.statSync(homeMd).size > 0) collectMemoryImports(homeMd, memorySeen);
  // Walk up from projectDir to find the closest CLAUDE.md (CC walks up too).
  // Handles dev servers running from a subdir (e.g. vite from dashboard/).
  if (projectDir) {
    let cursor = projectDir;
    // Bounded to 8 parent hops to avoid scanning the whole filesystem root.
    for (let i = 0; i < 8; i++) {
      const candidate = path.join(cursor, "CLAUDE.md");
      if (fileExists(candidate)) {
        collectMemoryImports(candidate, memorySeen);
        break;
      }
      const parent = path.dirname(cursor);
      if (parent === cursor) break;
      cursor = parent;
    }
  }

  return {
    skills_count: skillsCount,
    custom_agents_count: agentsCount,
    memory_files_count: memorySeen.size,
    mcp_servers_count: mcpCount,
  };
}
562
+
563
// All-zero breakdown payload for sources whose logs cannot be categorized.
function unsupportedSourcePayload(source) {
  const categories = CATEGORY_KEYS.map((key) => ({
    key,
    totals: emptyTotals(),
    percent: 0,
  }));
  return {
    source,
    scope: "unsupported",
    totals: emptyTotals(),
    categories,
    session_count: 0,
    message_count: 0,
  };
}
577
+
578
+ // ---------------------------------------------------------------------------
579
+ // Ground-truth bucket aggregator for queue.jsonl repair.
580
+ //
581
+ // `sync.js` historically used a stateful incremental pipeline (cursor offsets
582
+ // + persisted hash set) and the `reincludeClaudeMemObserverFiles` migration
583
+ // shipped 3 versions, each of which reset the hash set and re-read observer
584
+ // jsonls. Result: queue.jsonl ended up with ~+40% extra Claude tokens that
585
+ // never actually existed.
586
+ //
587
+ // This function is the source-of-truth replacement: scan every Claude jsonl,
588
+ // dedup messages by (msgId, requestId) globally — same algorithm ccusage
589
+ // uses — and emit one record per (model, hour_start) bucket. Callers (sync's
590
+ // repair migration) write these as authoritative rows to queue.jsonl,
591
+ // overwriting whatever was there for source=claude.
592
+ // ---------------------------------------------------------------------------
593
+
594
// Zeroed per-(model, half-hour) accumulator used by the ground-truth scan.
// Same fields as emptyTotals() plus conversation_count.
function bucketAccumulator() {
  const acc = {};
  for (const field of [
    "input_tokens",
    "cached_input_tokens",
    "cache_creation_input_tokens",
    "output_tokens",
    "reasoning_output_tokens",
    "total_tokens",
    "conversation_count",
  ]) {
    acc[field] = 0;
  }
  return acc;
}
605
+
606
// Floor a timestamp to its UTC half-hour boundary (:00 or :30) and return
// that boundary as an ISO string, or null for unparseable input.
function toUtcHalfHourStart(ts) {
  const parsed = new Date(ts);
  if (!Number.isFinite(parsed.getTime())) return null;
  const bucket = new Date(parsed.getTime());
  // setUTCMinutes(min, sec, ms) zeroes seconds/millis in the same call.
  bucket.setUTCMinutes(parsed.getUTCMinutes() >= 30 ? 30 : 0, 0, 0);
  return bucket.toISOString();
}
623
+
624
// Scan every Claude jsonl under rootDir, dedup assistant messages globally by
// (message.id, requestId), and aggregate usage into one row per
// (model, UTC half-hour bucket). Callers treat these rows as authoritative
// for source=claude. Returns { rows, seenHashes, fileList }.
async function computeClaudeGroundTruthBuckets({ rootDir = null } = {}) {
  const root = rootDir || defaultClaudeProjectsDir();
  const files = listSessionFiles(root);
  const buckets = new Map(); // `${model}|${hourStart}` → totals
  const seenHashes = new Set();
  const userMessageBuckets = new Map(); // for conversation_count tracking

  for (const fp of files) {
    let stream;
    try {
      stream = fssync.createReadStream(fp, { encoding: "utf8" });
    } catch (_e) {
      continue;
    }
    const rl = readline.createInterface({ input: stream, crlfDelay: Infinity });
    // NOTE(review): "/subagents/" assumes POSIX path separators — on Windows
    // paths use "\"; confirm whether that platform matters for this check.
    const isMainSession = !fp.includes("/subagents/");

    for await (const line of rl) {
      if (!line) continue;

      // Conversation count = main-session user messages with text content
      // (matches what parseClaudeFile in rollout.js does).
      if (isMainSession && line.includes('"type":"user"')) {
        let userObj;
        try {
          userObj = JSON.parse(line);
        } catch (_e) {
          /* skip */
        }
        if (userObj?.type === "user") {
          const content = userObj?.message?.content;
          const hasText =
            typeof content === "string" ||
            (Array.isArray(content) && content.some((b) => b?.type === "text"));
          if (hasText) {
            const ts = typeof userObj?.timestamp === "string" ? userObj.timestamp : null;
            const hourStart = ts ? toUtcHalfHourStart(ts) : null;
            if (hourStart) {
              // User messages carry no model field; they bucket under "unknown".
              const k = `unknown|${hourStart}`;
              userMessageBuckets.set(k, (userMessageBuckets.get(k) || 0) + 1);
            }
          }
        }
      }

      // Cheap pre-filter: only lines mentioning usage can contribute tokens.
      if (!line.includes('"usage"')) continue;
      let obj;
      try {
        obj = JSON.parse(line);
      } catch (_e) {
        continue;
      }
      const usage = obj?.message?.usage;
      if (!usage || typeof usage !== "object") continue;

      // Global dedup: the same API response can appear in multiple files.
      const msgId = obj?.message?.id;
      const reqId = obj?.requestId;
      if (msgId && reqId) {
        const hash = `${msgId}:${reqId}`;
        if (seenHashes.has(hash)) continue;
        seenHashes.add(hash);
      }

      const model = (obj?.message?.model || obj?.model || "unknown").trim() || "unknown";
      const ts = typeof obj?.timestamp === "string" ? obj.timestamp : null;
      const hourStart = ts ? toUtcHalfHourStart(ts) : null;
      if (!hourStart) continue;

      const inputTok = Math.max(0, Number(usage.input_tokens || 0));
      const cacheRead = Math.max(0, Number(usage.cache_read_input_tokens || 0));
      const cacheCreate = Math.max(0, Number(usage.cache_creation_input_tokens || 0));
      const outputTok = Math.max(0, Number(usage.output_tokens || 0));
      const reasoningTok = Math.max(0, Number(usage.reasoning_output_tokens || 0));
      // reasoningTok is tracked but intentionally excluded from `total` —
      // presumably already included in output_tokens; confirm upstream.
      const total = inputTok + cacheRead + cacheCreate + outputTok;

      const key = `${model}|${hourStart}`;
      let acc = buckets.get(key);
      if (!acc) {
        acc = bucketAccumulator();
        buckets.set(key, acc);
      }
      acc.input_tokens += inputTok;
      acc.cached_input_tokens += cacheRead;
      acc.cache_creation_input_tokens += cacheCreate;
      acc.output_tokens += outputTok;
      acc.reasoning_output_tokens += reasoningTok;
      acc.total_tokens += total;
    }
    rl.close();
    stream.close?.();
  }

  // Stitch user-message conversation counts onto the unknown-model bucket
  // for the same hour (matches rollout.js behavior — user messages are
  // counted under DEFAULT_MODEL because they have no model field).
  for (const [key, count] of userMessageBuckets) {
    let acc = buckets.get(key);
    if (!acc) {
      acc = bucketAccumulator();
      buckets.set(key, acc);
    }
    acc.conversation_count += count;
  }

  const out = [];
  for (const [key, totals] of buckets) {
    // Split on the FIRST "|" only: the ISO hourStart follows the separator.
    const sep = key.indexOf("|");
    const model = key.slice(0, sep);
    const hourStart = key.slice(sep + 1);
    out.push({
      source: "claude",
      model,
      hour_start: hourStart,
      ...totals,
      billable_total_tokens: totals.total_tokens,
    });
  }
  return {
    rows: out,
    seenHashes: Array.from(seenHashes),
    fileList: files,
  };
}
747
+
748
// Public API; the trailing four exports exist only for unit tests.
module.exports = {
  CATEGORY_KEYS,
  computeClaudeCategoryBreakdown,
  computeClaudeGroundTruthBuckets,
  unsupportedSourcePayload,
  getConfiguredResources,
  // Exported for tests
  splitOutputByContent,
  classifyOneMessage,
  emptyTotals,
  emptyCategoryMap,
};