vibeusage 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,154 @@
1
+ "use strict";
2
+
3
+ const fs = require("node:fs");
4
+ const path = require("node:path");
5
+
6
+ /**
7
+ * Gemini audit strategy.
8
+ *
9
+ * Gemini writes one JSON per session under
10
+ * ~/.gemini/tmp/<hash>/chats/session-YYYY-MM-DDTHH-MM-<id>.json
11
+ * The file has `{ messages: [ { role, timestamp, model, tokens } ] }` where
12
+ * `tokens` is the cumulative usage up to that message (not a per-turn delta).
13
+ *
14
+ * Channel semantics differ from Claude but match Codex in one important way:
15
+ * input + cached + output + tool + thoughts != total
16
+ * because `tokens.total` is the authoritative upstream count that
17
+ * src/lib/rollout.js normalizeGeminiTokens passes through as-is to the DB.
18
+ * Naively summing the five sub-channels double-counts. As with the Codex
19
+ * strategy, we route `delta.total` into the output channel and zero the rest
20
+ * so the framework's sum-of-channels row.truth equals the DB total_tokens
21
+ * without exposing Gemini's internal breakdown through the generic contract.
22
+ *
23
+ * Dedupe:
24
+ * - Per-file index diff mirrors parseGeminiFile's `lastTotals` state.
25
+ * - When `tokens.total` drops (session reset / resume), we treat the current
26
+ * cumulative as the delta just like the parser does.
27
+ */
28
+
29
+ module.exports = {
30
+ id: "gemini",
31
+ displayName: "Gemini CLI",
32
+ sessionRoot({ home, env }) {
33
+ const base = (env && env.GEMINI_HOME) || path.join(home, ".gemini");
34
+ return path.join(base, "tmp");
35
+ },
36
+ walkSessions({ root }) {
37
+ if (!fs.existsSync(root)) return [];
38
+ const out = [];
39
+ for (const hash of safeReadDirSync(root)) {
40
+ if (!hash.isDirectory()) continue;
41
+ const chatsDir = path.join(root, hash.name, "chats");
42
+ for (const f of safeReadDirSync(chatsDir)) {
43
+ if (!f.isFile()) continue;
44
+ if (!f.name.startsWith("session-") || !f.name.endsWith(".json")) continue;
45
+ out.push(path.join(chatsDir, f.name));
46
+ }
47
+ }
48
+ return out;
49
+ },
50
+ *iterateRecords(filePath) {
51
+ let raw;
52
+ try {
53
+ raw = fs.readFileSync(filePath, "utf8");
54
+ } catch (_err) {
55
+ return;
56
+ }
57
+ if (!raw.trim()) return;
58
+ let session;
59
+ try {
60
+ session = JSON.parse(raw);
61
+ } catch (_err) {
62
+ return;
63
+ }
64
+ const messages = Array.isArray(session?.messages) ? session.messages : [];
65
+ let prevTotals = null;
66
+ for (const msg of messages) {
67
+ if (!msg || typeof msg !== "object") continue;
68
+ const ts = typeof msg.timestamp === "string" ? msg.timestamp : null;
69
+ if (!ts) continue;
70
+ const tokens = msg.tokens;
71
+ if (!tokens || typeof tokens !== "object") continue;
72
+
73
+ const curr = normalizeTokens(tokens);
74
+ const delta = diffTotals(curr, prevTotals);
75
+ prevTotals = curr;
76
+ if (!delta || !delta.total) continue;
77
+
78
+ yield {
79
+ line: JSON.stringify({ timestamp: ts, delta }),
80
+ context: { filePath },
81
+ };
82
+ }
83
+ },
84
+ extractUsage(line) {
85
+ if (!line) return null;
86
+ let obj;
87
+ try {
88
+ obj = JSON.parse(line);
89
+ } catch (_err) {
90
+ return null;
91
+ }
92
+ const ts = typeof obj.timestamp === "string" ? obj.timestamp : null;
93
+ const d = obj.delta;
94
+ if (!ts || !d || !Number(d.total)) return null;
95
+ return {
96
+ timestamp: ts,
97
+ dedupeId: null, // per-file index diff already dedupes
98
+ channels: {
99
+ input: 0,
100
+ cache_creation: 0,
101
+ cache_read: 0,
102
+ // Route the authoritative upstream total into a single channel; see
103
+ // module docstring for why we do not split it.
104
+ output: Number(d.total),
105
+ reasoning: 0,
106
+ },
107
+ };
108
+ },
109
+ };
110
+
111
// Coerce a raw `tokens` object from a Gemini session file into a fully
// numeric snapshot: missing / non-finite / negative fields become 0 and
// fractional counts are floored (same contract as nonneg()).
function normalizeTokens(tokens) {
  const clamp = (v) => {
    const n = Number(v);
    return Number.isFinite(n) && n >= 0 ? Math.floor(n) : 0;
  };
  return {
    input: clamp(tokens.input),
    cached: clamp(tokens.cached),
    output: clamp(tokens.output),
    tool: clamp(tokens.tool),
    thoughts: clamp(tokens.thoughts),
    total: clamp(tokens.total),
  };
}
121
+
122
// Turn two cumulative-totals snapshots into a per-message delta.
// - No previous snapshot: the whole cumulative value is the delta.
// - Total went backwards (session reset / resume): trust the new cumulative
//   value as the full delta, mirroring parseGeminiFile.
// - Otherwise: clamped field-wise difference.
function diffTotals(curr, prev) {
  if (!curr) return null;
  if (!prev || curr.total < prev.total) return curr;
  const delta = {};
  for (const field of ["input", "cached", "output", "tool", "thoughts", "total"]) {
    delta[field] = Math.max(0, curr[field] - prev[field]);
  }
  return delta;
}
141
+
142
// Coerce to a non-negative integer token count; non-finite or negative
// inputs collapse to 0, fractional counts are floored.
function nonneg(v) {
  const n = Number(v);
  return Number.isFinite(n) && n >= 0 ? Math.floor(n) : 0;
}
147
+
148
// readdirSync that returns [] instead of throwing when the directory is
// missing or unreadable.
function safeReadDirSync(p) {
  let entries;
  try {
    entries = fs.readdirSync(p, { withFileTypes: true });
  } catch (_err) {
    entries = [];
  }
  return entries;
}
@@ -0,0 +1,69 @@
1
+ "use strict";
2
+
3
+ const fs = require("node:fs");
4
+ const path = require("node:path");
5
+
6
+ /**
7
+ * Hermes audit strategy.
8
+ *
9
+ * Hermes does not write raw session logs; it emits one pre-aggregated event
10
+ * per turn into the vibeusage tracker directory:
11
+ * ~/.vibeusage/tracker/hermes.usage.jsonl
12
+ * Each line is a `{type: "usage", emitted_at, model, input_tokens,
13
+ * output_tokens, cache_read_tokens, cache_write_tokens, reasoning_tokens,
14
+ * total_tokens}` record. src/commands/sync.js parseHermesUsageLedger already
15
+ * copies `total_tokens` straight into the bucket, so this audit routes the
16
+ * upstream total into the output channel — same pattern we use for Codex and
17
+ * Gemini.
18
+ *
19
+ * sessionRoot: the tracker directory (NOT `~/.hermes/...` — Hermes usage data
20
+ * lives under ~/.vibeusage/tracker because Hermes is a plugin that hands
21
+ * vibeusage ledger rows directly).
22
+ */
23
+
24
+ module.exports = {
25
+ id: "hermes",
26
+ displayName: "Hermes Plugin",
27
+ sessionRoot({ home, env }) {
28
+ const base = (env && env.VIBEUSAGE_HOME) || path.join(home, ".vibeusage");
29
+ return path.join(base, "tracker");
30
+ },
31
+ walkSessions({ root }) {
32
+ const ledger = path.join(root, "hermes.usage.jsonl");
33
+ if (!fs.existsSync(ledger)) return [];
34
+ return [ledger];
35
+ },
36
+ extractUsage(line) {
37
+ if (!line) return null;
38
+ let event;
39
+ try {
40
+ event = JSON.parse(line);
41
+ } catch (_err) {
42
+ return null;
43
+ }
44
+ if (!event || event.type !== "usage") return null;
45
+ const timestamp = typeof event.emitted_at === "string" ? event.emitted_at : null;
46
+ if (!timestamp) return null;
47
+ const total = nonneg(event.total_tokens);
48
+ if (total === 0) return null;
49
+ return {
50
+ timestamp,
51
+ // Hermes ledger records do not carry a stable per-event id;
52
+ // the ledger is append-only and duplicates are prevented at write time.
53
+ dedupeId: null,
54
+ channels: {
55
+ input: 0,
56
+ cache_creation: 0,
57
+ cache_read: 0,
58
+ output: total, // route authoritative upstream total here
59
+ reasoning: 0,
60
+ },
61
+ };
62
+ },
63
+ };
64
+
65
// Clamp any value to a non-negative integer token count (0 for anything
// non-finite or negative; fractions are floored).
function nonneg(v) {
  const n = Number(v);
  if (!Number.isFinite(n)) return 0;
  return n < 0 ? 0 : Math.floor(n);
}
@@ -0,0 +1,105 @@
1
+ "use strict";
2
+
3
+ const fs = require("node:fs");
4
+ const path = require("node:path");
5
+
6
+ /**
7
+ * Kimi audit strategy.
8
+ *
9
+ * Kimi CLI writes one wire log per session:
10
+ * ~/.kimi/sessions/<project>/<sessionId>/wire.jsonl
11
+ * Each StatusUpdate line carries the delta for one Anthropic-compatible
12
+ * message:
13
+ * { timestamp: <unix_seconds float>,
14
+ * message: { type: "StatusUpdate",
15
+ * payload: { message_id, token_usage: {
16
+ * input_other, input_cache_creation,
17
+ * input_cache_read, output } } } }
18
+ *
19
+ * Channel mapping lines up with src/lib/rollout.js normalizeKimiUsage so the
20
+ * framework's sum-of-channels row.truth equals the DB total_tokens:
21
+ * input = input_other + input_cache_creation
22
+ * cache_read = input_cache_read
23
+ * output = output
24
+ * (cache_creation, reasoning) = 0 (already folded into input / n/a)
25
+ * total = input + cache_read + output
26
+ *
27
+ * Dedupe key: payload.message_id (chatcmpl-…). Kimi does not currently
28
+ * duplicate rows the way Claude Code does, but keying on message_id is
29
+ * free insurance and matches the AGENTS.md intake checklist.
30
+ */
31
+
32
+ module.exports = {
33
+ id: "kimi",
34
+ displayName: "Kimi CLI",
35
+ sessionRoot({ home, env }) {
36
+ const base = (env && env.KIMI_HOME) || path.join(home, ".kimi");
37
+ return path.join(base, "sessions");
38
+ },
39
+ walkSessions({ root }) {
40
+ if (!fs.existsSync(root)) return [];
41
+ const out = [];
42
+ for (const proj of safeReadDirSync(root)) {
43
+ if (!proj.isDirectory()) continue;
44
+ const projDir = path.join(root, proj.name);
45
+ for (const session of safeReadDirSync(projDir)) {
46
+ if (!session.isDirectory()) continue;
47
+ const wire = path.join(projDir, session.name, "wire.jsonl");
48
+ if (!fs.existsSync(wire)) continue;
49
+ out.push(wire);
50
+ }
51
+ }
52
+ return out;
53
+ },
54
+ extractUsage(line) {
55
+ if (!line || !line.includes("StatusUpdate")) return null;
56
+ let obj;
57
+ try {
58
+ obj = JSON.parse(line);
59
+ } catch (_err) {
60
+ return null;
61
+ }
62
+ if (obj?.message?.type !== "StatusUpdate") return null;
63
+ const payload = obj.message.payload;
64
+ const tokens = payload?.token_usage;
65
+ if (!tokens || typeof tokens !== "object") return null;
66
+ const timestamp = unixSecondsToIso(obj.timestamp);
67
+ if (!timestamp) return null;
68
+ return {
69
+ timestamp,
70
+ dedupeId: typeof payload.message_id === "string" && payload.message_id
71
+ ? payload.message_id
72
+ : null,
73
+ channels: {
74
+ input: nonneg(tokens.input_other) + nonneg(tokens.input_cache_creation),
75
+ cache_creation: 0, // already folded into input per normalizeKimiUsage
76
+ cache_read: nonneg(tokens.input_cache_read),
77
+ output: nonneg(tokens.output),
78
+ reasoning: 0,
79
+ },
80
+ };
81
+ },
82
+ };
83
+
84
// Convert a wire-log timestamp (unix seconds, possibly fractional) to an
// ISO-8601 string, or null when unusable. Values >= 1e12 are assumed to
// already be in milliseconds.
function unixSecondsToIso(value) {
  const seconds = Number(value);
  if (!Number.isFinite(seconds) || seconds <= 0) return null;
  const ms = Math.floor(seconds < 1e12 ? seconds * 1000 : seconds);
  const date = new Date(ms);
  return Number.isNaN(date.getTime()) ? null : date.toISOString();
}
92
+
93
// Sanitize a raw token count: non-finite or negative values become 0,
// fractional values are floored to an integer.
function nonneg(v) {
  const n = Number(v);
  return !Number.isFinite(n) || n < 0 ? 0 : Math.floor(n);
}
98
+
99
// Directory listing (as Dirent objects) that degrades to an empty list
// instead of throwing on a missing or unreadable path.
function safeReadDirSync(p) {
  try {
    return fs.readdirSync(p, { withFileTypes: true });
  } catch (_err) {
    // ENOENT / EACCES etc. — treat as "no entries".
    return [];
  }
}
@@ -0,0 +1,64 @@
1
+ "use strict";
2
+
3
+ const fs = require("node:fs");
4
+ const path = require("node:path");
5
+
6
+ /**
7
+ * OpenClaw audit strategy.
8
+ *
9
+ * Like Hermes, OpenClaw hands vibeusage pre-aggregated ledger rows instead of
10
+ * raw session logs:
11
+ * ~/.vibeusage/tracker/openclaw-usage-ledger.jsonl
12
+ * Each line is a camelCase event
13
+ * { eventId, emittedAt, source, model, inputTokens, cachedInputTokens,
14
+ * outputTokens, reasoningOutputTokens, totalTokens }
15
+ * src/commands/sync.js parseOpenclawSanitizedLedger copies `totalTokens`
16
+ * straight into the bucket, so this audit routes the upstream total into the
17
+ * output channel. Dedupe is keyed on eventId, which the ledger writer
18
+ * already enforces uniqueness of.
19
+ */
20
+
21
+ module.exports = {
22
+ id: "openclaw",
23
+ displayName: "OpenClaw Plugin",
24
+ sessionRoot({ home, env }) {
25
+ const base = (env && env.VIBEUSAGE_HOME) || path.join(home, ".vibeusage");
26
+ return path.join(base, "tracker");
27
+ },
28
+ walkSessions({ root }) {
29
+ const ledger = path.join(root, "openclaw-usage-ledger.jsonl");
30
+ if (!fs.existsSync(ledger)) return [];
31
+ return [ledger];
32
+ },
33
+ extractUsage(line) {
34
+ if (!line) return null;
35
+ let event;
36
+ try {
37
+ event = JSON.parse(line);
38
+ } catch (_err) {
39
+ return null;
40
+ }
41
+ if (!event || typeof event !== "object") return null;
42
+ const timestamp = typeof event.emittedAt === "string" ? event.emittedAt : null;
43
+ if (!timestamp) return null;
44
+ const total = nonneg(event.totalTokens);
45
+ if (total === 0) return null;
46
+ return {
47
+ timestamp,
48
+ dedupeId: typeof event.eventId === "string" && event.eventId ? event.eventId : null,
49
+ channels: {
50
+ input: 0,
51
+ cache_creation: 0,
52
+ cache_read: 0,
53
+ output: total,
54
+ reasoning: 0,
55
+ },
56
+ };
57
+ },
58
+ };
59
+
60
// Normalize a ledger count to a non-negative integer; anything that is not
// a finite, non-negative number maps to 0.
function nonneg(v) {
  const n = Number(v);
  if (Number.isFinite(n) && n >= 0) return Math.floor(n);
  return 0;
}
@@ -0,0 +1,100 @@
1
+ "use strict";
2
+
3
+ const fs = require("node:fs");
4
+ const path = require("node:path");
5
+
6
+ /**
7
+ * OpenCode audit strategy.
8
+ *
9
+ * OpenCode persists one JSON per assistant message under
10
+ * ~/.local/share/opencode/storage/message/ses_<session>/msg_<id>.json
11
+ * Each file looks like:
12
+ * {
13
+ * role: "assistant",
14
+ * id: "msg_...",
15
+ * modelID: "...",
16
+ * tokens: { input, output, reasoning, cache: { read, write } },
17
+ * time: { created, completed }
18
+ * }
19
+ *
20
+ * Channel mapping matches src/lib/rollout.js normalizeOpencodeTokens so the
21
+ * audit's truth sum equals what the parser emits into vibeusage_tracker_hourly
22
+ * (post PR #153, which added cache.read to total):
23
+ * total = input + cache.write + cache.read + output + reasoning
24
+ *
25
+ * Notes:
26
+ * - OPENCODE_HOME / XDG_DATA_HOME env vars override the default root (matches
27
+ * the same logic used by src/commands/sync.js).
28
+ * - Only assistant messages carry tokens; user messages return null from
29
+ * extractUsage so the generic runner skips them.
30
+ * - New OpenCode installs may persist into opencode.db (sqlite) instead of
31
+ * these JSON files. The audit reports no-local-sessions in that case;
32
+ * users can dump the same rows to a JSON file and feed --db-json to
33
+ * compare via the backend path.
34
+ */
35
+
36
+ module.exports = {
37
+ id: "opencode",
38
+ displayName: "OpenCode",
39
+ sessionRoot({ home, env }) {
40
+ const xdg = env.XDG_DATA_HOME || path.join(home, ".local", "share");
41
+ const opencodeHome = env.OPENCODE_HOME || path.join(xdg, "opencode");
42
+ return path.join(opencodeHome, "storage", "message");
43
+ },
44
+ walkSessions({ root }) {
45
+ if (!fs.existsSync(root)) return [];
46
+ const out = [];
47
+ for (const entry of fs.readdirSync(root, { withFileTypes: true })) {
48
+ if (!entry.isDirectory()) continue;
49
+ const dir = path.join(root, entry.name);
50
+ for (const f of fs.readdirSync(dir, { withFileTypes: true })) {
51
+ if (!f.isFile()) continue;
52
+ if (!f.name.startsWith("msg_") || !f.name.endsWith(".json")) continue;
53
+ out.push(path.join(dir, f.name));
54
+ }
55
+ }
56
+ return out;
57
+ },
58
+ // OpenCode is one JSON per file (not JSONL). Yield the whole file body as a
59
+ // single "line" so extractUsage can JSON.parse it uniformly with the
60
+ // line-based contract.
61
+ *iterateRecords(filePath) {
62
+ let text;
63
+ try {
64
+ text = fs.readFileSync(filePath, "utf8");
65
+ } catch (_err) {
66
+ return;
67
+ }
68
+ if (!text.trim()) return;
69
+ yield { line: text, context: { filePath } };
70
+ },
71
+ extractUsage(line) {
72
+ if (!line) return null;
73
+ let obj;
74
+ try {
75
+ obj = JSON.parse(line);
76
+ } catch (_err) {
77
+ return null;
78
+ }
79
+ if (obj?.role !== "assistant") return null;
80
+ const tokens = obj.tokens;
81
+ if (!tokens || typeof tokens !== "object") return null;
82
+ const completed = obj?.time?.completed;
83
+ const created = obj?.time?.created;
84
+ const epochMs = typeof completed === "number" ? completed : typeof created === "number" ? created : null;
85
+ if (!epochMs || !Number.isFinite(epochMs)) return null;
86
+
87
+ const cache = tokens.cache && typeof tokens.cache === "object" ? tokens.cache : {};
88
+ return {
89
+ timestamp: new Date(epochMs).toISOString(),
90
+ dedupeId: typeof obj.id === "string" && obj.id ? obj.id : null,
91
+ channels: {
92
+ input: tokens.input,
93
+ cache_creation: cache.write,
94
+ cache_read: cache.read,
95
+ output: tokens.output,
96
+ reasoning: tokens.reasoning,
97
+ },
98
+ };
99
+ },
100
+ };
@@ -214,6 +214,7 @@ async function parseClaudeIncremental({
214
214
  await ensureDir(path.dirname(queuePath));
215
215
  let filesProcessed = 0;
216
216
  let eventsAggregated = 0;
217
+ let dedupSkipped = 0;
217
218
 
218
219
  const cb = typeof onProgress === "function" ? onProgress : null;
219
220
  const files = Array.isArray(projectFiles) ? projectFiles : [];
@@ -246,6 +247,8 @@ async function parseClaudeIncremental({
246
247
  const prev = cursors.files[key] || null;
247
248
  const inode = st.ino || 0;
248
249
  const startOffset = prev && prev.inode === inode ? prev.offset || 0 : 0;
250
+ const priorSeenIds =
251
+ prev && prev.inode === inode && Array.isArray(prev.seenIds) ? prev.seenIds : [];
249
252
 
250
253
  const projectContext = projectEnabled
251
254
  ? await resolveProjectContextForFile({
@@ -269,16 +272,19 @@ async function parseClaudeIncremental({
269
272
  projectTouchedBuckets,
270
273
  projectRef,
271
274
  projectKey,
275
+ priorSeenIds,
272
276
  });
273
277
 
274
278
  cursors.files[key] = {
275
279
  inode,
276
280
  offset: result.endOffset,
281
+ seenIds: result.seenIds,
277
282
  updatedAt: new Date().toISOString(),
278
283
  };
279
284
 
280
285
  filesProcessed += 1;
281
286
  eventsAggregated += result.eventsAggregated;
287
+ dedupSkipped += result.dedupSkipped || 0;
282
288
 
283
289
  if (cb) {
284
290
  cb({
@@ -303,7 +309,13 @@ async function parseClaudeIncremental({
303
309
  cursors.projectHourly = projectState;
304
310
  }
305
311
 
306
- return { filesProcessed, eventsAggregated, bucketsQueued, projectBucketsQueued };
312
+ return {
313
+ filesProcessed,
314
+ eventsAggregated,
315
+ bucketsQueued,
316
+ projectBucketsQueued,
317
+ dedupSkipped,
318
+ };
307
319
  }
308
320
 
309
321
  async function parseGeminiIncremental({
@@ -778,6 +790,8 @@ async function parseRolloutFile({
778
790
  return { endOffset, lastTotal: totals, lastModel: model, eventsAggregated };
779
791
  }
780
792
 
793
+ const CLAUDE_SEEN_IDS_LIMIT = 500;
794
+
781
795
  async function parseClaudeFile({
782
796
  filePath,
783
797
  startOffset,
@@ -788,17 +802,25 @@ async function parseClaudeFile({
788
802
  projectTouchedBuckets,
789
803
  projectRef,
790
804
  projectKey,
805
+ priorSeenIds,
791
806
  }) {
807
+ const seenOrder = Array.isArray(priorSeenIds) ? priorSeenIds.slice() : [];
808
+ const seenSet = new Set(seenOrder);
809
+
792
810
  const st = await fs.stat(filePath).catch(() => null);
793
- if (!st || !st.isFile()) return { endOffset: startOffset, eventsAggregated: 0 };
811
+ if (!st || !st.isFile()) {
812
+ return { endOffset: startOffset, eventsAggregated: 0, dedupSkipped: 0, seenIds: seenOrder };
813
+ }
794
814
 
795
815
  const endOffset = st.size;
796
- if (startOffset >= endOffset) return { endOffset, eventsAggregated: 0 };
816
+ if (startOffset >= endOffset)
817
+ return { endOffset, eventsAggregated: 0, dedupSkipped: 0, seenIds: seenOrder };
797
818
 
798
819
  const stream = fssync.createReadStream(filePath, { encoding: "utf8", start: startOffset });
799
820
  const rl = readline.createInterface({ input: stream, crlfDelay: Infinity });
800
821
 
801
822
  let eventsAggregated = 0;
823
+ let dedupSkipped = 0;
802
824
  for await (const line of rl) {
803
825
  if (!line || !line.includes('\"usage\"')) continue;
804
826
  let obj;
@@ -811,6 +833,15 @@ async function parseClaudeFile({
811
833
  const usage = obj?.message?.usage || obj?.usage;
812
834
  if (!usage || typeof usage !== "object") continue;
813
835
 
836
+ // Claude Code writes the same assistant message multiple times in the session log
837
+ // (same `message.id` / `requestId`, different outer `uuid`). Aggregate once per
838
+ // upstream Anthropic response to avoid multi-counting token usage.
839
+ const dedupeId = obj?.message?.id || obj?.requestId || null;
840
+ if (dedupeId && seenSet.has(dedupeId)) {
841
+ dedupSkipped += 1;
842
+ continue;
843
+ }
844
+
814
845
  const model = normalizeModelInput(obj?.message?.model || obj?.model) || DEFAULT_MODEL;
815
846
  const tokenTimestamp = typeof obj?.timestamp === "string" ? obj.timestamp : null;
816
847
  if (!tokenTimestamp) continue;
@@ -835,12 +866,20 @@ async function parseClaudeFile({
835
866
  addTotals(projectBucket.totals, delta);
836
867
  projectTouchedBuckets.add(projectBucketKey(projectKey, source, bucketStart));
837
868
  }
869
+ if (dedupeId) {
870
+ seenSet.add(dedupeId);
871
+ seenOrder.push(dedupeId);
872
+ }
838
873
  eventsAggregated += 1;
839
874
  }
840
875
 
841
876
  rl.close();
842
877
  stream.close?.();
843
- return { endOffset, eventsAggregated };
878
+ const trimmedSeenIds =
879
+ seenOrder.length > CLAUDE_SEEN_IDS_LIMIT
880
+ ? seenOrder.slice(seenOrder.length - CLAUDE_SEEN_IDS_LIMIT)
881
+ : seenOrder;
882
+ return { endOffset, eventsAggregated, dedupSkipped, seenIds: trimmedSeenIds };
844
883
  }
845
884
 
846
885
  async function parseKimiFile({
@@ -2181,7 +2220,10 @@ function normalizeOpencodeTokens(tokens) {
2181
2220
  const cached = toNonNegativeInt(tokens.cache?.read);
2182
2221
  const cacheWrite = toNonNegativeInt(tokens.cache?.write);
2183
2222
  const inputTokens = input + cacheWrite;
2184
- const total = inputTokens + output + reasoning;
2223
+ // Include cache-read tokens in the total so OpenCode sessions do not
2224
+ // under-count the way Claude did before the parallel fix; cache-read is
2225
+ // real spend the user pays for on every turn.
2226
+ const total = inputTokens + cached + output + reasoning;
2185
2227
 
2186
2228
  return {
2187
2229
  input_tokens: inputTokens,
@@ -2304,12 +2346,19 @@ function normalizeUsage(u) {
2304
2346
  function normalizeClaudeUsage(u) {
2305
2347
  const inputTokens =
2306
2348
  toNonNegativeInt(u?.input_tokens) + toNonNegativeInt(u?.cache_creation_input_tokens);
2349
+ const cachedInputTokens = toNonNegativeInt(u?.cache_read_input_tokens);
2307
2350
  const outputTokens = toNonNegativeInt(u?.output_tokens);
2308
2351
  const hasTotal = u && Object.prototype.hasOwnProperty.call(u, "total_tokens");
2309
- const totalTokens = hasTotal ? toNonNegativeInt(u?.total_tokens) : inputTokens + outputTokens;
2352
+ // Claude's Messages API does not emit `total_tokens`. When absent, compose it
2353
+ // from all four channels (input / cache_creation / cache_read / output). The
2354
+ // old formula omitted cache_read, which is ~99% of token spend on long
2355
+ // Claude Opus sessions and was the main driver of user-visible under-counts.
2356
+ const totalTokens = hasTotal
2357
+ ? toNonNegativeInt(u?.total_tokens)
2358
+ : inputTokens + cachedInputTokens + outputTokens;
2310
2359
  return {
2311
2360
  input_tokens: inputTokens,
2312
- cached_input_tokens: toNonNegativeInt(u?.cache_read_input_tokens),
2361
+ cached_input_tokens: cachedInputTokens,
2313
2362
  output_tokens: outputTokens,
2314
2363
  reasoning_output_tokens: 0,
2315
2364
  total_tokens: totalTokens,
@@ -2421,4 +2470,13 @@ module.exports = {
2421
2470
  bucketKey,
2422
2471
  enqueueTouchedBuckets,
2423
2472
  toUtcHalfHourStart,
2473
+ // Exported for the token-conservation property test (see
2474
+ // test/parser-total-conservation.test.js and AGENTS.md "新 AI CLI Source
2475
+ // 接入 Checklist"). If you add a new normalize<Source>Usage function,
2476
+ // export it here so the conservation test covers it automatically.
2477
+ normalizeUsage,
2478
+ normalizeClaudeUsage,
2479
+ normalizeGeminiTokens,
2480
+ normalizeKimiUsage,
2481
+ normalizeOpencodeTokens,
2424
2482
  };