@vibe-cafe/vibe-usage 0.7.16 → 0.7.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +1 -1
  2. package/src/parsers/codex.js +131 -34
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vibe-cafe/vibe-usage",
3
- "version": "0.7.16",
3
+ "version": "0.7.18",
4
4
  "description": "Track your AI coding tool token usage and sync to vibecafe.ai",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,6 +1,7 @@
1
- import { readdirSync, readFileSync, statSync, existsSync } from 'node:fs';
1
+ import { createReadStream, readdirSync, existsSync } from 'node:fs';
2
2
  import { join } from 'node:path';
3
3
  import { homedir } from 'node:os';
4
+ import { createInterface } from 'node:readline';
4
5
  import { aggregateToBuckets, extractSessions } from './index.js';
5
6
 
6
7
  const SESSIONS_DIR = join(homedir(), '.codex', 'sessions');
@@ -27,6 +28,57 @@ function findJsonlFiles(dir) {
27
28
  return results;
28
29
  }
29
30
 
31
/**
 * Open a file as a line-by-line reader.
 *
 * The file is read as a UTF-8 stream rather than loaded whole, so callers can
 * iterate it with `for await (const line of readLines(path))`.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {import('node:readline').Interface} readline interface over the file.
 */
function readLines(filePath) {
  const input = createReadStream(filePath, { encoding: 'utf-8' });
  // crlfDelay: Infinity makes readline treat every "\r\n" pair as a single
  // line break, regardless of how the bytes are chunked by the stream.
  const options = { input, crlfDelay: Infinity };
  return createInterface(options);
}
37
+
38
/**
 * Derive a project name from a `session_meta` payload.
 *
 * Resolution order:
 *   1. The last two path segments of `meta.git.repository_url`, without a
 *      trailing `.git` or trailing slashes (e.g. `org/repo`).
 *   2. The last non-empty segment of `meta.cwd`.
 *   3. The literal string 'unknown'.
 *
 * @param {object} meta - The `payload` of a `session_meta` record.
 * @returns {string} The project name, or 'unknown' when none can be derived.
 */
function extractProject(meta) {
  const repoUrl = meta?.git?.repository_url;
  if (typeof repoUrl === 'string') {
    // e.g. https://github.com/org/repo.git → org/repo
    //      git@github.com:org/repo.git     → org/repo
    // The owner segment excludes ':' so scp-style remotes do not drag the
    // "user@host:" prefix into the project name; trailing slashes are allowed.
    const match = repoUrl.match(/([^/:]+\/[^/]+?)(?:\.git)?\/*$/);
    if (match) return match[1];
  }

  const cwd = meta?.cwd;
  if (typeof cwd === 'string') {
    // Split on both POSIX and Windows separators and drop empty segments so
    // "/work/proj/" and "C:\\work\\proj" both resolve to "proj".
    const segments = cwd.split(/[\\/]+/).filter(Boolean);
    return segments.pop() || 'unknown';
  }

  return 'unknown';
}
47
+
48
/**
 * Stream a session file once and extract its index metadata: the session
 * id, the forked-from id, the project name, and the total count of
 * `event_msg/token_count` records. The token_count total is used to size
 * the replayed-history block of a forked session — a fork copies the
 * original conversation verbatim, so it begins with exactly as many
 * token_count records as the source session has in total.
 *
 * Identity (id, forked-from id, project) is taken from the first
 * `session_meta` record that carries an id. Any later `session_meta` records
 * are ignored, so metadata copied in from a source session's history cannot
 * replace the file's own id or clear its fork link.
 *
 * Blank lines, lines that are not valid JSON, and records that throw while
 * being inspected are skipped.
 *
 * @param {string} filePath - Path of the session `.jsonl` file.
 * @returns {Promise<{sessionId: (string|null), forkedFromId: (string|null),
 *   sessionProject: string, tokenCountRecords: number}>}
 */
async function indexSessionFile(filePath) {
  let sessionId = null;
  let forkedFromId = null;
  let sessionProject = 'unknown';
  let tokenCountRecords = 0;

  for await (const line of readLines(filePath)) {
    if (!line.trim()) continue;
    try {
      const obj = JSON.parse(line);
      if (obj.type === 'session_meta' && obj.payload) {
        // Identity is already established by an earlier session_meta; do not
        // let a later one overwrite it.
        if (sessionId !== null) continue;
        const meta = obj.payload;
        sessionId = meta.id || null;
        forkedFromId = meta.forked_from_id || null;
        sessionProject = extractProject(meta);
      } else if (obj.type === 'event_msg' && obj.payload?.type === 'token_count') {
        tokenCountRecords++;
      }
    } catch {
      // Best-effort indexing: a malformed record must not abort the file.
      continue;
    }
  }

  return { sessionId, forkedFromId, sessionProject, tokenCountRecords };
}
81
+
30
82
  export async function parse() {
31
83
  if (!existsSync(SESSIONS_DIR)) return { buckets: [], sessions: [] };
32
84
 
@@ -34,55 +86,88 @@ export async function parse() {
34
86
  const sessionEvents = [];
35
87
  const files = findJsonlFiles(SESSIONS_DIR);
36
88
  if (files.length === 0) return { buckets: [], sessions: [] };
37
- for (const filePath of files) {
38
89
 
39
- let content;
90
+ // Pass 1: index every session by its UUID and count its token_count
91
+ // records. A forked session (session_meta.payload.forked_from_id) starts
92
+ // with the original conversation replayed verbatim — including every
93
+ // token_count, all timestamped in a burst at the fork instant. Those
94
+ // tokens are already counted from the original session's own file, so
95
+ // re-counting them here double-counts usage and produces a spurious
96
+ // token/cost spike at the fork time. Timestamps cannot distinguish the
97
+ // replay from new activity (the replay burst is stamped at/after the fork
98
+ // instant, within the same 1–3s window), so we instead skip exactly the
99
+ // original session's token_count count from the start of each fork.
100
+ const tokenCountById = new Map(); // sessionId → number of token_count records
101
+ const fileMeta = new Map(); // filePath -> { forkedFromId, sessionProject }
102
+ for (const filePath of files) {
103
+ let meta;
40
104
  try {
41
- content = readFileSync(filePath, 'utf-8');
105
+ meta = await indexSessionFile(filePath);
42
106
  } catch {
43
107
  continue;
44
108
  }
109
+ fileMeta.set(filePath, meta);
110
+ if (meta.sessionId) {
111
+ tokenCountById.set(meta.sessionId, meta.tokenCountRecords);
112
+ }
113
+ }
45
114
 
46
- // Extract project name and model from session_meta line
47
- let sessionProject = 'unknown';
48
- let sessionModel = 'unknown';
49
- for (const line of content.split('\n')) {
50
- if (!line.trim()) continue;
51
- try {
52
- const obj = JSON.parse(line);
53
- if (obj.type === 'session_meta' && obj.payload) {
54
- const meta = obj.payload;
55
- if (meta.cwd) {
56
- sessionProject = meta.cwd.split('/').pop() || 'unknown';
57
- }
58
- if (meta.git?.repository_url) {
59
- // e.g. https://github.com/org/repo.git org/repo
60
- const match = meta.git.repository_url.match(/([^/]+\/[^/]+?)(?:\.git)?$/);
61
- if (match) sessionProject = match[1];
62
- }
63
- break;
64
- }
65
- } catch { break; }
115
+ // Pass 2: parse usage, skipping each fork's replayed-history token_counts.
116
+ for (const filePath of files) {
117
+ const fm = fileMeta.get(filePath);
118
+ if (!fm) continue;
119
+ const { forkedFromId } = fm;
120
+
121
+ // How many leading token_count records are copied history. A fork's file
122
+ // begins with the *entire* source file replayed verbatim, so the count
123
+ // to skip is the source's total token_count count. This is correct even
124
+ // for chained forks: a fork-of-a-fork replays the parent fork's whole
125
+ // file (which itself already contains the grandparent's replay), so
126
+ // skipping the parent's full count skips exactly the duplicated region.
127
+ // If the source file is missing (rotated/deleted) we cannot locate the
128
+ // boundary; skip nothing so incomplete data over-counts rather than
129
+ // silently dropping real usage.
130
+ let replayTokenCountToSkip = 0;
131
+ if (forkedFromId != null) {
132
+ replayTokenCountToSkip = tokenCountById.get(forkedFromId) ?? 0;
66
133
  }
134
+ let tokenCountSeen = 0;
135
+
136
+ const sessionProject = fm.sessionProject;
67
137
 
68
138
  let turnContextModel = 'unknown';
69
139
  const prevTotal = new Map();
70
- for (const line of content.split('\n')) {
140
+ for await (const line of readLines(filePath)) {
71
141
  if (!line.trim()) continue;
72
142
  try {
73
143
  const obj = JSON.parse(line);
74
144
 
145
+ // A fork's replayed-history block is the run from the start of the
146
+ // file up to and including the Nth token_count, where N is the source
147
+ // session's total token_count count. We are still inside that block
148
+ // until we have *passed* the Nth token_count. (token_count is the
149
+ // last event of each turn, so the boundary lands cleanly at a turn
150
+ // edge — the new conversation's events come strictly after it.)
151
+ const inReplayBlock = tokenCountSeen < replayTokenCountToSkip;
152
+
75
153
  if (obj.timestamp) {
76
154
  const evTs = new Date(obj.timestamp);
77
155
  if (!isNaN(evTs.getTime())) {
78
- const isUserTurn = obj.type === 'turn_context' || obj.type === 'session_meta';
79
- sessionEvents.push({
80
- sessionId: filePath,
81
- source: 'codex',
82
- project: sessionProject,
83
- timestamp: evTs,
84
- role: isUserTurn ? 'user' : 'assistant',
85
- });
156
+ // Skip replayed history events so a forked session's
157
+ // duration/active-time/message counts reflect only the new
158
+ // conversation, not the copied original. session_meta itself is
159
+ // kept: it marks when the fork actually started.
160
+ const isReplay = inReplayBlock && obj.type !== 'session_meta';
161
+ if (!isReplay) {
162
+ const isUserTurn = obj.type === 'turn_context' || obj.type === 'session_meta';
163
+ sessionEvents.push({
164
+ sessionId: filePath,
165
+ source: 'codex',
166
+ project: sessionProject,
167
+ timestamp: evTs,
168
+ role: isUserTurn ? 'user' : 'assistant',
169
+ });
170
+ }
86
171
  }
87
172
  }
88
173
 
@@ -104,6 +189,14 @@ export async function parse() {
104
189
  const timestamp = obj.timestamp ? new Date(obj.timestamp) : null;
105
190
  if (!timestamp || isNaN(timestamp.getTime())) continue;
106
191
 
192
+ // This is the (tokenCountSeen+1)-th token_count in the file. If it
193
+ // falls inside the fork's replay block it's an exact copy of a record
194
+ // already counted from the source session's own file — skip it (but
195
+ // still advance the cumulative-total baseline below so the first real
196
+ // post-fork delta is measured correctly).
197
+ const isReplayedHistory = tokenCountSeen < replayTokenCountToSkip;
198
+ tokenCountSeen++;
199
+
107
200
  // Prefer incremental per-request usage; compute delta from cumulative total as fallback
108
201
  let usage = info.last_token_usage;
109
202
  if (!usage && info.total_token_usage) {
@@ -121,11 +214,15 @@ export async function parse() {
121
214
  // First cumulative entry — use as-is (it's the first event's total)
122
215
  usage = curr;
123
216
  }
217
+ // Always advance the cumulative baseline, even for replayed history,
218
+ // so the first real post-fork delta is measured against the last
219
+ // replayed total instead of being mistaken for a fresh "first entry".
124
220
  prevTotal.set(totalKey, { ...curr });
125
221
  }
126
222
  if (!usage) continue;
223
+ if (isReplayedHistory) continue;
127
224
 
128
- const model = info.model || payload.model || turnContextModel || sessionModel;
225
+ const model = info.model || payload.model || turnContextModel || 'unknown';
129
226
 
130
227
  // OpenAI API: input_tokens INCLUDES cached, output_tokens INCLUDES reasoning.
131
228
  // Normalize to Anthropic-style semantics where each field is non-overlapping.