@vibe-cafe/vibe-usage 0.7.16 → 0.7.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/parsers/codex.js +131 -34
package/package.json
CHANGED
package/src/parsers/codex.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { createReadStream, readdirSync, existsSync } from 'node:fs';
|
|
2
2
|
import { join } from 'node:path';
|
|
3
3
|
import { homedir } from 'node:os';
|
|
4
|
+
import { createInterface } from 'node:readline';
|
|
4
5
|
import { aggregateToBuckets, extractSessions } from './index.js';
|
|
5
6
|
|
|
6
7
|
const SESSIONS_DIR = join(homedir(), '.codex', 'sessions');
|
|
@@ -27,6 +28,57 @@ function findJsonlFiles(dir) {
|
|
|
27
28
|
return results;
|
|
28
29
|
}
|
|
29
30
|
|
|
31
|
+
/**
 * Open a file for line-by-line consumption.
 *
 * The file is read as a UTF-8 stream and wrapped in a readline interface,
 * which callers iterate with `for await (const line of readLines(path))`.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {import('node:readline').Interface} Readline interface over the file.
 */
function readLines(filePath) {
  const input = createReadStream(filePath, { encoding: 'utf-8' });
  // crlfDelay: Infinity makes a "\r\n" pair always count as one line break.
  return createInterface({ input, crlfDelay: Infinity });
}
|
|
37
|
+
|
|
38
|
+
/**
 * Derive a short project label from a `session_meta` payload.
 *
 * Preference order:
 *   1. `meta.git.repository_url` reduced to its last two path segments
 *      ("owner/repo"), with any ".git" suffix and trailing slashes removed.
 *   2. The last non-empty segment of `meta.cwd`.
 *   3. The literal 'unknown'.
 *
 * @param {{ git?: { repository_url?: string }, cwd?: string }} meta
 * @returns {string} Project label, never empty.
 */
function extractProject(meta) {
  const repoUrl = meta.git?.repository_url;
  if (typeof repoUrl === 'string') {
    // e.g. https://github.com/org/repo.git → org/repo
    //      git@github.com:org/repo.git     → org/repo
    // ':' is excluded from both segments so the host part of an scp-style
    // remote is not captured as part of the owner; trailing slashes are
    // tolerated so "…/org/repo/" still resolves.
    const match = repoUrl.match(/([^/:]+\/[^/:]+?)(?:\.git)?\/*$/);
    if (match) return match[1];
  }

  const { cwd } = meta;
  if (typeof cwd === 'string') {
    // Split on both separator styles and drop empty segments so a trailing
    // separator or a Windows-style path still yields the directory name.
    const segments = cwd.split(/[\\/]+/).filter(Boolean);
    return segments.pop() ?? 'unknown';
  }
  return 'unknown';
}
|
|
47
|
+
|
|
48
|
+
/**
 * Stream a session file once and extract its index metadata: the session
 * id, the forked-from id, the project name, and the total count of
 * `event_msg/token_count` records. The token_count total is used to size
 * the replayed-history block of a forked session — a fork copies the
 * original conversation verbatim, so it begins with exactly as many
 * token_count records as the source session has in total.
 *
 * Only the FIRST `session_meta` record is treated as describing this file.
 * Because a fork replays its source verbatim, later `session_meta` records
 * may belong to the source session; letting them overwrite the result would
 * replace the fork's id with the source's id and clear `forkedFromId`.
 *
 * @param {string} filePath - Path of the JSONL session file.
 * @returns {Promise<{
 *   sessionId: string | null,
 *   forkedFromId: string | null,
 *   sessionProject: string,
 *   tokenCountRecords: number,
 * }>} Index metadata; fields keep their defaults when the file has no
 *   usable `session_meta`.
 */
async function indexSessionFile(filePath) {
  let sessionId = null;
  let forkedFromId = null;
  let sessionProject = 'unknown';
  let tokenCountRecords = 0;
  let sawSessionMeta = false;

  for await (const line of readLines(filePath)) {
    if (!line.trim()) continue;
    try {
      const obj = JSON.parse(line);
      if (obj.type === 'session_meta' && obj.payload) {
        // First session_meta wins; anything later is replayed history.
        if (sawSessionMeta) continue;
        sawSessionMeta = true;
        const meta = obj.payload;
        sessionId = meta.id || null;
        forkedFromId = meta.forked_from_id || null;
        sessionProject = extractProject(meta);
      } else if (obj.type === 'event_msg' && obj.payload?.type === 'token_count') {
        // Counted for every record, replayed or not: forks of this file
        // need the file's full total to locate their own replay boundary.
        tokenCountRecords++;
      }
    } catch {
      // Deliberate best-effort: a malformed or non-object line is skipped
      // so one bad record does not discard the whole session file.
      continue;
    }
  }

  return { sessionId, forkedFromId, sessionProject, tokenCountRecords };
}
|
|
81
|
+
|
|
30
82
|
export async function parse() {
|
|
31
83
|
if (!existsSync(SESSIONS_DIR)) return { buckets: [], sessions: [] };
|
|
32
84
|
|
|
@@ -34,55 +86,88 @@ export async function parse() {
|
|
|
34
86
|
const sessionEvents = [];
|
|
35
87
|
const files = findJsonlFiles(SESSIONS_DIR);
|
|
36
88
|
if (files.length === 0) return { buckets: [], sessions: [] };
|
|
37
|
-
for (const filePath of files) {
|
|
38
89
|
|
|
39
|
-
|
|
90
|
+
// Pass 1: index every session by its UUID and count its token_count
|
|
91
|
+
// records. A forked session (session_meta.payload.forked_from_id) starts
|
|
92
|
+
// with the original conversation replayed verbatim — including every
|
|
93
|
+
// token_count, all timestamped in a burst at the fork instant. Those
|
|
94
|
+
// tokens are already counted from the original session's own file, so
|
|
95
|
+
// re-counting them here double-counts usage and produces a spurious
|
|
96
|
+
// token/cost spike at the fork time. Timestamps cannot distinguish the
|
|
97
|
+
// replay from new activity (the replay burst is stamped at/after the fork
|
|
98
|
+
// instant, within the same 1–3s window), so we instead skip exactly the
|
|
99
|
+
// original session's token_count count from the start of each fork.
|
|
100
|
+
const tokenCountById = new Map(); // sessionId → number of token_count records
|
|
101
|
+
const fileMeta = new Map(); // filePath -> { forkedFromId, sessionProject }
|
|
102
|
+
for (const filePath of files) {
|
|
103
|
+
let meta;
|
|
40
104
|
try {
|
|
41
|
-
|
|
105
|
+
meta = await indexSessionFile(filePath);
|
|
42
106
|
} catch {
|
|
43
107
|
continue;
|
|
44
108
|
}
|
|
109
|
+
fileMeta.set(filePath, meta);
|
|
110
|
+
if (meta.sessionId) {
|
|
111
|
+
tokenCountById.set(meta.sessionId, meta.tokenCountRecords);
|
|
112
|
+
}
|
|
113
|
+
}
|
|
45
114
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
}
|
|
65
|
-
} catch { break; }
|
|
115
|
+
// Pass 2: parse usage, skipping each fork's replayed-history token_counts.
|
|
116
|
+
for (const filePath of files) {
|
|
117
|
+
const fm = fileMeta.get(filePath);
|
|
118
|
+
if (!fm) continue;
|
|
119
|
+
const { forkedFromId } = fm;
|
|
120
|
+
|
|
121
|
+
// How many leading token_count records are copied history. A fork's file
|
|
122
|
+
// begins with the *entire* source file replayed verbatim, so the count
|
|
123
|
+
// to skip is the source's total token_count count. This is correct even
|
|
124
|
+
// for chained forks: a fork-of-a-fork replays the parent fork's whole
|
|
125
|
+
// file (which itself already contains the grandparent's replay), so
|
|
126
|
+
// skipping the parent's full count skips exactly the duplicated region.
|
|
127
|
+
// If the source file is missing (rotated/deleted) we cannot locate the
|
|
128
|
+
// boundary; skip nothing so incomplete data over-counts rather than
|
|
129
|
+
// silently dropping real usage.
|
|
130
|
+
let replayTokenCountToSkip = 0;
|
|
131
|
+
if (forkedFromId != null) {
|
|
132
|
+
replayTokenCountToSkip = tokenCountById.get(forkedFromId) ?? 0;
|
|
66
133
|
}
|
|
134
|
+
let tokenCountSeen = 0;
|
|
135
|
+
|
|
136
|
+
const sessionProject = fm.sessionProject;
|
|
67
137
|
|
|
68
138
|
let turnContextModel = 'unknown';
|
|
69
139
|
const prevTotal = new Map();
|
|
70
|
-
for (const line of
|
|
140
|
+
for await (const line of readLines(filePath)) {
|
|
71
141
|
if (!line.trim()) continue;
|
|
72
142
|
try {
|
|
73
143
|
const obj = JSON.parse(line);
|
|
74
144
|
|
|
145
|
+
// A fork's replayed-history block is the run from the start of the
|
|
146
|
+
// file up to and including the Nth token_count, where N is the source
|
|
147
|
+
// session's total token_count count. We are still inside that block
|
|
148
|
+
// until we have *passed* the Nth token_count. (token_count is the
|
|
149
|
+
// last event of each turn, so the boundary lands cleanly at a turn
|
|
150
|
+
// edge — the new conversation's events come strictly after it.)
|
|
151
|
+
const inReplayBlock = tokenCountSeen < replayTokenCountToSkip;
|
|
152
|
+
|
|
75
153
|
if (obj.timestamp) {
|
|
76
154
|
const evTs = new Date(obj.timestamp);
|
|
77
155
|
if (!isNaN(evTs.getTime())) {
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
156
|
+
// Skip replayed history events so a forked session's
|
|
157
|
+
// duration/active-time/message counts reflect only the new
|
|
158
|
+
// conversation, not the copied original. session_meta itself is
|
|
159
|
+
// kept: it marks when the fork actually started.
|
|
160
|
+
const isReplay = inReplayBlock && obj.type !== 'session_meta';
|
|
161
|
+
if (!isReplay) {
|
|
162
|
+
const isUserTurn = obj.type === 'turn_context' || obj.type === 'session_meta';
|
|
163
|
+
sessionEvents.push({
|
|
164
|
+
sessionId: filePath,
|
|
165
|
+
source: 'codex',
|
|
166
|
+
project: sessionProject,
|
|
167
|
+
timestamp: evTs,
|
|
168
|
+
role: isUserTurn ? 'user' : 'assistant',
|
|
169
|
+
});
|
|
170
|
+
}
|
|
86
171
|
}
|
|
87
172
|
}
|
|
88
173
|
|
|
@@ -104,6 +189,14 @@ export async function parse() {
|
|
|
104
189
|
const timestamp = obj.timestamp ? new Date(obj.timestamp) : null;
|
|
105
190
|
if (!timestamp || isNaN(timestamp.getTime())) continue;
|
|
106
191
|
|
|
192
|
+
// This is the (tokenCountSeen+1)-th token_count in the file. If it
|
|
193
|
+
// falls inside the fork's replay block it's an exact copy of a record
|
|
194
|
+
// already counted from the source session's own file — skip it (but
|
|
195
|
+
// still advance the cumulative-total baseline below so the first real
|
|
196
|
+
// post-fork delta is measured correctly).
|
|
197
|
+
const isReplayedHistory = tokenCountSeen < replayTokenCountToSkip;
|
|
198
|
+
tokenCountSeen++;
|
|
199
|
+
|
|
107
200
|
// Prefer incremental per-request usage; compute delta from cumulative total as fallback
|
|
108
201
|
let usage = info.last_token_usage;
|
|
109
202
|
if (!usage && info.total_token_usage) {
|
|
@@ -121,11 +214,15 @@ export async function parse() {
|
|
|
121
214
|
// First cumulative entry — use as-is (it's the first event's total)
|
|
122
215
|
usage = curr;
|
|
123
216
|
}
|
|
217
|
+
// Always advance the cumulative baseline, even for replayed history,
|
|
218
|
+
// so the first real post-fork delta is measured against the last
|
|
219
|
+
// replayed total instead of being mistaken for a fresh "first entry".
|
|
124
220
|
prevTotal.set(totalKey, { ...curr });
|
|
125
221
|
}
|
|
126
222
|
if (!usage) continue;
|
|
223
|
+
if (isReplayedHistory) continue;
|
|
127
224
|
|
|
128
|
-
const model = info.model || payload.model || turnContextModel ||
|
|
225
|
+
const model = info.model || payload.model || turnContextModel || 'unknown';
|
|
129
226
|
|
|
130
227
|
// OpenAI API: input_tokens INCLUDES cached, output_tokens INCLUDES reasoning.
|
|
131
228
|
// Normalize to Anthropic-style semantics where each field is non-overlapping.
|