vibeusage 0.5.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/commands/doctor.js +196 -2
- package/src/commands/sync.js +29 -1
- package/src/lib/ops/audit-claude.js +35 -0
- package/src/lib/ops/audit-source.js +399 -0
- package/src/lib/ops/sources/_rollout-base.js +203 -0
- package/src/lib/ops/sources/claude.js +52 -0
- package/src/lib/ops/sources/codex.js +10 -0
- package/src/lib/ops/sources/every-code.js +10 -0
- package/src/lib/ops/sources/gemini.js +154 -0
- package/src/lib/ops/sources/hermes.js +69 -0
- package/src/lib/ops/sources/kimi.js +105 -0
- package/src/lib/ops/sources/openclaw.js +64 -0
- package/src/lib/ops/sources/opencode.js +100 -0
- package/src/lib/rollout.js +27 -5
|
@@ -0,0 +1,399 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* audit-source.js — generic ground-truth auditor.
|
|
5
|
+
*
|
|
6
|
+
* Every AI CLI source (claude, opencode, codex, gemini, kimi, ...) carries a
|
|
7
|
+
* different session-log layout and token schema, but the audit shape is the
|
|
8
|
+
* same: walk local sessions, dedup by upstream id, sum all channels per day,
|
|
9
|
+
* then compare against DB totals.
|
|
10
|
+
*
|
|
11
|
+
* This module captures that shape. Source-specific knowledge lives in
|
|
12
|
+
* src/lib/ops/sources/<id>.js as a `strategy` object (see CONTRACT below).
|
|
13
|
+
*
|
|
14
|
+
* CONTRACT (strategy shape):
|
|
15
|
+
* {
|
|
16
|
+
* id: "claude" | "opencode" | ...,
|
|
17
|
+
* displayName: "Claude Code",
|
|
18
|
+
* sessionRoot({ home, env }) -> absolute path,
|
|
19
|
+
* walkSessions({ root, windowStartIso }) -> string[] // list of files/dbs to read; windowStartIso may be used to prune old sessions
|
|
20
|
+
* extractUsage(line, context) -> null | {
|
|
21
|
+
* timestamp: "<ISO8601>",
|
|
22
|
+
* dedupeId: "<stable id>" | null,
|
|
23
|
+
* channels: { input, cache_creation, cache_read, output, reasoning }
|
|
24
|
+
* }
|
|
25
|
+
* // optional: skip the jsonl line-by-line reader if the source uses sqlite
|
|
26
|
+
* // and must iterate rows differently
|
|
27
|
+
* iterateRecords(filePath) -> iterable<{ line, context }>
|
|
28
|
+
* }
|
|
29
|
+
*
|
|
30
|
+
* Consumers:
|
|
31
|
+
* - doctor --audit-tokens --source <id>
|
|
32
|
+
* - scripts/ops/compare-<source>-ground-truth.cjs (thin CLI wrappers)
|
|
33
|
+
*/
|
|
34
|
+
|
|
35
|
+
const fs = require("node:fs");
|
|
36
|
+
const os = require("node:os");
|
|
37
|
+
const { spawnSync } = require("node:child_process");
|
|
38
|
+
|
|
39
|
+
const DEFAULT_DAYS = 14;
|
|
40
|
+
const DEFAULT_THRESHOLD_PCT = 25;
|
|
41
|
+
|
|
42
|
+
// `resolveUserIdViaInsforge` and `queryDbTotalsViaInsforge` interpolate these
|
|
43
|
+
// values into a SQL string handed to `insforge db query` (argv, not shell, but
|
|
44
|
+
// the argv reaches a SQL executor with service-role authority). The three
|
|
45
|
+
// values are validated against strict whitelists so a malicious / typo flag
|
|
46
|
+
// like --user-id "foo'; DROP TABLE users; --" cannot reach the DB.
|
|
47
|
+
const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
48
|
+
const SOURCE_ID_RE = /^[a-z][a-z0-9-]*$/;
|
|
49
|
+
const ISO_TIMESTAMP_RE =
|
|
50
|
+
/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?Z$/;
|
|
51
|
+
|
|
52
|
+
/**
 * Audit one source: total the local session logs per day and compare them
 * with the per-day totals stored in the DB.
 *
 * DB totals come from the first of these that is provided: `dbJson` (string or
 * already-parsed object), `dbJsonPath` (file to read), or a live query through
 * the `insforge` CLI using `userId` (or a user id resolved from `deviceId`).
 *
 * @param {object} [options]
 * @param {object} options.strategy Source strategy (id, sessionRoot, walkSessions, extractUsage).
 * @param {number} [options.days] Window length in UTC days, counting today.
 * @param {number} [options.threshold] Drift percentage above which `exceedsThreshold` is true.
 * @param {string|null} [options.userId]
 * @param {string|null} [options.deviceId]
 * @param {string|null} [options.dbJsonPath]
 * @param {string|object|null} [options.dbJson]
 * @param {string} [options.home]
 * @param {object} [options.env]
 * @param {string|null} [options.sessionRootOverride] Used instead of strategy.sessionRoot() when set.
 * @returns {object} `{ ok: true, rows, maxDriftPct, ... }` on success, or
 *   `{ ok: false, error, source, message, rows: [], maxDriftPct: 0 }` on an expected failure.
 * @throws {Error} When the strategy, `days` or `threshold` arguments are invalid.
 */
function runSourceAudit({
  strategy,
  days = DEFAULT_DAYS,
  threshold = DEFAULT_THRESHOLD_PCT,
  userId = null,
  deviceId = null,
  dbJsonPath = null,
  dbJson = null,
  home = os.homedir(),
  env = process.env,
  sessionRootOverride = null,
} = {}) {
  if (!strategy || typeof strategy !== "object") {
    throw new Error("runSourceAudit requires a strategy object");
  }
  for (const key of ["id", "sessionRoot", "walkSessions", "extractUsage"]) {
    if (typeof strategy[key] !== "function" && typeof strategy[key] !== "string") {
      throw new Error(`strategy.${key} is required`);
    }
  }
  if (!Number.isFinite(days) || days <= 0) {
    throw new Error(`days must be a positive number, got ${days}`);
  }
  if (!Number.isFinite(threshold) || threshold < 0) {
    throw new Error(`threshold must be non-negative, got ${threshold}`);
  }

  // Every expected failure is reported with the same envelope.
  const failure = (error, message) => ({
    ok: false,
    error,
    source: strategy.id,
    message,
    rows: [],
    maxDriftPct: 0,
  });

  const root = sessionRootOverride || strategy.sessionRoot({ home, env });

  // The window starts at 00:00 UTC, (days - 1) days before today.
  const now = new Date();
  const start = new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate()));
  start.setUTCDate(start.getUTCDate() - (days - 1));
  const windowStartIso = start.toISOString();

  const files = strategy.walkSessions({ root, windowStartIso });
  if (!files || files.length === 0) {
    return failure(
      "no-local-sessions",
      `no local sessions for source=${strategy.id} under ${root}`,
    );
  }

  const local = computeLocalTotals({ files, windowStartIso, strategy });

  let backend;
  if (dbJson) {
    try {
      backend = parseDbJson(dbJson);
    } catch (err) {
      return failure("db-json-parse", err.message);
    }
  } else if (dbJsonPath) {
    let blob;
    try {
      blob = fs.readFileSync(dbJsonPath, "utf8");
    } catch (err) {
      return failure("db-json-read-failed", `cannot read ${dbJsonPath}: ${err?.message || err}`);
    }
    // A malformed file is reported the same way as a malformed inline dbJson
    // instead of escaping as an exception.
    try {
      backend = parseDbJson(blob);
    } catch (err) {
      return failure("db-json-parse", err.message);
    }
  } else {
    const resolvedUserId = userId || resolveUserIdViaInsforge({ deviceId });
    if (!resolvedUserId) {
      return failure(
        "cannot-resolve-user-id",
        "cannot resolve user_id; pass userId explicitly, supply dbJson, or make sure `insforge` CLI is linked to the vibeusage workspace and config.deviceId is set",
      );
    }
    const queryRes = queryDbTotalsViaInsforge({
      userId: resolvedUserId,
      source: strategy.id,
      windowStartIso,
    });
    if (!queryRes.ok) {
      return { ...queryRes, source: strategy.id, rows: [], maxDriftPct: 0 };
    }
    backend = queryRes.byDay;
  }

  // One row per day that appears on either side.
  const dayKeys = Array.from(new Set([...local.byDay.keys(), ...backend.keys()])).sort();
  const rows = [];
  let maxDriftPct = 0;
  for (const day of dayKeys) {
    const truth = (local.byDay.get(day) || { total: 0 }).total;
    const dbTotal = backend.get(day) || 0;
    // With no local truth for the day there is no ratio; such days do not
    // contribute to maxDriftPct.
    const ratio = truth > 0 ? dbTotal / truth : null;
    const drift = ratio == null ? null : Math.abs(ratio - 1) * 100;
    if (drift != null && drift > maxDriftPct) maxDriftPct = drift;
    rows.push({ day, truth, db: dbTotal, ratio, driftPct: drift });
  }

  return {
    ok: true,
    source: strategy.id,
    displayName: strategy.displayName || strategy.id,
    windowStartIso,
    days,
    thresholdPct: threshold,
    filesScanned: files.length,
    usageLines: local.scanned,
    uniqueMessages: local.uniqueIds,
    duplicatesSkipped: local.skippedDup,
    rows,
    maxDriftPct: Number(maxDriftPct.toFixed(2)),
    // Compared before rounding, so a drift just over the threshold still trips it.
    exceedsThreshold: maxDriftPct > threshold,
  };
}
|
|
172
|
+
|
|
173
|
+
/**
 * Walk every session file and total the token channels per day.
 *
 * Records come from `strategy.iterateRecords` when the strategy defines it,
 * otherwise from the default line-by-line reader. Each record is passed to
 * `strategy.extractUsage`; records older than `windowStartIso`, records
 * without a usable day, repeats of an already-seen `dedupeId`, and records
 * whose channels sum to zero do not contribute to the totals.
 *
 * @returns {{ byDay: Map<string, object>, scanned: number, skippedDup: number, uniqueIds: number }}
 */
function computeLocalTotals({ files, windowStartIso, strategy }) {
  const CHANNEL_KEYS = ["input", "cache_creation", "cache_read", "output", "reasoning"];
  const totalsByDay = new Map();
  const seenIds = new Set();
  let scanned = 0;
  let skippedDup = 0;

  let readRecords;
  if (typeof strategy.iterateRecords === "function") {
    readRecords = strategy.iterateRecords;
  } else {
    readRecords = defaultIterateRecords;
  }

  for (const filePath of files) {
    for (const { line, context } of readRecords(filePath)) {
      const usage = strategy.extractUsage(line, context);
      if (!usage) continue;

      const { timestamp, dedupeId, channels } = usage;
      // ISO-8601 strings are compared as plain strings against the window start.
      if (!timestamp || timestamp < windowStartIso) continue;
      const day = isoDay(timestamp);
      if (!day) continue;

      scanned += 1;
      if (dedupeId) {
        if (seenIds.has(dedupeId)) {
          skippedDup += 1;
          continue;
        }
        seenIds.add(dedupeId);
      }

      const counts = {};
      let total = 0;
      for (const key of CHANNEL_KEYS) {
        counts[key] = nonneg(channels[key]);
        total += counts[key];
      }
      if (total === 0) continue;

      let bucket = totalsByDay.get(day);
      if (!bucket) {
        bucket = {
          total: 0,
          input: 0,
          cache_creation: 0,
          cache_read: 0,
          output: 0,
          reasoning: 0,
          messages: 0,
        };
        totalsByDay.set(day, bucket);
      }
      bucket.total += total;
      for (const key of CHANNEL_KEYS) {
        bucket[key] += counts[key];
      }
      bucket.messages += 1;
    }
  }

  return { byDay: totalsByDay, scanned, skippedDup, uniqueIds: seenIds.size };
}
|
|
232
|
+
|
|
233
|
+
/**
 * Default record reader: yields each non-empty line of a text file together
 * with a context object naming the file. A file that cannot be read yields
 * nothing.
 */
function* defaultIterateRecords(filePath) {
  let contents;
  try {
    contents = fs.readFileSync(filePath, "utf8");
  } catch (_err) {
    // Unreadable file: treated as having no records.
    return;
  }
  const nonEmptyLines = contents.split("\n").filter((entry) => entry !== "");
  for (const line of nonEmptyLines) {
    yield { line, context: { filePath } };
  }
}
|
|
245
|
+
|
|
246
|
+
/**
 * Return the leading `YYYY-MM-DD` portion of a timestamp string, or null when
 * the value is not a string or does not start with that pattern.
 */
function isoDay(ts) {
  if (typeof ts === "string") {
    const hit = /^\d{4}-\d{2}-\d{2}/.exec(ts);
    if (hit) return hit[0];
  }
  return null;
}
|
|
251
|
+
|
|
252
|
+
/**
 * Coerce a value to a non-negative integer count: the value is converted with
 * Number() and floored; anything non-finite or negative becomes 0.
 */
function nonneg(v) {
  const parsed = Number(v);
  const usable = Number.isFinite(parsed) && parsed >= 0;
  return usable ? Math.floor(parsed) : 0;
}
|
|
257
|
+
|
|
258
|
+
/**
 * Look up the user id that owns a tracker device by running
 * `insforge db query`.
 *
 * The device id is interpolated into the SQL text, so anything that is not a
 * UUID is rejected before the CLI is invoked.
 *
 * @returns {string|null} The first UUID found in the CLI's stdout, or null when
 *   the device id is invalid, the CLI does not exit with status 0, or the
 *   output contains no UUID.
 */
function resolveUserIdViaInsforge({ deviceId }) {
  const isUuid = Boolean(deviceId) && UUID_RE.test(String(deviceId));
  if (!isUuid) return null;

  const sql = `SELECT user_id FROM vibeusage_tracker_devices WHERE id='${deviceId}' LIMIT 1`;
  const result = spawnSync("insforge", ["db", "query", sql], { encoding: "utf8" });
  if (result.status !== 0) return null;

  const uuidInOutput = /\b([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\b/i;
  const found = uuidInOutput.exec(result.stdout || "");
  return found ? found[1] : null;
}
|
|
275
|
+
|
|
276
|
+
/**
 * Fetch per-day token totals for one user and source by running
 * `insforge --json db query`.
 *
 * All three inputs are interpolated into the SQL text, so each one is checked
 * against its whitelist pattern first and the query is refused on a mismatch.
 *
 * @param {object} args
 * @param {string} args.userId Must match UUID_RE.
 * @param {string} args.source Must match SOURCE_ID_RE.
 * @param {string} args.windowStartIso Must match ISO_TIMESTAMP_RE.
 * @returns {{ ok: true, byDay: Map<string, number> } | { ok: false, error: string, message: string }}
 *   Never throws for CLI failures or unparseable CLI output; those are
 *   reported as `{ ok: false, ... }`.
 */
function queryDbTotalsViaInsforge({ userId, source, windowStartIso }) {
  if (!userId || !UUID_RE.test(String(userId))) {
    return {
      ok: false,
      error: "invalid-user-id",
      message: `refusing to query DB with non-UUID user id '${String(userId).slice(0, 40)}'`,
    };
  }
  if (!source || !SOURCE_ID_RE.test(String(source))) {
    return {
      ok: false,
      error: "invalid-source-id",
      message: `refusing to query DB with non-identifier source '${String(source).slice(0, 40)}'`,
    };
  }
  if (!windowStartIso || !ISO_TIMESTAMP_RE.test(String(windowStartIso))) {
    return {
      ok: false,
      error: "invalid-window-start",
      message: `refusing to query DB with non-ISO windowStartIso '${String(windowStartIso).slice(0, 40)}'`,
    };
  }
  const sql =
    `SELECT DATE(hour_start) AS day, SUM(total_tokens) AS tokens ` +
    `FROM vibeusage_tracker_hourly ` +
    `WHERE source='${source}' AND user_id='${userId}' AND hour_start >= '${windowStartIso}' ` +
    `GROUP BY DATE(hour_start) ORDER BY day`;
  const r = spawnSync("insforge", ["--json", "db", "query", sql], { encoding: "utf8" });
  if (r.status !== 0) {
    return {
      ok: false,
      error: "insforge-db-query-failed",
      message:
        `\`insforge db query\` failed (${r.status}). Run \`insforge current\` to confirm ` +
        `the CLI is linked to the vibeusage workspace, or pass dbJson directly.`,
    };
  }
  // The CLI can exit 0 and still print something that is not the expected
  // JSON; report that through the result object like every other failure.
  try {
    return { ok: true, byDay: parseDbJson(r.stdout) };
  } catch (err) {
    return {
      ok: false,
      error: "db-json-parse",
      message: `\`insforge db query\` returned output that could not be parsed: ${err?.message || err}`,
    };
  }
}
|
|
315
|
+
|
|
316
|
+
/**
 * Turn a DB query result into a Map of day -> token total.
 *
 * Accepts either a JSON string or an already-parsed value. The rows may be
 * the top-level array or live under a `rows` or `data` property. For each row
 * the day is read from `day`, `date` or `bucket_day` (first ten characters of
 * a string value) and the count from `tokens`, `total_tokens` or `total`.
 * Rows without a string day are ignored. When several rows name the same day
 * their counts are added together rather than the last row winning.
 *
 * @param {string|object} blob
 * @returns {Map<string, number>}
 * @throws {Error} When the string is not valid JSON or no row array is found.
 */
function parseDbJson(blob) {
  let parsed;
  if (typeof blob === "object" && blob !== null) {
    parsed = blob;
  } else {
    try {
      parsed = JSON.parse(blob);
    } catch (err) {
      throw new Error(`cannot parse DB JSON: ${err?.message || err}`);
    }
  }

  let rows = null;
  if (Array.isArray(parsed)) {
    rows = parsed;
  } else if (Array.isArray(parsed?.rows)) {
    rows = parsed.rows;
  } else if (Array.isArray(parsed?.data)) {
    rows = parsed.data;
  }
  if (!rows) {
    throw new Error(
      `DB JSON shape unexpected (need array of {day, tokens}); got ${
        Object.keys(parsed || {}).join(",") || "(empty)"
      }`,
    );
  }

  const byDay = new Map();
  for (const row of rows) {
    const rawDay = row?.day ?? row?.date ?? row?.bucket_day;
    const day = typeof rawDay === "string" ? rawDay.slice(0, 10) : null;
    if (!day) continue;
    const total = nonneg(row.tokens ?? row.total_tokens ?? row.total);
    // Accumulate so input that is finer-grained than one row per day still
    // yields the full day total.
    byDay.set(day, (byDay.get(day) || 0) + total);
  }
  return byDay;
}
|
|
351
|
+
|
|
352
|
+
// Registry for doctor --audit-tokens --source routing.
|
|
353
|
+
// Register new strategies as they land in sources/<id>.js.
|
|
354
|
+
function getStrategy(id) {
|
|
355
|
+
switch (id) {
|
|
356
|
+
case "claude":
|
|
357
|
+
// eslint-disable-next-line global-require
|
|
358
|
+
return require("./sources/claude");
|
|
359
|
+
case "opencode":
|
|
360
|
+
// eslint-disable-next-line global-require
|
|
361
|
+
return require("./sources/opencode");
|
|
362
|
+
case "codex":
|
|
363
|
+
// eslint-disable-next-line global-require
|
|
364
|
+
return require("./sources/codex");
|
|
365
|
+
case "every-code":
|
|
366
|
+
// eslint-disable-next-line global-require
|
|
367
|
+
return require("./sources/every-code");
|
|
368
|
+
case "gemini":
|
|
369
|
+
// eslint-disable-next-line global-require
|
|
370
|
+
return require("./sources/gemini");
|
|
371
|
+
case "kimi":
|
|
372
|
+
// eslint-disable-next-line global-require
|
|
373
|
+
return require("./sources/kimi");
|
|
374
|
+
case "hermes":
|
|
375
|
+
// eslint-disable-next-line global-require
|
|
376
|
+
return require("./sources/hermes");
|
|
377
|
+
case "openclaw":
|
|
378
|
+
// eslint-disable-next-line global-require
|
|
379
|
+
return require("./sources/openclaw");
|
|
380
|
+
default:
|
|
381
|
+
return null;
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
/**
 * Ids of every source that has a registered strategy. A new array is returned
 * on each call.
 */
function listRegisteredSources() {
  const ids = [
    "claude",
    "opencode",
    "codex",
    "every-code",
    "gemini",
    "kimi",
    "hermes",
    "openclaw",
  ];
  return ids;
}
|
|
388
|
+
|
|
389
|
+
module.exports = {
|
|
390
|
+
DEFAULT_DAYS,
|
|
391
|
+
DEFAULT_THRESHOLD_PCT,
|
|
392
|
+
runSourceAudit,
|
|
393
|
+
getStrategy,
|
|
394
|
+
listRegisteredSources,
|
|
395
|
+
// Exported for targeted tests that assert the SQL-interpolation inputs are
|
|
396
|
+
// validated before reaching `insforge db query`.
|
|
397
|
+
queryDbTotalsViaInsforge,
|
|
398
|
+
resolveUserIdViaInsforge,
|
|
399
|
+
};
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
const fs = require("node:fs");
|
|
4
|
+
const path = require("node:path");
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Shared strategy factory for Codex-family rollout audits.
|
|
8
|
+
*
|
|
9
|
+
* Codex and Every-Code write identical rollout .jsonl streams into
|
|
10
|
+
* <home>/<subdir>/sessions/<YYYY>/<MM>/<DD>/rollout-*.jsonl. The directory
|
|
11
|
+
* layout is the only difference (CODEX_HOME / CODE_HOME). Token accounting
|
|
12
|
+
* semantics differ from Claude's in two important ways:
|
|
13
|
+
*
|
|
14
|
+
* 1. Token events are `payload.type === "token_count"` rows that carry
|
|
15
|
+
* `info.total_token_usage` (cumulative) and `info.last_token_usage`
|
|
16
|
+
* (delta for the latest API call). Events with `info: null` are
|
|
17
|
+
* rate-limit-only pings and must be ignored.
|
|
18
|
+
*
|
|
19
|
+
* 2. `input_tokens` already includes cached input, and `output_tokens`
|
|
20
|
+
* already includes `reasoning_output_tokens`. Naively summing all five
|
|
21
|
+
* channels double-counts. The authoritative per-turn total is simply
|
|
22
|
+
* `total_tokens` on the upstream payload, which is what normalizeUsage
|
|
23
|
+
* passes through to the DB unchanged.
|
|
24
|
+
*
|
|
25
|
+
* Approach:
|
|
26
|
+
* - walkSessions prunes by YYYY/MM/DD directories before hitting the jsonl
|
|
27
|
+
* files so the auditor does not scan all ~240K Codex rollouts just to
|
|
28
|
+
* look at the last 14 days.
|
|
29
|
+
* - iterateRecords is stateful per file: it tracks the last seen
|
|
30
|
+
* total_token_usage and yields a synthetic delta object whenever the
|
|
31
|
+
* total changes (uses last_token_usage when available, otherwise the
|
|
32
|
+
* total_prev diff). Duplicate token_count rows with identical totals are
|
|
33
|
+
* skipped; that mirrors parseRolloutFile's pickDelta logic.
|
|
34
|
+
* - extractUsage routes the authoritative `total_tokens` number into the
|
|
35
|
+
* `output` channel and zeroes the rest. The framework sums the five
|
|
36
|
+
* channels to compute row.truth, so putting the whole total into one
|
|
37
|
+
* channel is a deliberate trick that keeps day totals correct without
|
|
38
|
+
* exposing Codex's overlapping channel semantics through the generic
|
|
39
|
+
* contract.
|
|
40
|
+
*/
|
|
41
|
+
|
|
42
|
+
/**
 * Build an audit strategy for a source that writes rollout `.jsonl` files
 * under `<base>/sessions/<YYYY>/<MM>/<DD>/rollout-*.jsonl`.
 *
 * @param {object} config
 * @param {string} config.id Strategy id.
 * @param {string} config.displayName Human-readable name.
 * @param {string} config.envKey Environment variable that overrides the base directory.
 * @param {string} config.defaultSubdir Directory under `home` used when the variable is unset.
 * @returns {object} Strategy with sessionRoot, walkSessions, iterateRecords and extractUsage.
 */
function makeRolloutStrategy({ id, displayName, envKey, defaultSubdir }) {
  // Names of the sub-directories of `dir` whose name matches `pattern`.
  const matchingDirs = (dir, pattern) =>
    safeReadDirSync(dir)
      .filter((entry) => entry.isDirectory() && pattern.test(entry.name))
      .map((entry) => entry.name);

  return {
    id,
    displayName,

    // The env override wins over `<home>/<defaultSubdir>`; sessions live one level below.
    sessionRoot({ home, env }) {
      const override = env && env[envKey];
      const base = override || path.join(home, defaultSubdir);
      return path.join(base, "sessions");
    },

    // Collect rollout files, skipping day directories older than the window.
    walkSessions({ root, windowStartIso }) {
      if (!fs.existsSync(root)) return [];
      // Sessions can straddle midnight, so a file in one day's directory may
      // hold events stamped the previous day. Start one day before the
      // window so boundary events are not dropped.
      const bufferDay = shiftIsoDay(windowStartIso, -1);
      const found = [];
      for (const yyyy of matchingDirs(root, /^\d{4}$/)) {
        const yearDir = path.join(root, yyyy);
        for (const mm of matchingDirs(yearDir, /^\d{2}$/)) {
          const monthDir = path.join(yearDir, mm);
          for (const dd of matchingDirs(monthDir, /^\d{2}$/)) {
            if (bufferDay && `${yyyy}-${mm}-${dd}` < bufferDay) continue;
            const dayDir = path.join(monthDir, dd);
            for (const entry of safeReadDirSync(dayDir)) {
              const isRollout =
                entry.isFile() &&
                entry.name.startsWith("rollout-") &&
                entry.name.endsWith(".jsonl");
              if (isRollout) found.push(path.join(dayDir, entry.name));
            }
          }
        }
      }
      return found;
    },

    // Stateful per file: remembers the previous cumulative usage and yields
    // one synthetic `{ timestamp, delta }` line per token_count event that
    // carries new usage.
    *iterateRecords(filePath) {
      let raw;
      try {
        raw = fs.readFileSync(filePath, "utf8");
      } catch (_err) {
        return;
      }
      if (!raw) return;

      let previousTotals = null;
      for (const rawLine of raw.split("\n")) {
        // Cheap substring test before paying for JSON.parse.
        if (!rawLine || !rawLine.includes("token_count")) continue;
        let event;
        try {
          event = JSON.parse(rawLine);
        } catch (_err) {
          continue;
        }
        const payload = event?.payload;
        if (!payload || payload.type !== "token_count") continue;
        // Events without `info` carry no usage and are ignored.
        const info = payload.info;
        if (!info) continue;

        const cumulative = info.total_token_usage || null;
        const latest = info.last_token_usage || null;
        if (!cumulative && !latest) continue;
        // A repeat of the previous cumulative usage is a duplicate event.
        if (previousTotals && cumulative && sameUsage(previousTotals, cumulative)) continue;

        // Prefer the reported per-call usage; otherwise derive it from the
        // cumulative totals (or take them as-is for the first event).
        let delta = null;
        if (latest && Number(latest.total_tokens) > 0) {
          delta = latest;
        } else if (cumulative) {
          delta = previousTotals ? diffUsage(cumulative, previousTotals) : cumulative;
        }
        if (cumulative) previousTotals = cumulative;
        if (!delta || !Number(delta.total_tokens)) continue;

        yield {
          line: JSON.stringify({ timestamp: event.timestamp, delta }),
          context: { filePath },
        };
      }
    },

    // Parses a synthetic line produced by iterateRecords.
    extractUsage(line) {
      if (!line) return null;
      let record;
      try {
        record = JSON.parse(line);
      } catch (_err) {
        return null;
      }
      const timestamp = typeof record.timestamp === "string" ? record.timestamp : null;
      const delta = record.delta;
      if (!timestamp || !delta) return null;
      const totalTokens = Number(delta.total_tokens);
      if (!Number.isFinite(totalTokens) || totalTokens <= 0) return null;
      return {
        timestamp,
        dedupeId: null, // per-file dedup already done in iterateRecords
        channels: {
          input: 0,
          cache_creation: 0,
          cache_read: 0,
          // The whole upstream total is placed in a single channel so that
          // summing the channels gives exactly that total.
          output: totalTokens,
          reasoning: 0,
        },
      };
    },
  };
}
|
|
154
|
+
|
|
155
|
+
/**
 * List a directory as Dirent entries, returning an empty list instead of
 * throwing when the directory cannot be read.
 */
function safeReadDirSync(p) {
  let entries = [];
  try {
    entries = fs.readdirSync(p, { withFileTypes: true });
  } catch (_err) {
    // Best-effort: an unreadable directory is treated as empty.
  }
  return entries;
}
|
|
162
|
+
|
|
163
|
+
/**
 * True when two usage objects agree on every token counter. Missing counters
 * count as 0; a missing object on either side is never a match.
 */
function sameUsage(a, b) {
  if (!a || !b) return false;
  const counters = [
    "input_tokens",
    "cached_input_tokens",
    "output_tokens",
    "reasoning_output_tokens",
    "total_tokens",
  ];
  return counters.every((name) => Number(a[name] || 0) === Number(b[name] || 0));
}
|
|
176
|
+
|
|
177
|
+
/**
 * Per-counter difference between two cumulative usage objects, clamped at 0.
 *
 * When either side is missing, `curr` (or null) is returned unchanged. When
 * the current total is lower than the previous one, the counters are taken to
 * have restarted and `curr` itself is returned as the delta.
 */
function diffUsage(curr, prev) {
  if (!curr || !prev) return curr || null;
  const wentBackwards = Number(curr.total_tokens || 0) < Number(prev.total_tokens || 0);
  if (wentBackwards) return curr; // session reset
  const counters = [
    "input_tokens",
    "cached_input_tokens",
    "output_tokens",
    "reasoning_output_tokens",
    "total_tokens",
  ];
  return Object.fromEntries(
    counters.map((name) => [
      name,
      Math.max(0, Number(curr[name] || 0) - Number(prev[name] || 0)),
    ]),
  );
}
|
|
194
|
+
|
|
195
|
+
/**
 * Shift the calendar day at the start of an ISO timestamp by `deltaDays`
 * (UTC) and return it as `YYYY-MM-DD`. Returns null for a non-string, an
 * empty string, or a string whose first ten characters are not a valid date.
 */
function shiftIsoDay(iso, deltaDays) {
  if (typeof iso !== "string" || iso === "") return null;
  const midnightMs = Date.parse(`${iso.slice(0, 10)}T00:00:00Z`);
  if (Number.isNaN(midnightMs)) return null;
  const shifted = new Date(midnightMs);
  shifted.setUTCDate(shifted.getUTCDate() + deltaDays);
  return shifted.toISOString().slice(0, 10);
}
|
|
202
|
+
|
|
203
|
+
module.exports = { makeRolloutStrategy };
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
const fs = require("node:fs");
|
|
4
|
+
const path = require("node:path");
|
|
5
|
+
|
|
6
|
+
module.exports = {
|
|
7
|
+
id: "claude",
|
|
8
|
+
displayName: "Claude Code",
|
|
9
|
+
sessionRoot({ home }) {
|
|
10
|
+
return path.join(home, ".claude", "projects");
|
|
11
|
+
},
|
|
12
|
+
walkSessions({ root }) {
|
|
13
|
+
if (!fs.existsSync(root)) return [];
|
|
14
|
+
const out = [];
|
|
15
|
+
for (const entry of fs.readdirSync(root, { withFileTypes: true })) {
|
|
16
|
+
if (!entry.isDirectory()) continue;
|
|
17
|
+
const dir = path.join(root, entry.name);
|
|
18
|
+
for (const f of fs.readdirSync(dir, { withFileTypes: true })) {
|
|
19
|
+
if (!f.isFile()) continue;
|
|
20
|
+
if (!f.name.endsWith(".jsonl")) continue;
|
|
21
|
+
out.push(path.join(dir, f.name));
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
return out;
|
|
25
|
+
},
|
|
26
|
+
extractUsage(line) {
|
|
27
|
+
if (!line || !line.includes('"usage"')) return null;
|
|
28
|
+
let obj;
|
|
29
|
+
try {
|
|
30
|
+
obj = JSON.parse(line);
|
|
31
|
+
} catch (_err) {
|
|
32
|
+
return null;
|
|
33
|
+
}
|
|
34
|
+
const msg = obj?.message || {};
|
|
35
|
+
const usage = msg.usage || obj.usage;
|
|
36
|
+
if (!usage || typeof usage !== "object") return null;
|
|
37
|
+
const timestamp = typeof obj.timestamp === "string" ? obj.timestamp : null;
|
|
38
|
+
if (!timestamp) return null;
|
|
39
|
+
|
|
40
|
+
return {
|
|
41
|
+
timestamp,
|
|
42
|
+
dedupeId: msg.id || obj.requestId || null,
|
|
43
|
+
channels: {
|
|
44
|
+
input: usage.input_tokens,
|
|
45
|
+
cache_creation: usage.cache_creation_input_tokens,
|
|
46
|
+
cache_read: usage.cache_read_input_tokens,
|
|
47
|
+
output: usage.output_tokens,
|
|
48
|
+
reasoning: 0,
|
|
49
|
+
},
|
|
50
|
+
};
|
|
51
|
+
},
|
|
52
|
+
};
|