tokentracker-cli 0.5.76 → 0.5.77

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tokentracker-cli",
3
- "version": "0.5.76",
3
+ "version": "0.5.77",
4
4
  "description": "Token usage tracker for AI agent CLIs (Claude Code, Codex, Cursor, Kiro, Gemini, OpenCode, OpenClaw, Hermes, GitHub Copilot)",
5
5
  "main": "src/cli.js",
6
6
  "bin": {
@@ -385,7 +385,7 @@ async function cmdSync(argv) {
385
385
  if (!progress?.enabled) return;
386
386
  const pct = p.total > 0 ? p.index / p.total : 1;
387
387
  progress.update(
388
- `Parsing Kiro CLI ${renderBar(pct)} ${formatNumber(p.index)}/${formatNumber(p.total)} convs | buckets ${formatNumber(p.bucketsQueued)}`,
388
+ `Parsing Kiro CLI ${renderBar(pct)} ${formatNumber(p.index)}/${formatNumber(p.total)} sessions | buckets ${formatNumber(p.bucketsQueued)}`,
389
389
  );
390
390
  },
391
391
  });
@@ -16,7 +16,20 @@ const { normalizeState: normalizeUploadState } = require("./upload-throttle");
16
16
  const { probeOpenclawHookState } = require("./openclaw-hook");
17
17
  const { probeOpenclawSessionPluginState } = require("./openclaw-session-plugin");
18
18
  const { resolveTrackerPaths } = require("./tracker-paths");
19
- const { resolveKiroCliDbPath } = require("./rollout");
19
+ // TASK-011: Kiro CLI DB path inlined here to avoid pulling the ~4000-line
20
+ // rollout module on every `tokentracker status` / `diagnostics` call.
21
+ // rollout.js still exports resolveKiroCliDbPath for external callers.
22
+ function resolveKiroCliDbPathInline(env, home) {
23
+ if (env.KIRO_CLI_DB_PATH) return env.KIRO_CLI_DB_PATH;
24
+ const effectiveHome = env.HOME || home;
25
+ return path.join(
26
+ effectiveHome,
27
+ "Library",
28
+ "Application Support",
29
+ "kiro-cli",
30
+ "data.sqlite3",
31
+ );
32
+ }
20
33
 
21
34
  async function collectTrackerDiagnostics({
22
35
  home = os.homedir(),
@@ -98,7 +111,7 @@ async function collectTrackerDiagnostics({
98
111
  const kiroIdePresent =
99
112
  (await safeStatSize(path.join(kiroIdeDevDataDir, "devdata.sqlite"))) > 0 ||
100
113
  (await safeStatSize(path.join(kiroIdeDevDataDir, "tokens_generated.jsonl"))) > 0;
101
- const kiroCliDbPath = resolveKiroCliDbPath(process.env);
114
+ const kiroCliDbPath = resolveKiroCliDbPathInline(process.env, home);
102
115
  const kiroCliPresent = require("node:fs").existsSync(kiroCliDbPath);
103
116
 
104
117
  const lastSuccessAt = uploadThrottle.lastSuccessMs
@@ -557,7 +557,7 @@ function createLocalApiHandler({ queuePath }) {
557
557
  try {
558
558
  // Sticky semantics: never replace an existing on-disk session with an empty cookie map.
559
559
  if (relayCookies.size === 0) return;
560
-
560
+
561
561
  const json = JSON.stringify(Object.fromEntries(relayCookies));
562
562
  fs.writeFileSync(cookiePath, json, { encoding: "utf8", mode: 0o600 });
563
563
  } catch (e) {
@@ -565,6 +565,18 @@ function createLocalApiHandler({ queuePath }) {
565
565
  }
566
566
  }
567
567
 
568
+ function clearRelayCookies(reason) {
569
+ if (relayCookies.size === 0) return;
570
+ relayCookies.clear();
571
+ try {
572
+ if (fs.existsSync(cookiePath)) fs.unlinkSync(cookiePath);
573
+ } catch (e) {
574
+ console.error("[LocalAPI] Failed to clear relay cookies:", e.message);
575
+ return;
576
+ }
577
+ if (reason) console.warn(`[LocalAPI] Cleared relay cookies: ${reason}`);
578
+ }
579
+
568
580
  function captureSetCookies(headerValue) {
569
581
  if (!headerValue) return;
570
582
  const parts = headerValue.split(/,(?=\s*\w+=)/);
@@ -598,11 +610,17 @@ function createLocalApiHandler({ queuePath }) {
598
610
  if (changed) persistRelayCookies();
599
611
  }
600
612
 
613
+ function normalizeCookieHeader(value) {
614
+ if (Array.isArray(value)) return value.filter(Boolean).join("; ");
615
+ return typeof value === "string" ? value : "";
616
+ }
617
+
601
618
  function buildRelayCookieHeader(clientCookieHeader) {
602
- if (relayCookies.size === 0) return clientCookieHeader || "";
619
+ const normalizedClientCookieHeader = normalizeCookieHeader(clientCookieHeader);
620
+ if (relayCookies.size === 0) return normalizedClientCookieHeader;
603
621
  const clientPairs = new Map();
604
- if (clientCookieHeader) {
605
- for (const part of clientCookieHeader.split(";")) {
622
+ if (normalizedClientCookieHeader) {
623
+ for (const part of normalizedClientCookieHeader.split(";")) {
606
624
  const eqIdx = part.indexOf("=");
607
625
  if (eqIdx < 1) continue;
608
626
  const n = part.substring(0, eqIdx).trim();
@@ -672,8 +690,18 @@ function createLocalApiHandler({ queuePath }) {
672
690
  if (key === "host" || key === "connection") continue;
673
691
  proxyHeaders[key] = value;
674
692
  }
675
- // Inject relay cookies so WebView benefits from browser's login session
676
- const mergedCookie = buildRelayCookieHeader(proxyHeaders["cookie"]);
693
+ const hasClientCookie = normalizeCookieHeader(proxyHeaders["cookie"]).trim().length > 0;
694
+ const hasCsrfHeader = typeof proxyHeaders["x-csrf-token"] === "string" && proxyHeaders["x-csrf-token"].trim().length > 0;
695
+ const shouldInjectRelayCookies =
696
+ p !== "/api/auth/refresh" || hasClientCookie || hasCsrfHeader;
697
+
698
+ // Inject relay cookies so WebView benefits from browser's login session.
699
+ // Refresh requests need either a browser cookie or an explicit CSRF token;
700
+ // otherwise replaying a stale persisted refresh cookie just manufactures
701
+ // Invalid CSRF errors on startup.
702
+ const mergedCookie = shouldInjectRelayCookies
703
+ ? buildRelayCookieHeader(proxyHeaders["cookie"])
704
+ : normalizeCookieHeader(proxyHeaders["cookie"]);
677
705
  if (mergedCookie) proxyHeaders["cookie"] = mergedCookie;
678
706
 
679
707
  const bodyChunks = [];
@@ -697,8 +725,17 @@ function createLocalApiHandler({ queuePath }) {
697
725
  return [k, v];
698
726
  });
699
727
  res.writeHead(proxyRes.status, Object.fromEntries(responseHeaders));
700
- const resBody = await proxyRes.arrayBuffer();
701
- res.end(Buffer.from(resBody));
728
+ const resBody = Buffer.from(await proxyRes.arrayBuffer());
729
+ if (
730
+ p === "/api/auth/refresh"
731
+ && proxyRes.status === 403
732
+ && !hasClientCookie
733
+ && !hasCsrfHeader
734
+ && /invalid csrf token/i.test(resBody.toString("utf8"))
735
+ ) {
736
+ clearRelayCookies("stale refresh cookie without local CSRF context");
737
+ }
738
+ res.end(resBody);
702
739
  } catch (e) {
703
740
  json(res, { error: `Auth proxy error: ${e?.message || e}` }, 502);
704
741
  }
@@ -3085,20 +3085,31 @@ function resolveKiroCliDbPath(env = process.env) {
3085
3085
  return path.join(home, "Library", "Application Support", "kiro-cli", "data.sqlite3");
3086
3086
  }
3087
3087
 
3088
+ // Bug-4: canonical UUID shape — 8-4-4-4-12 hex groups. The looser
3089
+ // /^[0-9a-f-]{36}\.json$/ form accepted `36 hyphens`.json or 36 hex with
3090
+ // no hyphens. kiro-cli writes proper UUIDs; lock to the canonical shape.
3091
+ const KIRO_CLI_SESSION_FILE_RE =
3092
+ /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\.json$/i;
3093
+
3088
3094
  // Lists ~/.kiro/sessions/cli/{uuid}.json files. Includes files whose sibling
3089
3095
  // .lock is present — we read those as tail-only snapshots so a running
3090
3096
  // session's completed turns still land in the queue on the next sync. The
3091
3097
  // .json files are rewritten atomically by kiro-cli on each turn flush, so
3092
3098
  // a stale read just means we'll pick up the rest next time.
3099
+ //
3100
+ // TASK-014: env.HOME is honored (symmetric with resolveKiroCliDbPath) so
3101
+ // callers can redirect to a tmp home for hermetic tests/CI.
3093
3102
  function resolveKiroCliSessionFiles(env = process.env) {
3094
- const home = require("node:os").homedir();
3103
+ const home = env.HOME || require("node:os").homedir();
3095
3104
  const kiroHome = env.KIRO_HOME || path.join(home, ".kiro");
3096
3105
  const sessionsDir = path.join(kiroHome, "sessions", "cli");
3097
3106
  if (!fssync.existsSync(sessionsDir)) return [];
3098
3107
  const files = [];
3099
3108
  try {
3100
3109
  for (const entry of fssync.readdirSync(sessionsDir)) {
3101
- if (!entry.endsWith(".json")) continue;
3110
+ // TASK-003: only canonical {uuid}.json files; backups, scratch,
3111
+ // typos are skipped so they don't feed JSON.parse garbage.
3112
+ if (!KIRO_CLI_SESSION_FILE_RE.test(entry)) continue;
3102
3113
  files.push(path.join(sessionsDir, entry));
3103
3114
  }
3104
3115
  } catch {
@@ -3126,63 +3137,73 @@ function resolveKiroCliSessionFiles(env = process.env) {
3126
3137
  // "claims" the buffered Prompt chars for its turn, and the buffer resets.
3127
3138
  // Later cycles within the same turn (Assistant → ToolResults → Assistant)
3128
3139
  // do not re-attribute.
3129
- function readKiroCliMessageChars(jsonlPath, turnMessageIds) {
3140
+ async function readKiroCliMessageChars(jsonlPath, turnMessageIds) {
3130
3141
  const result = {
3131
3142
  byMessage: new Map(),
3132
3143
  messageKind: new Map(),
3133
3144
  turnPromptChars: new Map(),
3134
3145
  };
3135
3146
  if (!jsonlPath || !fssync.existsSync(jsonlPath)) return result;
3136
- let raw;
3147
+ // TASK-005: stream via readline so multi-MB .jsonl files (heavy tool-use
3148
+ // sessions) don't block the sync event loop by buffering whole-file.
3149
+ let stream;
3137
3150
  try {
3138
- raw = fssync.readFileSync(jsonlPath, "utf8");
3151
+ stream = fssync.createReadStream(jsonlPath, { encoding: "utf8" });
3139
3152
  } catch {
3140
3153
  return result;
3141
3154
  }
3155
+ const rl = readline.createInterface({ input: stream, crlfDelay: Infinity });
3142
3156
  const midToTurn =
3143
3157
  turnMessageIds instanceof Map ? turnMessageIds : new Map();
3144
3158
  const attributedTurns = new Set();
3145
3159
  let pendingPromptChars = 0;
3146
- for (const line of raw.split("\n")) {
3147
- if (!line.trim()) continue;
3148
- let evt;
3149
- try {
3150
- evt = JSON.parse(line);
3151
- } catch {
3152
- continue;
3153
- }
3154
- const data = evt?.data;
3155
- if (!data || typeof data !== "object") continue;
3156
- const mid = data.message_id;
3157
- if (!mid) continue;
3158
- const content = Array.isArray(data.content) ? data.content : [];
3159
- let chars = 0;
3160
- for (const c of content) {
3161
- if (!c || typeof c !== "object") continue;
3162
- if (c.kind === "text" && typeof c.data === "string") {
3163
- chars += c.data.length;
3164
- } else if (c.kind === "toolUse" && c.data && typeof c.data === "object") {
3165
- // tool-use invocations count toward output; stringify the input payload
3166
- try {
3167
- chars += JSON.stringify(c.data.input || {}).length;
3168
- } catch {
3169
- // ignore
3160
+ // Bug-5: wrap the streamed iteration. Mid-read errors (file deleted or
3161
+ // truncated while kiro-cli is writing) would otherwise propagate up and
3162
+ // crash the whole sync pass. On error we return the partial result and
3163
+ // let the next sync re-read fresh.
3164
+ try {
3165
+ for await (const line of rl) {
3166
+ if (!line || !line.trim()) continue;
3167
+ let evt;
3168
+ try {
3169
+ evt = JSON.parse(line);
3170
+ } catch {
3171
+ continue;
3172
+ }
3173
+ const data = evt && evt.data;
3174
+ if (!data || typeof data !== "object") continue;
3175
+ const mid = data.message_id;
3176
+ if (!mid) continue;
3177
+ const content = Array.isArray(data.content) ? data.content : [];
3178
+ let chars = 0;
3179
+ for (const c of content) {
3180
+ if (!c || typeof c !== "object") continue;
3181
+ if (c.kind === "text" && typeof c.data === "string") {
3182
+ chars += c.data.length;
3183
+ } else if (c.kind === "toolUse" && c.data && typeof c.data === "object") {
3184
+ try {
3185
+ chars += JSON.stringify(c.data.input || {}).length;
3186
+ } catch {
3187
+ // ignore
3188
+ }
3170
3189
  }
3171
3190
  }
3172
- }
3173
- result.byMessage.set(mid, (result.byMessage.get(mid) || 0) + chars);
3174
- if (!result.messageKind.has(mid)) result.messageKind.set(mid, evt.kind);
3175
-
3176
- if (evt.kind === "Prompt") {
3177
- pendingPromptChars += chars;
3178
- } else if (evt.kind === "AssistantMessage" && midToTurn.has(mid)) {
3179
- const turnIdx = midToTurn.get(mid);
3180
- if (!attributedTurns.has(turnIdx)) {
3181
- result.turnPromptChars.set(turnIdx, pendingPromptChars);
3182
- attributedTurns.add(turnIdx);
3183
- pendingPromptChars = 0;
3191
+ result.byMessage.set(mid, (result.byMessage.get(mid) || 0) + chars);
3192
+ if (!result.messageKind.has(mid)) result.messageKind.set(mid, evt.kind);
3193
+
3194
+ if (evt.kind === "Prompt") {
3195
+ pendingPromptChars += chars;
3196
+ } else if (evt.kind === "AssistantMessage" && midToTurn.has(mid)) {
3197
+ const turnIdx = midToTurn.get(mid);
3198
+ if (!attributedTurns.has(turnIdx)) {
3199
+ result.turnPromptChars.set(turnIdx, pendingPromptChars);
3200
+ attributedTurns.add(turnIdx);
3201
+ pendingPromptChars = 0;
3202
+ }
3184
3203
  }
3185
3204
  }
3205
+ } catch {
3206
+ // partial data — return what we have.
3186
3207
  }
3187
3208
  return result;
3188
3209
  }
@@ -3192,7 +3213,7 @@ function readKiroCliMessageChars(jsonlPath, turnMessageIds) {
3192
3213
  // input_tokens, output_tokens }]. We use the same request_id dedup slot as
3193
3214
  // the SQLite path so mutations (turn rewritten on next flush) go through
3194
3215
  // the subtract-old/add-new path in parseKiroCliIncremental.
3195
- function readKiroCliSessionTurns(jsonPath) {
3216
+ async function readKiroCliSessionTurns(jsonPath) {
3196
3217
  if (!jsonPath || !fssync.existsSync(jsonPath)) return [];
3197
3218
  let parsed;
3198
3219
  try {
@@ -3229,14 +3250,19 @@ function readKiroCliSessionTurns(jsonPath) {
3229
3250
 
3230
3251
  // Load sibling .jsonl for char-count fallback.
3231
3252
  const jsonlPath = jsonPath.replace(/\.json$/, ".jsonl");
3232
- const charMap = readKiroCliMessageChars(jsonlPath, turnMessageIds);
3253
+ const charMap = await readKiroCliMessageChars(jsonlPath, turnMessageIds);
3233
3254
 
3234
3255
  const flat = [];
3235
3256
  for (let turnIdx = 0; turnIdx < turns.length; turnIdx++) {
3236
3257
  const turn = turns[turnIdx];
3237
3258
  if (!turn || typeof turn !== "object") continue;
3259
+ // TASK-001: preserve the integer 0. `|| null` would coerce a valid
3260
+ // loop_id.rand=0 into a message_id fallback, splitting the dedup
3261
+ // namespace across runs that see 0 vs runs that don't.
3238
3262
  const loopRand =
3239
- (turn.loop_id && (turn.loop_id.rand ?? turn.loop_id.seed)) || null;
3263
+ turn.loop_id && typeof turn.loop_id === "object"
3264
+ ? turn.loop_id.rand ?? turn.loop_id.seed ?? null
3265
+ : null;
3240
3266
  const messageIds = Array.isArray(turn.message_ids) ? turn.message_ids : [];
3241
3267
  const requestId = loopRand != null ? `${sessionId}:${loopRand}` : (messageIds[0] || null);
3242
3268
  if (!requestId) continue;
@@ -3261,17 +3287,42 @@ function readKiroCliSessionTurns(jsonPath) {
3261
3287
  outputTokens = Math.floor(assistantChars / KIRO_CLI_CHARS_PER_TOKEN);
3262
3288
  }
3263
3289
 
3264
- // Timestamp: end_timestamp is an ISO string; coerce to ms.
3265
- const tsRaw = turn.end_timestamp || turn.start_timestamp;
3266
- const tsMs = tsRaw ? Date.parse(tsRaw) : NaN;
3290
+ // TASK-006: timestamp precedence matches SQLite's
3291
+ // request_start_timestamp_ms so a turn that migrates SQLite ↔
3292
+ // session-file buckets identically across tiers (previously a turn
3293
+ // straddling a half-hour boundary bucketed differently per source
3294
+ // because session files use end_timestamp while SQLite uses start).
3295
+ // 1. turn.request_start_timestamp_ms (numeric ms, SQLite shape)
3296
+ // 2. turn.start_timestamp (ISO string)
3297
+ // 3. turn.end_timestamp (ISO string, legacy fallback)
3298
+ let tsMs = NaN;
3299
+ if (Number.isFinite(Number(turn.request_start_timestamp_ms))) {
3300
+ tsMs = Number(turn.request_start_timestamp_ms);
3301
+ } else if (turn.start_timestamp) {
3302
+ tsMs = Date.parse(turn.start_timestamp);
3303
+ } else if (turn.end_timestamp) {
3304
+ tsMs = Date.parse(turn.end_timestamp);
3305
+ }
3267
3306
  if (!Number.isFinite(tsMs) || tsMs <= 0) continue;
3268
3307
 
3269
3308
  flat.push({
3270
3309
  request_id: requestId,
3271
3310
  session_model_id: sessionModelId,
3272
3311
  message_id: messageIds[0] || null,
3312
+ // Turn-granular migration match: surface the full list so the
3313
+ // cross-source retraction in parseKiroCliIncremental can drop this
3314
+ // specific turn iff any of its assistant/tool_result message_ids
3315
+ // appears in SQLite. Session-level matching over-retracts newer
3316
+ // turns in an active session whose older turns have already
3317
+ // flushed to SQLite.
3318
+ all_message_ids: messageIds.slice(),
3273
3319
  model_id: turn.model_id || sessionModelId,
3274
3320
  request_start_timestamp_ms: tsMs,
3321
+ // D-1 / Bug-2: tag with session_id so the retraction pass can match
3322
+ // session-origin entries even when the requestId format has no
3323
+ // colon (no-loop_id fallback uses a bare message_id UUID that would
3324
+ // otherwise be indistinguishable from SQLite's UUID keys).
3325
+ session_id: sessionId,
3275
3326
  // For the parser, we feed the ALREADY-approximated tokens directly via
3276
3327
  // a special sentinel field. The parser will divide chars by
3277
3328
  // KIRO_CLI_CHARS_PER_TOKEN; bypass that by pre-multiplying here.
@@ -3319,7 +3370,14 @@ function canonicalizeKiroCliModelId(raw) {
3319
3370
 
3320
3371
  // Read Kiro CLI requests using SQL-side json_extract so we don't pull the
3321
3372
  // full (93 MB-ish) conversations_v2 blob back through sqlite3 -json.
3322
- function readKiroCliRequests(dbPath) {
3373
+ //
3374
+ // D-1: also surfaces `user_turn_metadata.continuation_id` so the cross-
3375
+ // source retraction pass (parseKiroCliIncremental) can match whichever
3376
+ // UUID kiro-cli used as the session link. The SQL column
3377
+ // `conversation_id` and the inner JSON `continuation_id` are different
3378
+ // UUIDs on observed data; covering both means retraction fires whichever
3379
+ // side matches the live session's `session_id`.
3380
+ function readKiroCliRequests(dbPath, env = process.env) {
3323
3381
  if (!dbPath || !fssync.existsSync(dbPath)) return [];
3324
3382
  let raw;
3325
3383
  try {
@@ -3330,13 +3388,23 @@ function readKiroCliRequests(dbPath) {
3330
3388
  dbPath,
3331
3389
  "SELECT conversation_id, " +
3332
3390
  "json_extract(value, '$.model_info.model_id') AS session_model_id, " +
3391
+ "json_extract(value, '$.user_turn_metadata.continuation_id') AS continuation_id, " +
3333
3392
  "json_extract(value, '$.user_turn_metadata.requests') AS requests_json " +
3334
3393
  "FROM conversations_v2 " +
3335
3394
  "WHERE json_extract(value, '$.user_turn_metadata.requests') IS NOT NULL",
3336
3395
  ],
3337
3396
  { encoding: "utf8", maxBuffer: 128 * 1024 * 1024, timeout: 120_000 },
3338
3397
  );
3339
- } catch {
3398
+ } catch (err) {
3399
+ // TASK-012 / D-8: debug-gated stderr log so a missing sqlite3 binary
3400
+ // is distinguishable from an empty DB. Silent by default. env is
3401
+ // threaded so tests can toggle debug hermetically.
3402
+ const dbg = String((env && env.TOKENTRACKER_DEBUG) || "").toLowerCase();
3403
+ if (dbg === "1" || dbg === "true") {
3404
+ process.stderr.write(
3405
+ `[kiro-cli] sqlite3 read failed: ${err?.message || err}\n`,
3406
+ );
3407
+ }
3340
3408
  return [];
3341
3409
  }
3342
3410
  if (!raw || !raw.trim()) return [];
@@ -3360,6 +3428,7 @@ function readKiroCliRequests(dbPath) {
3360
3428
  if (!r || typeof r !== "object") continue;
3361
3429
  flat.push({
3362
3430
  conversation_id: row.conversation_id,
3431
+ continuation_id: row.continuation_id || null,
3363
3432
  session_model_id: row.session_model_id || null,
3364
3433
  request_id: r.request_id || null,
3365
3434
  message_id: r.message_id || null,
@@ -3400,18 +3469,22 @@ async function parseKiroCliIncremental({ sessionFiles, cursors, queuePath, onPro
3400
3469
  // Combine two sources under the same (source='kiro', cursors.kiroCli)
3401
3470
  // namespace: historical rows from the SQLite DB plus live session state
3402
3471
  // from ~/.kiro/sessions/cli/{uuid}.json (covers turns from a running
3403
- // session that hasn't flushed to SQLite yet). Request IDs are disjoint
3404
- // (SQLite uses request_id UUID; sessions use {sessionId}:{loop_id.rand}),
3405
- // so no cross-source dedup is needed.
3406
- const flatDb = fssync.existsSync(dbPath) ? readKiroCliRequests(dbPath) : [];
3472
+ // session that hasn't flushed to SQLite yet). Request ID shapes differ:
3473
+ // SQLite carries a persisted request_id UUID; session files synthesize
3474
+ // `${sessionId}:${loop_id.rand}`. When kiro-cli migrates a live session
3475
+ // into SQLite the same turn lands under a new request_id — the cross-
3476
+ // source retraction pass below (D-1 + TASK-007) matches session_id ↔
3477
+ // SQLite conversation_id OR continuation_id to subtract the orphan
3478
+ // session-file cursor entry before the new SQLite row is processed.
3479
+ const flatDb = fssync.existsSync(dbPath)
3480
+ ? readKiroCliRequests(dbPath, resolvedEnv)
3481
+ : [];
3407
3482
  const sessionFilesList = resolveKiroCliSessionFiles(resolvedEnv);
3408
- const flatSessions = [];
3483
+ let flatSessions = [];
3409
3484
  for (const jsonPath of sessionFilesList) {
3410
- for (const turn of readKiroCliSessionTurns(jsonPath)) {
3411
- flatSessions.push(turn);
3412
- }
3485
+ const turns = await readKiroCliSessionTurns(jsonPath);
3486
+ for (const turn of turns) flatSessions.push(turn);
3413
3487
  }
3414
- const flat = flatDb.concat(flatSessions);
3415
3488
  // Per-request state replaces the old seenIds set. Each entry captures
3416
3489
  // what we contributed for that request_id last time, so a later mutation
3417
3490
  // (same request_id, different fingerprint) can subtract-old/add-new
@@ -3421,17 +3494,146 @@ async function parseKiroCliIncremental({ sessionFiles, cursors, queuePath, onPro
3421
3494
  ? { ...kiroCliState.requests }
3422
3495
  : {};
3423
3496
 
3424
- if (flat.length === 0) {
3425
- cursors.kiroCli = {
3426
- ...kiroCliState,
3427
- requests: requestState,
3428
- updatedAt: new Date().toISOString(),
3429
- };
3430
- return { recordsProcessed: 0, eventsAggregated: 0, bucketsQueued: 0 };
3431
- }
3432
-
3433
3497
  const hourlyState = normalizeHourlyState(cursors?.hourly);
3434
3498
  const touchedBuckets = new Set();
3499
+ const debugEnabled = ["1", "true"].includes(
3500
+ String(resolvedEnv.TOKENTRACKER_DEBUG || "").toLowerCase(),
3501
+ );
3502
+
3503
+ // ── TASK-007 + D-1: cross-source retraction. When a conversation has
3504
+ // migrated from the session-file tier into SQLite, the cursor's
3505
+ // prior session-file entry (keyed `${sessionId}:${loopRand}` OR a
3506
+ // bare message_id UUID when loop_id is absent) never matches the
3507
+ // new SQLite request_id. Without retraction the old contribution
3508
+ // stays in the bucket absolute and the new SQLite row is added on
3509
+ // top — permanent double-count. D-6: typed non-empty check so a
3510
+ // corrupt NULL/empty conv_id can't poison the match set.
3511
+ //
3512
+ // Two match sets are built:
3513
+ // • migratedConvIds — session_id → any row in SQLite. Used to scope
3514
+ // cursor retraction (coarse but safe because
3515
+ // un-migrated turns still present in the session
3516
+ // file are re-added later in this same run).
3517
+ // • migratedMsgIds — r.message_id → exact turn in SQLite. Used to
3518
+ // filter flatSessions at TURN granularity. An
3519
+ // active session with older migrated turns +
3520
+ // newer session-file-only turns must keep the
3521
+ // newer turns; session-level filtering dropped
3522
+ // them and caused Kiro CLI under-count.
3523
+ const migratedConvIds = new Set();
3524
+ const migratedMsgIds = new Set();
3525
+ for (const row of flatDb) {
3526
+ if (!row) continue;
3527
+ if (typeof row.conversation_id === "string" && row.conversation_id)
3528
+ migratedConvIds.add(row.conversation_id);
3529
+ if (typeof row.continuation_id === "string" && row.continuation_id)
3530
+ migratedConvIds.add(row.continuation_id);
3531
+ if (typeof row.message_id === "string" && row.message_id)
3532
+ migratedMsgIds.add(row.message_id);
3533
+ }
3534
+ if (migratedConvIds.size > 0) {
3535
+ // Pre-collect to retract so mutation during iteration is safe.
3536
+ // Retraction stays session-level: for every cursor entry whose
3537
+ // session_id has any row in SQLite, subtract its prior contribution.
3538
+ // This is provably correct because turns still live in the session
3539
+ // file get re-added in this same run via the (turn-granular) filter
3540
+ // below, producing a net delta of zero for un-migrated turns.
3541
+ const toRetract = [];
3542
+ for (const [reqId, prev] of Object.entries(requestState)) {
3543
+ if (!prev || typeof prev !== "object") continue;
3544
+ // Bug-2: prefer the stored session_id tag (new schema); fall back
3545
+ // to colon-split for legacy cursors pre-dating this change.
3546
+ let sid = null;
3547
+ if (typeof prev.session_id === "string" && prev.session_id) {
3548
+ sid = prev.session_id;
3549
+ } else {
3550
+ const colon = reqId.indexOf(":");
3551
+ if (colon > 0) sid = reqId.slice(0, colon);
3552
+ }
3553
+ if (!sid || !migratedConvIds.has(sid)) continue;
3554
+ toRetract.push([reqId, prev, sid]);
3555
+ }
3556
+ for (const [reqId, prev, sid] of toRetract) {
3557
+ if (prev.input_tokens || prev.output_tokens) {
3558
+ const prevBucket = getHourlyBucket(
3559
+ hourlyState,
3560
+ "kiro",
3561
+ prev.model,
3562
+ prev.bucketStart,
3563
+ );
3564
+ addTotals(prevBucket.totals, {
3565
+ input_tokens: -prev.input_tokens,
3566
+ cached_input_tokens: 0,
3567
+ cache_creation_input_tokens: 0,
3568
+ output_tokens: -prev.output_tokens,
3569
+ reasoning_output_tokens: 0,
3570
+ total_tokens: -(prev.input_tokens + prev.output_tokens),
3571
+ conversation_count: -1,
3572
+ });
3573
+ touchedBuckets.add(bucketKey("kiro", prev.model, prev.bucketStart));
3574
+ }
3575
+ delete requestState[reqId];
3576
+ if (debugEnabled) {
3577
+ process.stderr.write(
3578
+ `[kiro-cli] retracted migrated session entry (conv ${sid})\n`,
3579
+ );
3580
+ }
3581
+ }
3582
+ // Turn-granular filter: drop a session-file turn only when at least
3583
+ // one of its assistant/tool_result message_ids is present in SQLite
3584
+ // (i.e. this specific turn has been flushed). Newer turns in the
3585
+ // same session that haven't yet landed in SQLite survive.
3586
+ //
3587
+ // Edge: a turn with no message_ids at all cannot be matched. We keep
3588
+ // it — preferring a rare potential double-count (narrow, since such
3589
+ // a turn would also have no request_id under the no-loop_id path and
3590
+ // be discarded upstream) over the reported regression of dropping
3591
+ // legitimate newer turns wholesale. D-14: still O(N) single-pass.
3592
+ const before = flatSessions.length;
3593
+ flatSessions = flatSessions.filter((s) => {
3594
+ if (!s) return false;
3595
+ const mids = Array.isArray(s.all_message_ids)
3596
+ ? s.all_message_ids
3597
+ : s.message_id
3598
+ ? [s.message_id]
3599
+ : [];
3600
+ for (const mid of mids) {
3601
+ if (typeof mid === "string" && mid && migratedMsgIds.has(mid)) {
3602
+ return false;
3603
+ }
3604
+ }
3605
+ return true;
3606
+ });
3607
+ if (debugEnabled && flatSessions.length !== before) {
3608
+ process.stderr.write(
3609
+ `[kiro-cli] dropped ${before - flatSessions.length} migrated session-file turn(s)\n`,
3610
+ );
3611
+ }
3612
+ }
3613
+
3614
+ const flat = flatDb.concat(flatSessions);
3615
+
3616
+ if (flat.length === 0) {
3617
+ // Bug-1: retraction may have touched buckets even with empty flat.
3618
+ // Clamp + cap BEFORE flushing so the early-return path applies the
3619
+ // same guarantees as the main path (fixes a skip that flushed
3620
+ // negative conversation_counts and left the cap unapplied).
3621
+ const cappedEarly = clampAndCapKiroCliState({
3622
+ requestState,
3623
+ hourlyState,
3624
+ touchedBuckets,
3625
+ });
3626
+ const bucketsQueued = await enqueueTouchedBuckets({
3627
+ queuePath,
3628
+ hourlyState,
3629
+ touchedBuckets,
3630
+ });
3631
+ const updatedAt = new Date().toISOString();
3632
+ hourlyState.updatedAt = updatedAt;
3633
+ cursors.hourly = hourlyState;
3634
+ cursors.kiroCli = { ...kiroCliState, requests: cappedEarly, updatedAt };
3635
+ return { recordsProcessed: 0, eventsAggregated: 0, bucketsQueued };
3636
+ }
3435
3637
  const cb = typeof onProgress === "function" ? onProgress : null;
3436
3638
  let recordsProcessed = 0;
3437
3639
  let eventsAggregated = 0;
@@ -3497,12 +3699,17 @@ async function parseKiroCliIncremental({ sessionFiles, cursors, queuePath, onPro
3497
3699
 
3498
3700
  // Always record the cursor entry (even for zero-token requests) so we
3499
3701
  // don't re-count later if Kiro rewrites this request with real data.
3702
+ // Bug-2: tag session-origin entries with session_id so the retraction
3703
+ // pass can identify them regardless of request_id format (the
3704
+ // no-loop_id fallback produces a bare UUID with no colon, which would
3705
+ // otherwise be indistinguishable from SQLite's UUID keys).
3500
3706
  requestState[requestId] = {
3501
3707
  fingerprint,
3502
3708
  bucketStart,
3503
3709
  model,
3504
3710
  input_tokens: approxInput,
3505
3711
  output_tokens: approxOutput,
3712
+ ...(r.session_id ? { session_id: r.session_id } : {}),
3506
3713
  };
3507
3714
 
3508
3715
  if (cb && i % 50 === 0) {
@@ -3516,15 +3723,59 @@ async function parseKiroCliIncremental({ sessionFiles, cursors, queuePath, onPro
3516
3723
  }
3517
3724
  }
3518
3725
 
3726
+ const cappedState = clampAndCapKiroCliState({
3727
+ requestState,
3728
+ hourlyState,
3729
+ touchedBuckets,
3730
+ });
3731
+
3519
3732
  const bucketsQueued = await enqueueTouchedBuckets({ queuePath, hourlyState, touchedBuckets });
3520
3733
  const updatedAt = new Date().toISOString();
3521
3734
  hourlyState.updatedAt = updatedAt;
3522
3735
  cursors.hourly = hourlyState;
3523
- cursors.kiroCli = { ...kiroCliState, requests: requestState, updatedAt };
3736
+ cursors.kiroCli = { ...kiroCliState, requests: cappedState, updatedAt };
3524
3737
 
3525
3738
  return { recordsProcessed, eventsAggregated, bucketsQueued };
3526
3739
  }
3527
3740
 
3741
+ // TASK-004 + TASK-010 + Bug-1: shared end-of-run clamp + cap for
3742
+ // parseKiroCliIncremental. Centralized so the main path AND the
3743
+ // retraction-only early-return path both apply the same guarantees.
3744
+ // Mutates hourlyState bucket totals in place (clamp) and returns a new
3745
+ // capped requestState object (cap).
3746
+ const KIRO_CLI_CURSOR_MAX_AGE_MS = 90 * 24 * 60 * 60 * 1000;
3747
+ const KIRO_CLI_CURSOR_MAX_ENTRIES = 20_000;
3748
+
3749
+ function clampAndCapKiroCliState({ requestState, hourlyState, touchedBuckets }) {
3750
+ // TASK-010: clamp conversation_count to >= 0 on Kiro-touched buckets
3751
+ // only. The shared enqueueTouchedBuckets is left untouched so
3752
+ // legitimate negatives from the 10 other parsers are not masked. Kiro
3753
+ // negatives come from the subtract-old pass on mutation or retraction.
3754
+ for (const key of touchedBuckets) {
3755
+ const bucket = hourlyState.buckets && hourlyState.buckets[key];
3756
+ if (bucket && bucket.totals && bucket.totals.conversation_count < 0) {
3757
+ bucket.totals.conversation_count = 0;
3758
+ }
3759
+ }
3760
+ // TASK-004: cap cursors.kiroCli.requests by age + count. Runs LAST so
3761
+ // nothing active or just-retracted is pruned mid-flight.
3762
+ const ageCutoffMs = Date.now() - KIRO_CLI_CURSOR_MAX_AGE_MS;
3763
+ const cappedEntries = [];
3764
+ for (const [reqId, entry] of Object.entries(requestState)) {
3765
+ if (!entry || typeof entry !== "object") continue;
3766
+ const ts = entry.bucketStart ? Date.parse(entry.bucketStart) : NaN;
3767
+ if (!Number.isFinite(ts) || ts < ageCutoffMs) continue;
3768
+ cappedEntries.push([reqId, entry, ts]);
3769
+ }
3770
+ if (cappedEntries.length > KIRO_CLI_CURSOR_MAX_ENTRIES) {
3771
+ cappedEntries.sort((a, b) => b[2] - a[2]); // newest first
3772
+ cappedEntries.length = KIRO_CLI_CURSOR_MAX_ENTRIES;
3773
+ }
3774
+ const capped = {};
3775
+ for (const [reqId, entry] of cappedEntries) capped[reqId] = entry;
3776
+ return capped;
3777
+ }
3778
+
3528
3779
  // Back-compat path: per-session .json files (the old fixture shape). Emits
3529
3780
  // exact tokens if the fixture happens to carry them (which the test fixture
3530
3781
  // does). Used only by the test/rollout-parser.test.js fixture tests.