costhawk 1.5.12 → 1.5.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/build-info.d.ts +1 -1
- package/dist/build-info.js +1 -1
- package/dist/cursor-parser.d.ts +128 -20
- package/dist/cursor-parser.d.ts.map +1 -1
- package/dist/cursor-parser.js +576 -43
- package/dist/cursor-parser.js.map +1 -1
- package/dist/index.js +66 -15
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cursor-parser.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Cursor Local SQLite Parser
|
|
2
|
+
* Cursor Local SQLite Parser
|
|
3
3
|
*
|
|
4
4
|
* Parses Cursor IDE chat history from the local SQLite database to extract
|
|
5
|
-
* token usage
|
|
5
|
+
* token usage and timestamps. Read-only. Does not push to any backend.
|
|
6
6
|
*
|
|
7
7
|
* Storage:
|
|
8
8
|
* macOS: ~/Library/Application Support/Cursor/User/globalStorage/state.vscdb
|
|
@@ -18,21 +18,93 @@
|
|
|
18
18
|
* on bubble rows. Model name at $.modelInfo.modelName. Server-side dedup id
|
|
19
19
|
* at $.serverBubbleId.
|
|
20
20
|
*
|
|
21
|
-
*
|
|
22
|
-
*
|
|
23
|
-
*
|
|
21
|
+
* Timestamps (verified in Task #30 against a real state.vscdb):
|
|
22
|
+
* - $.createdAt on bubbles is an ISO 8601 string (~56% coverage, all-or-
|
|
23
|
+
* nothing per composer — likely added in a newer Cursor version).
|
|
24
|
+
* - $.createdAt on composerData rows is a Unix milliseconds number (100%
|
|
25
|
+
* coverage). Same field name, different type — parser handles both.
|
|
26
|
+
* - $.lastUpdatedAt on composerData rows is Unix ms (~13% coverage).
|
|
27
|
+
* - $.timingInfo.client* on bubbles is performance.now()-style relative
|
|
28
|
+
* (seconds since Cursor process start), NOT absolute — never use it as
|
|
29
|
+
* a wall-clock timestamp.
|
|
24
30
|
*
|
|
25
|
-
*
|
|
26
|
-
*
|
|
31
|
+
* Fallback ladder for per-session timestamps: prefer min/max of bubble
|
|
32
|
+
* createdAt when present, otherwise use composerData.createdAt with optional
|
|
33
|
+
 * composerData.lastUpdatedAt as end time. Sessions with neither source get
 * null timestamps; the `timestampSource` / `timestampQuality` fields surface
 * whether the values are precise, approximate, or absent ("none").
|
|
36
|
+
*
|
|
37
|
+
* Workspace metadata fields (workspaceHash/workspaceName) remain unverified
|
|
38
|
+
* and return null. composerData.name is a candidate for workspaceName but
|
|
39
|
+
* has not been confirmed yet.
|
|
27
40
|
*/
|
|
28
41
|
import { execFileSync } from "child_process";
|
|
29
|
-
import { existsSync } from "fs";
|
|
42
|
+
import { existsSync, statSync } from "fs";
|
|
30
43
|
import { homedir, platform } from "os";
|
|
31
44
|
import { join } from "path";
|
|
32
45
|
// Defaults — overridable via env vars
const DEFAULT_SQLITE3_PATH = "/usr/bin/sqlite3";
const SQLITE_TIMEOUT_MS = 10 * 1000;
const SQLITE_MAX_BUFFER_BYTES = 32 * 1024 ** 2;
// Plausibility window for parsed Unix-ms timestamps: [2020-01-01, 2100-01-01)
// in UTC. Values outside the window are treated as "not a wall-clock
// timestamp" by the parsing helpers, which rejects small relative values
// (e.g. seconds since process start) as well as negative or otherwise
// malformed numbers.
const MIN_UNIX_MS = Date.parse("2020-01-01T00:00:00.000Z");
const MAX_UNIX_MS = Date.parse("2100-01-01T00:00:00.000Z");
// Self-test tolerance: a bubble may carry a createdAt slightly earlier than
// its composerData row (clock skew, write-order races). Only a gap larger
// than five minutes is reported as a warning.
const INVARIANT_SKEW_TOLERANCE_MS = 5 * 60_000;
|
|
59
|
+
/**
 * Convert a date string (ISO 8601 expected) into Unix milliseconds.
 *
 * @param {unknown} value - Candidate timestamp; anything that is not a
 *   string is rejected.
 * @returns {number|null} The parsed milliseconds, or null when the input is
 *   not a string, `Date.parse` does not yield a finite number, or the result
 *   lies outside [MIN_UNIX_MS, MAX_UNIX_MS). A null result means "no usable
 *   timestamp here"; callers are expected to try their next source.
 */
function parseIsoToMs(value) {
    if (typeof value === "string") {
        const parsed = Date.parse(value);
        const plausible = Number.isFinite(parsed) && parsed >= MIN_UNIX_MS && parsed < MAX_UNIX_MS;
        return plausible ? parsed : null;
    }
    return null;
}
|
|
73
|
+
/**
 * Validate a number that is meant to be a Unix-ms timestamp.
 *
 * @param {unknown} value - Candidate timestamp; non-numbers and non-finite
 *   numbers are rejected.
 * @returns {number|null} The value floored to an integer when it lies in
 *   [MIN_UNIX_MS, MAX_UNIX_MS), otherwise null. The lower bound is what
 *   filters out small relative timings that are not wall-clock values.
 */
function parseUnixMsLoose(value) {
    const isFiniteNumber = typeof value === "number" && Number.isFinite(value);
    if (!isFiniteNumber) {
        return null;
    }
    const withinWindow = value >= MIN_UNIX_MS && value < MAX_UNIX_MS;
    return withinWindow ? Math.floor(value) : null;
}
|
|
86
|
+
/**
 * Parse a `createdAt`-style field that may be stored either as a Unix-ms
 * number or as a date string.
 *
 * The numeric interpretation is tried first and the string interpretation
 * second; the caller is not told which one matched.
 *
 * @param {unknown} value - Raw field value.
 * @returns {number|null} Unix milliseconds, or null when neither
 *   interpretation produces a usable timestamp.
 */
function parseTimestampField(value) {
    const numeric = parseUnixMsLoose(value);
    if (numeric != null) {
        return numeric;
    }
    return parseIsoToMs(value);
}
|
|
94
|
+
/**
 * Format Unix milliseconds as an ISO 8601 UTC string.
 *
 * @param {number} ms - Milliseconds since the Unix epoch.
 * @returns {string} The result of `Date.prototype.toISOString`.
 */
function msToIso(ms) {
    const instant = new Date(ms);
    return instant.toISOString();
}
|
|
97
|
+
/**
 * Derive the UTC calendar-day key for a Unix-ms timestamp.
 *
 * @param {number} ms - Milliseconds since the Unix epoch.
 * @returns {string} The date portion of the ISO string, i.e. everything
 *   before the "T" separator (for example "2020-01-01").
 */
function msToUtcDateKey(ms) {
    const isoText = new Date(ms).toISOString();
    return isoText.slice(0, isoText.indexOf("T"));
}
|
|
100
|
+
/**
 * Build a fresh token-usage record with every counter set to zero.
 *
 * @returns {{ inputTokens: number, outputTokens: number, cacheCreationTokens: number, cacheReadTokens: number }}
 *   A new object on every call, so callers can mutate it freely.
 */
function createEmptyTokenUsage() {
    const counterNames = [
        "inputTokens",
        "outputTokens",
        "cacheCreationTokens",
        "cacheReadTokens",
    ];
    return Object.fromEntries(counterNames.map((counter) => [counter, 0]));
}
|
|
36
108
|
/**
|
|
37
109
|
* Get the default Cursor SQLite path for the current platform, honoring
|
|
38
110
|
* the COSTHAWK_CURSOR_DB_PATH environment override.
|
|
@@ -162,15 +234,20 @@ function hasTokenUsage(bubble) {
|
|
|
162
234
|
return bubble.inputTokens > 0 || bubble.outputTokens > 0;
|
|
163
235
|
}
|
|
164
236
|
const BUBBLE_KEY_REGEX = /^bubbleId:([^:]+):(.+)$/;
|
|
237
|
+
const COMPOSER_KEY_REGEX = /^composerData:(.+)$/;
|
|
165
238
|
/**
|
|
166
239
|
* Parse a single bubbleId row into structured BubbleData.
|
|
167
240
|
*
|
|
168
241
|
* Returns null if the row key is malformed, the value is not parseable JSON,
|
|
169
|
-
* or the row contains neither a non-empty model name
|
|
170
|
-
*
|
|
171
|
-
*
|
|
172
|
-
*
|
|
173
|
-
*
|
|
242
|
+
* or the row contains neither a non-empty model name, positive token counts,
|
|
243
|
+
* nor a parseable timestamp. Cursor can store model metadata, token usage,
|
|
244
|
+
* and timestamps on different rows, so the parser accepts any usable signal
|
|
245
|
+
* in isolation and lets the per-composer aggregation merge them.
|
|
246
|
+
*
|
|
247
|
+
* Timestamp handling: `createdAt` is accepted as either an ISO 8601 string
|
|
248
|
+
* (standard shape on bubble rows) or a Unix-ms number (defensive fallback).
|
|
249
|
+
* Rows with only a timestamp and no tokens/model still return BubbleData
|
|
250
|
+
* so the timestamp contributes to per-composer start/end resolution.
|
|
174
251
|
*/
|
|
175
252
|
function parseBubble(row) {
|
|
176
253
|
const match = BUBBLE_KEY_REGEX.exec(row.key);
|
|
@@ -205,10 +282,11 @@ function parseBubble(row) {
|
|
|
205
282
|
modelName = mi.modelName;
|
|
206
283
|
}
|
|
207
284
|
}
|
|
208
|
-
|
|
209
|
-
//
|
|
210
|
-
//
|
|
211
|
-
|
|
285
|
+
const createdAtMs = parseTimestampField(obj.createdAt);
|
|
286
|
+
// Skip rows with no usable signal at all — neither model metadata,
|
|
287
|
+
// positive token counts, nor a parseable timestamp. These are typically
|
|
288
|
+
// system messages, empty bubbles, or tool-call bookkeeping rows.
|
|
289
|
+
if (!modelName && inputTokens === 0 && outputTokens === 0 && createdAtMs === null) {
|
|
212
290
|
return null;
|
|
213
291
|
}
|
|
214
292
|
let serverBubbleId;
|
|
@@ -223,15 +301,126 @@ function parseBubble(row) {
|
|
|
223
301
|
modelName,
|
|
224
302
|
inputTokens,
|
|
225
303
|
outputTokens,
|
|
304
|
+
createdAtMs,
|
|
226
305
|
};
|
|
227
306
|
}
|
|
228
307
|
/**
 * Turn one `composerData:<id>` row into composer timestamp metadata.
 *
 * Both `createdAt` and `lastUpdatedAt` are read through parseTimestampField,
 * so either a Unix-ms number or a date string is accepted for each.
 *
 * A `lastUpdatedAt` that is earlier than `createdAt` is discarded instead of
 * trusted, so downstream aggregation never sees an end before a start.
 *
 * @param {{ key: string, value: string }} row - A key/value row; `value` is
 *   expected to hold JSON text.
 * @returns {{ composerId: string, createdAtMs: number|null, lastUpdatedAtMs: number|null } | null}
 *   Null when the key does not match COMPOSER_KEY_REGEX, the value is not
 *   valid JSON, the JSON is not a non-null object, or neither timestamp
 *   field is usable.
 */
function parseComposerData(row) {
    const keyMatch = COMPOSER_KEY_REGEX.exec(row.key);
    if (keyMatch === null) {
        return null;
    }
    const composerId = keyMatch[1];
    let payload;
    try {
        payload = JSON.parse(row.value);
    }
    catch {
        return null;
    }
    if (payload === null || typeof payload !== "object") {
        return null;
    }
    const createdAtMs = parseTimestampField(payload.createdAt);
    let lastUpdatedAtMs = parseTimestampField(payload.lastUpdatedAt);
    // Enforce the ordering invariant: an update time before the creation
    // time can only come from corrupt or skewed data, so drop it.
    if (createdAtMs !== null && lastUpdatedAtMs !== null && lastUpdatedAtMs < createdAtMs) {
        lastUpdatedAtMs = null;
    }
    if (createdAtMs === null && lastUpdatedAtMs === null) {
        return null;
    }
    return { composerId, createdAtMs, lastUpdatedAtMs };
}
|
|
354
|
+
/**
 * Decide a session's start/end time and record where those values came from.
 *
 * Resolution order:
 *
 *   1. Bubble timestamps present: start = earliest bubble, end = latest
 *      bubble, source "bubble", quality "precise". When the composer's
 *      lastUpdatedAt is later than the latest bubble it replaces the end
 *      time, and the result is reported as source "mixed", quality
 *      "approximate".
 *   2. No bubble timestamps but the composer has createdAtMs: start =
 *      createdAtMs, end = lastUpdatedAtMs when it is not earlier than the
 *      start (otherwise the start again), source "composer", quality
 *      "approximate".
 *   3. Neither: start and end are null, source and quality are "none".
 *
 * @param {number[]} bubbleCreatedAtsMs - Unix-ms createdAt values collected
 *   from the composer's bubbles; may be empty.
 * @param {{ createdAtMs: number|null, lastUpdatedAtMs: number|null } | undefined | null} composerMeta
 *   Parsed composerData metadata, if any.
 * @returns {{ startTime: string|null, endTime: string|null, source: string, quality: string }}
 */
function resolveSessionTimestamps(bubbleCreatedAtsMs, composerMeta) {
    const composerStartMs = composerMeta?.createdAtMs ?? null;
    const composerEndMs = composerMeta?.lastUpdatedAtMs ?? null;

    if (bubbleCreatedAtsMs.length > 0) {
        const seed = bubbleCreatedAtsMs[0];
        const earliestMs = bubbleCreatedAtsMs.reduce((lo, ms) => (ms < lo ? ms : lo), seed);
        const latestBubbleMs = bubbleCreatedAtsMs.reduce((hi, ms) => (ms > hi ? ms : hi), seed);
        // The composer row can be persisted after the last timestamped
        // bubble; in that case its lastUpdatedAt is the better end time.
        const composerExtendsEnd = composerEndMs !== null && composerEndMs > latestBubbleMs;
        if (composerExtendsEnd) {
            return {
                startTime: msToIso(earliestMs),
                endTime: msToIso(composerEndMs),
                source: "mixed",
                quality: "approximate",
            };
        }
        return {
            startTime: msToIso(earliestMs),
            endTime: msToIso(latestBubbleMs),
            source: "bubble",
            quality: "precise",
        };
    }

    if (composerStartMs === null) {
        return {
            startTime: null,
            endTime: null,
            source: "none",
            quality: "none",
        };
    }

    const usableComposerEnd = composerEndMs !== null && composerEndMs >= composerStartMs;
    const finishMs = usableComposerEnd ? composerEndMs : composerStartMs;
    return {
        startTime: msToIso(composerStartMs),
        endTime: msToIso(finishMs),
        source: "composer",
        quality: "approximate",
    };
}
|
|
416
|
+
/**
|
|
417
|
+
* Parse Cursor usage from local SQLite. Read-only — does NOT push anything
|
|
418
|
+
* to the costcanary backend.
|
|
419
|
+
*
|
|
420
|
+
* Returns aggregated session data per composer with per-session token totals,
|
|
421
|
+
* message counts, start/end timestamps, and daily usage buckets. Throws
|
|
422
|
+
* CursorParserError on unrecoverable failures (missing DB, missing sqlite3
|
|
423
|
+
* binary, malformed SQLite output).
|
|
235
424
|
*
|
|
236
425
|
* Dedup strategy: per composer, keep one entry per (serverBubbleId ?? bubbleId).
|
|
237
426
|
* On collision, keep the candidate with the larger token total.
|
|
@@ -240,32 +429,40 @@ function parseBubble(row) {
|
|
|
240
429
|
* the returned `model` field is "mixed". If no model info is present on any
|
|
241
430
|
* bubble, the field is "unknown".
|
|
242
431
|
*
|
|
243
|
-
* Sort order: total tokens descending.
|
|
244
|
-
* are not yet verified for Cursor.
|
|
432
|
+
* Sort order: total tokens descending.
|
|
245
433
|
*/
|
|
246
|
-
export function
|
|
434
|
+
export function parseCursorUsage() {
|
|
247
435
|
const dbPath = getCursorDbPath();
|
|
248
436
|
// Throws CursorParserError on missing DB / missing sqlite3 / query failure
|
|
249
|
-
const
|
|
250
|
-
|
|
251
|
-
//
|
|
252
|
-
//
|
|
253
|
-
//
|
|
254
|
-
//
|
|
437
|
+
const bubbleRows = runCursorQuery("SELECT key, value FROM cursorDiskKV WHERE key LIKE 'bubbleId:%'");
|
|
438
|
+
const composerRows = runCursorQuery("SELECT key, value FROM cursorDiskKV WHERE key LIKE 'composerData:%'");
|
|
439
|
+
// Cursor splits model metadata, token usage, and timestamps across
|
|
440
|
+
// different bubble rows. We collect them into separate per-composer
|
|
441
|
+
// structures so each signal is captured even when rows carry only one
|
|
442
|
+
// of them.
|
|
255
443
|
//
|
|
256
444
|
// - tokenBubblesByComposer: per-composer dedup map for bubbles that carry
|
|
257
445
|
// positive token counts. Dedup key is (serverBubbleId ?? bubbleId).
|
|
258
446
|
// Collision rule: keep the candidate with the larger token total.
|
|
259
447
|
// - modelsByComposer: per-composer set of all distinct non-empty model
|
|
260
|
-
// names found on ANY bubble row in the composer.
|
|
261
|
-
//
|
|
262
|
-
//
|
|
263
|
-
//
|
|
264
|
-
//
|
|
265
|
-
//
|
|
448
|
+
// names found on ANY bubble row in the composer.
|
|
449
|
+
// - bubbleCreatedAtsByComposer: per-composer list of parsed bubble
|
|
450
|
+
// createdAtMs values. Not deduped — we only need min/max, and duplicate
|
|
451
|
+
// values are harmless for those aggregations.
|
|
452
|
+
// - composerMetaById: composerData row metadata, used as the fallback
|
|
453
|
+
// source for per-session timestamps.
|
|
266
454
|
const tokenBubblesByComposer = new Map();
|
|
267
455
|
const modelsByComposer = new Map();
|
|
268
|
-
|
|
456
|
+
const bubbleCreatedAtsByComposer = new Map();
|
|
457
|
+
const composerMetaById = new Map();
|
|
458
|
+
for (const row of composerRows) {
|
|
459
|
+
const meta = parseComposerData(row);
|
|
460
|
+
if (!meta) {
|
|
461
|
+
continue;
|
|
462
|
+
}
|
|
463
|
+
composerMetaById.set(meta.composerId, meta);
|
|
464
|
+
}
|
|
465
|
+
for (const row of bubbleRows) {
|
|
269
466
|
const bubble = parseBubble(row);
|
|
270
467
|
if (!bubble) {
|
|
271
468
|
continue;
|
|
@@ -278,6 +475,14 @@ export function parseCursorUsageDryRun() {
|
|
|
278
475
|
}
|
|
279
476
|
composerModels.add(bubble.modelName);
|
|
280
477
|
}
|
|
478
|
+
if (bubble.createdAtMs !== null) {
|
|
479
|
+
let composerCreatedAts = bubbleCreatedAtsByComposer.get(bubble.composerId);
|
|
480
|
+
if (!composerCreatedAts) {
|
|
481
|
+
composerCreatedAts = [];
|
|
482
|
+
bubbleCreatedAtsByComposer.set(bubble.composerId, composerCreatedAts);
|
|
483
|
+
}
|
|
484
|
+
composerCreatedAts.push(bubble.createdAtMs);
|
|
485
|
+
}
|
|
281
486
|
if (!hasTokenUsage(bubble)) {
|
|
282
487
|
continue;
|
|
283
488
|
}
|
|
@@ -298,17 +503,45 @@ export function parseCursorUsageDryRun() {
|
|
|
298
503
|
}
|
|
299
504
|
composerMap.set(dedupKey, bubble);
|
|
300
505
|
}
|
|
301
|
-
// Aggregate per composer into the
|
|
506
|
+
// Aggregate per composer into the parser output shape.
|
|
302
507
|
const sessions = [];
|
|
303
508
|
for (const [composerId, composerMap] of tokenBubblesByComposer) {
|
|
304
509
|
let inputTokens = 0;
|
|
305
510
|
let outputTokens = 0;
|
|
306
511
|
let messageCount = 0;
|
|
307
512
|
const modelsSeen = modelsByComposer.get(composerId) ?? new Set();
|
|
513
|
+
const composerMeta = composerMetaById.get(composerId);
|
|
514
|
+
const bubbleCreatedAts = bubbleCreatedAtsByComposer.get(composerId) ?? [];
|
|
515
|
+
// Daily bucketing: for each token-bearing bubble, prefer its own
|
|
516
|
+
// createdAt; otherwise fall back to the composer's createdAt so the
|
|
517
|
+
// session still contributes to some day rather than silently
|
|
518
|
+
// dropping tokens from the daily view. We track whether any bucket
|
|
519
|
+
// used the composer fallback so the session-level dailyUsageSource
|
|
520
|
+
// reflects approximate day attribution.
|
|
521
|
+
const dailyUsage = {};
|
|
522
|
+
let anyBubbleFellBackToComposer = false;
|
|
308
523
|
for (const bubble of composerMap.values()) {
|
|
309
524
|
inputTokens += bubble.inputTokens;
|
|
310
525
|
outputTokens += bubble.outputTokens;
|
|
311
526
|
messageCount += 1;
|
|
527
|
+
let bucketMs = null;
|
|
528
|
+
if (bubble.createdAtMs !== null) {
|
|
529
|
+
bucketMs = bubble.createdAtMs;
|
|
530
|
+
}
|
|
531
|
+
else if (composerMeta?.createdAtMs != null) {
|
|
532
|
+
bucketMs = composerMeta.createdAtMs;
|
|
533
|
+
anyBubbleFellBackToComposer = true;
|
|
534
|
+
}
|
|
535
|
+
if (bucketMs !== null) {
|
|
536
|
+
const dateKey = msToUtcDateKey(bucketMs);
|
|
537
|
+
let bucket = dailyUsage[dateKey];
|
|
538
|
+
if (!bucket) {
|
|
539
|
+
bucket = createEmptyTokenUsage();
|
|
540
|
+
dailyUsage[dateKey] = bucket;
|
|
541
|
+
}
|
|
542
|
+
bucket.inputTokens += bubble.inputTokens;
|
|
543
|
+
bucket.outputTokens += bubble.outputTokens;
|
|
544
|
+
}
|
|
312
545
|
}
|
|
313
546
|
if (messageCount === 0) {
|
|
314
547
|
continue;
|
|
@@ -323,22 +556,47 @@ export function parseCursorUsageDryRun() {
|
|
|
323
556
|
else {
|
|
324
557
|
model = "mixed";
|
|
325
558
|
}
|
|
559
|
+
const timing = resolveSessionTimestamps(bubbleCreatedAts, composerMeta);
|
|
560
|
+
// dailyUsageSource classification:
|
|
561
|
+
// "bubble" — every bucket came from a bubble-level createdAt (precise)
|
|
562
|
+
// "composer" — at least one bucket fell back to composer.createdAt,
|
|
563
|
+
// so the whole per-day view is approximate. Any fallback
|
|
564
|
+
// downgrades the entire session so downstream renderers
|
|
565
|
+
// don't imply message-level precision we can't back up.
|
|
566
|
+
// "none" — no bucket had any timestamp source; dailyUsage is empty.
|
|
567
|
+
let dailyUsageSource;
|
|
568
|
+
if (Object.keys(dailyUsage).length === 0) {
|
|
569
|
+
dailyUsageSource = "none";
|
|
570
|
+
}
|
|
571
|
+
else if (anyBubbleFellBackToComposer) {
|
|
572
|
+
dailyUsageSource = "composer";
|
|
573
|
+
}
|
|
574
|
+
else {
|
|
575
|
+
dailyUsageSource = "bubble";
|
|
576
|
+
}
|
|
326
577
|
sessions.push({
|
|
327
578
|
sessionId: composerId,
|
|
328
|
-
workspaceHash: null,
|
|
329
|
-
workspaceName: null,
|
|
579
|
+
workspaceHash: null,
|
|
580
|
+
workspaceName: null,
|
|
330
581
|
model,
|
|
331
582
|
tokens: {
|
|
332
583
|
inputTokens,
|
|
333
584
|
outputTokens,
|
|
334
|
-
cacheCreationTokens: 0,
|
|
585
|
+
cacheCreationTokens: 0,
|
|
335
586
|
cacheReadTokens: 0,
|
|
336
587
|
},
|
|
337
588
|
messageCount,
|
|
338
589
|
filePath: dbPath,
|
|
590
|
+
startTime: timing.startTime,
|
|
591
|
+
endTime: timing.endTime,
|
|
592
|
+
timestampSource: timing.source,
|
|
593
|
+
timestampQuality: timing.quality,
|
|
594
|
+
dailyUsage,
|
|
595
|
+
dailyUsageSource,
|
|
339
596
|
});
|
|
340
597
|
}
|
|
341
|
-
// Sort by total tokens descending.
|
|
598
|
+
// Sort by total tokens descending. Downstream surfaces can re-sort by
|
|
599
|
+
// startTime if chronological order matters.
|
|
342
600
|
sessions.sort((a, b) => {
|
|
343
601
|
const aTotal = a.tokens.inputTokens + a.tokens.outputTokens;
|
|
344
602
|
const bTotal = b.tokens.inputTokens + b.tokens.outputTokens;
|
|
@@ -349,6 +607,281 @@ export function parseCursorUsageDryRun() {
|
|
|
349
607
|
filePath: dbPath,
|
|
350
608
|
};
|
|
351
609
|
}
|
|
610
|
+
/**
|
|
611
|
+
* Backward-compat alias. PR1 consumers called this function name; keep it
|
|
612
|
+
* working for one release after the rename.
|
|
613
|
+
*/
|
|
614
|
+
export const parseCursorUsageDryRun = parseCursorUsage;
|
|
615
|
+
/**
 * Shorten an identifier to at most its first 8 characters for display.
 *
 * Eight characters are enough for a person to tell sample keys apart while
 * keeping the full identifier out of the transparency output.
 *
 * @param {string} id - Identifier to shorten.
 * @returns {string} `id` unchanged when it has 8 or fewer characters,
 *   otherwise its first 8 characters.
 */
function truncateId(id) {
    if (id.length > 8) {
        return id.slice(0, 8);
    }
    return id;
}
|
|
624
|
+
/**
 * Collect transparency metadata about the Cursor SQLite database so users
 * can see what is being read from it.
 *
 * The result contains the database path and file size, the table names from
 * `sqlite_master`, a histogram of `cursorDiskKV` key prefixes (text before
 * the first colon, or the whole key when there is none), and up to five
 * sample bubble keys and five sample composer keys with their identifiers
 * truncated via truncateId.
 *
 * @returns {{ filePath: string, dbFileSize: number, tables: string[], keyPrefixes: Object<string, number>, sampleBubbleKeys: string[], sampleComposerKeys: string[] }}
 * @throws A plain `{ code: "CURSOR_DB_NOT_FOUND", message }` object when the
 *   database file does not exist. Anything thrown by runCursorQuery
 *   propagates unchanged.
 */
export function getCursorMeta() {
    const dbPath = getCursorDbPath();
    if (!existsSync(dbPath)) {
        throw {
            code: "CURSOR_DB_NOT_FOUND",
            message: `Cursor SQLite database not found at ${dbPath}. Make sure Cursor is installed and you have used it at least once. Set COSTHAWK_CURSOR_DB_PATH to override.`,
        };
    }

    // File size is best-effort: a failing stat is reported as size 0 rather
    // than aborting the whole metadata call.
    let dbFileSize;
    try {
        dbFileSize = statSync(dbPath).size;
    }
    catch {
        dbFileSize = 0;
    }

    // Each query aliases its columns to key/value, the row shape this
    // function reads from every runCursorQuery result.
    const tables = runCursorQuery("SELECT name AS key, 'table' AS value FROM sqlite_master WHERE type='table' ORDER BY name").map(({ key }) => key);

    // The ordering uses count(*) itself, not its TEXT cast, so the sort is
    // numeric ("184" before "9") instead of lexicographic.
    const prefixRows = runCursorQuery("SELECT CASE WHEN instr(key,':')>0 THEN substr(key,1,instr(key,':')-1) ELSE key END AS key, CAST(count(*) AS TEXT) AS value FROM cursorDiskKV GROUP BY 1 ORDER BY count(*) DESC");
    const keyPrefixes = {};
    for (const { key: prefix, value: countText } of prefixRows) {
        const parsedCount = Number.parseInt(countText, 10);
        if (!Number.isFinite(parsedCount)) {
            continue;
        }
        keyPrefixes[prefix] = parsedCount;
    }

    const bubbleSampleRows = runCursorQuery("SELECT key, '' AS value FROM cursorDiskKV WHERE key LIKE 'bubbleId:%' LIMIT 5");
    const composerSampleRows = runCursorQuery("SELECT key, '' AS value FROM cursorDiskKV WHERE key LIKE 'composerData:%' LIMIT 5");

    // Keys that do not match the expected pattern are passed through as-is.
    const sampleBubbleKeys = bubbleSampleRows.map(({ key }) => {
        const parts = BUBBLE_KEY_REGEX.exec(key);
        return parts
            ? `bubbleId:${truncateId(parts[1])}:${truncateId(parts[2])}`
            : key;
    });
    const sampleComposerKeys = composerSampleRows.map(({ key }) => {
        const parts = COMPOSER_KEY_REGEX.exec(key);
        return parts ? `composerData:${truncateId(parts[1])}` : key;
    });

    return {
        filePath: dbPath,
        dbFileSize,
        tables,
        keyPrefixes,
        sampleBubbleKeys,
        sampleComposerKeys,
    };
}
|
|
688
|
+
/**
 * Pull a human-readable message out of a caught value.
 *
 * Errors in this module can be thrown as plain `{ code, message }` objects
 * rather than Error instances, so both shapes are recognised.
 *
 * @param {unknown} err - The caught value.
 * @returns {string|null} The message, or null when the value carries none
 *   (the caller then picks its own fallback text).
 */
function describeSelfTestError(err) {
    if (err instanceof Error) {
        return err.message;
    }
    if (typeof err === "object" && err !== null && "message" in err) {
        return String(err.message);
    }
    return null;
}
/**
 * Run a parser health check against the live database and classify the
 * outcome as PASS, DEGRADED, or FAIL.
 *
 *   - FAIL: the database file is missing, the initial queries threw, or the
 *     parser itself threw. Details are placed in `errors`.
 *   - DEGRADED: everything ran, but at least one warning was recorded (a
 *     session without any timestamp source, a start time after an end time,
 *     or bubble/composer createdAt skew beyond the tolerance).
 *   - PASS: everything ran with no errors and no warnings.
 *
 * Failures from the queries and from the parser are caught and reported in
 * the returned payload instead of being thrown.
 *
 * @returns {object} Structured report with `filePath`, `dbExists`,
 *   `sqlite3Path`, `canQuery`, `tokenBubbleCount`, `composerCount`,
 *   `sessionsWithTokens`, `timestampCoverage`, `invariantChecks`,
 *   `warnings`, `errors`, and `overallStatus`.
 */
export function runCursorSelfTest() {
    const dbPath = getCursorDbPath();
    const sqlite3Path = getSqlite3Path();
    const errors = [];
    const warnings = [];
    const invariantChecks = [];
    // The result shares the three arrays above, so pushing onto them later
    // is reflected in whatever object is eventually returned. The status
    // starts as FAIL so every early return reports a failure.
    const result = {
        filePath: dbPath,
        dbExists: existsSync(dbPath),
        sqlite3Path,
        canQuery: false,
        tokenBubbleCount: 0,
        composerCount: 0,
        sessionsWithTokens: 0,
        timestampCoverage: {
            bubblesWithCreatedAt: 0,
            totalBubbles: 0,
            composersWithCreatedAt: 0,
            totalComposers: 0,
        },
        invariantChecks,
        warnings,
        errors,
        overallStatus: "FAIL",
    };
    if (!result.dbExists) {
        errors.push(`Cursor SQLite database not found at ${dbPath}. Set COSTHAWK_CURSOR_DB_PATH to override.`);
        return result;
    }
    let bubbleRows;
    let composerRows;
    try {
        bubbleRows = runCursorQuery("SELECT key, value FROM cursorDiskKV WHERE key LIKE 'bubbleId:%'");
        composerRows = runCursorQuery("SELECT key, value FROM cursorDiskKV WHERE key LIKE 'composerData:%'");
        result.canQuery = true;
    }
    catch (err) {
        const code = isCursorParserError(err) ? err.code : "UNKNOWN";
        const message = describeSelfTestError(err) ?? "Unknown error";
        errors.push(`[${code}] ${message}`);
        return result;
    }
    result.timestampCoverage.totalBubbles = bubbleRows.length;
    result.timestampCoverage.totalComposers = composerRows.length;
    // Count bubble rows whose createdAt is usable (string or number), using
    // the same parseTimestampField helper the parser relies on. Rows whose
    // value is not JSON for a non-null object are simply not counted.
    for (const row of bubbleRows) {
        let record;
        try {
            record = JSON.parse(row.value);
        }
        catch {
            continue;
        }
        if (typeof record !== "object" || record === null) {
            continue;
        }
        if (parseTimestampField(record.createdAt) !== null) {
            result.timestampCoverage.bubblesWithCreatedAt += 1;
        }
    }
    // Parse every composer row once: the metadata feeds both the coverage
    // counter here and the skew invariant further down.
    const composerMetaByIdForCheck = new Map();
    for (const row of composerRows) {
        const meta = parseComposerData(row);
        if (!meta) {
            continue;
        }
        composerMetaByIdForCheck.set(meta.composerId, meta);
        if (meta.createdAtMs !== null) {
            result.timestampCoverage.composersWithCreatedAt += 1;
        }
    }
    // Invariant 1: the parser runs without throwing.
    let parserResult = null;
    try {
        parserResult = parseCursorUsage();
        invariantChecks.push({ name: "parser_runs", passed: true });
    }
    catch (err) {
        // Plain-object errors must be unwrapped here as well; String(err)
        // alone would report them as "[object Object]".
        const message = describeSelfTestError(err) ?? String(err);
        invariantChecks.push({
            name: "parser_runs",
            passed: false,
            details: message,
        });
        errors.push(`Parser threw: ${message}`);
        return result;
    }
    result.sessionsWithTokens = parserResult.sessions.length;
    result.tokenBubbleCount = parserResult.sessions.reduce((acc, s) => acc + s.messageCount, 0);
    result.composerCount = composerRows.length;
    // Invariant 2: no session is left with timestampSource "none".
    const sessionsWithoutTiming = parserResult.sessions.filter((s) => s.timestampSource === "none");
    if (sessionsWithoutTiming.length > 0) {
        invariantChecks.push({
            name: "all_sessions_have_timestamp_source",
            passed: false,
            details: `${sessionsWithoutTiming.length} sessions have timestampSource="none"`,
        });
        warnings.push(`${sessionsWithoutTiming.length}/${parserResult.sessions.length} sessions have no parseable timestamp source. They will appear with null startTime/endTime in usage output.`);
    }
    else {
        invariantChecks.push({
            name: "all_sessions_have_timestamp_source",
            passed: true,
        });
    }
    // Invariant 3: for every session that resolved start AND end, start <= end.
    const badOrdering = parserResult.sessions
        .filter((s) => s.startTime !== null && s.endTime !== null)
        .filter((s) => Date.parse(s.startTime) > Date.parse(s.endTime));
    if (badOrdering.length > 0) {
        invariantChecks.push({
            name: "start_time_le_end_time",
            passed: false,
            details: `${badOrdering.length} sessions violate start <= end`,
        });
        warnings.push(`${badOrdering.length} sessions have startTime > endTime after resolution. This is a parser bug — please report.`);
    }
    else {
        invariantChecks.push({
            name: "start_time_le_end_time",
            passed: true,
        });
    }
    // Invariant 4: where a composer has both bubble and composer timestamps,
    // the earliest bubble createdAt must not precede composerData.createdAt
    // by more than the tolerance. The per-composer minimum is derived from
    // bubbleRows directly (not from the parser output) so this check stays
    // independent of the main aggregation logic.
    const minBubbleCreatedAtByComposer = new Map();
    for (const row of bubbleRows) {
        const bubble = parseBubble(row);
        if (!bubble || bubble.createdAtMs === null) {
            continue;
        }
        const prior = minBubbleCreatedAtByComposer.get(bubble.composerId);
        if (prior === undefined || bubble.createdAtMs < prior) {
            minBubbleCreatedAtByComposer.set(bubble.composerId, bubble.createdAtMs);
        }
    }
    let skewWarnings = 0;
    for (const [composerId, minBubbleMs] of minBubbleCreatedAtByComposer) {
        const meta = composerMetaByIdForCheck.get(composerId);
        if (!meta || meta.createdAtMs === null) {
            continue;
        }
        const skew = meta.createdAtMs - minBubbleMs;
        if (skew > INVARIANT_SKEW_TOLERANCE_MS) {
            skewWarnings += 1;
        }
    }
    if (skewWarnings > 0) {
        invariantChecks.push({
            name: "bubble_composer_createdat_skew",
            passed: false,
            details: `${skewWarnings} composers where min(bubble.createdAt) is more than ${INVARIANT_SKEW_TOLERANCE_MS / 1000}s before composerData.createdAt`,
        });
        warnings.push(`${skewWarnings} composers show unexpected clock skew between bubble and composer timestamps. Values are still usable but may indicate schema drift.`);
    }
    else {
        invariantChecks.push({
            name: "bubble_composer_createdat_skew",
            passed: true,
        });
    }
    if (errors.length > 0) {
        result.overallStatus = "FAIL";
    }
    else if (warnings.length > 0) {
        result.overallStatus = "DEGRADED";
    }
    else {
        result.overallStatus = "PASS";
    }
    return result;
}
|
|
352
885
|
// Re-export the type guard so the MCP tool registration in index.ts can
|
|
353
886
|
// distinguish CursorParserError from generic Error in its catch block.
|
|
354
887
|
export { isCursorParserError };
|