getaimeter 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +53 -53
  2. package/watcher.js +711 -648
package/watcher.js CHANGED
@@ -1,648 +1,711 @@
1
- 'use strict';
2
-
3
- const fs = require('fs');
4
- const path = require('path');
5
- const crypto = require('crypto');
6
- const { execSync } = require('child_process');
7
- const { getApiKey, getWatchPaths } = require('./config');
8
- const { getOffset, setOffset, isDuplicate, save: saveState } = require('./state');
9
- const { postUsage } = require('./reporter');
10
-
11
- // ---------------------------------------------------------------------------
12
- // Logging
13
- // ---------------------------------------------------------------------------
14
-
15
- const LOG_FILE = path.join(require('./config').AIMETER_DIR, 'watcher.log');
16
-
17
- function log(...args) {
18
- const ts = new Date().toISOString();
19
- const msg = `[${ts}] ${args.join(' ')}`;
20
- console.log(msg);
21
- try {
22
- fs.mkdirSync(path.dirname(LOG_FILE), { recursive: true });
23
- fs.appendFileSync(LOG_FILE, msg + '\n');
24
- } catch {}
25
- }
26
-
27
- function logError(...args) {
28
- log('ERROR:', ...args);
29
- }
30
-
31
- // ---------------------------------------------------------------------------
32
- // Source detection from file path
33
- // ---------------------------------------------------------------------------
34
-
35
- // Cache detected sources per file to avoid re-reading headers
36
- const _sourceCache = new Map();
37
-
38
- // Track cumulative token counts per file for Codex CLI (which reports cumulative, not delta)
39
- const _codexCumulative = {};
40
-
41
- function detectSource(filePath) {
42
- if (_sourceCache.has(filePath)) return _sourceCache.get(filePath);
43
-
44
- const normalized = filePath.replace(/\\/g, '/');
45
- if (normalized.includes('local-agent-mode-sessions')) {
46
- _sourceCache.set(filePath, 'desktop_app');
47
- return 'desktop_app';
48
- }
49
-
50
- // For subagent files (e.g. .../ceeb9217.../subagents/agent-xxx.jsonl),
51
- // inherit the parent session's source. Parent JSONL is at the grandparent dir level.
52
- if (normalized.includes('/subagents/')) {
53
- // Extract session UUID dir: .../projects/PROJECT/SESSION_UUID/subagents/...
54
- const match = normalized.match(/(.+)\/([^/]+)\/subagents\//);
55
- if (match) {
56
- const projectDir = match[1]; // .../projects/PROJECT
57
- const sessionUuid = match[2]; // SESSION_UUID
58
- const parentFile = path.join(projectDir.replace(/\//g, path.sep), sessionUuid + '.jsonl');
59
- try {
60
- if (fs.existsSync(parentFile)) {
61
- const parentSource = detectSource(parentFile);
62
- _sourceCache.set(filePath, parentSource);
63
- return parentSource;
64
- }
65
- } catch {}
66
- }
67
- }
68
-
69
- if (normalized.includes('.copilot/') || normalized.includes('/copilot/')) {
70
- _sourceCache.set(filePath, 'copilot_cli');
71
- return 'copilot_cli';
72
- }
73
- if (normalized.includes('.gemini/') || normalized.includes('/gemini/')) {
74
- _sourceCache.set(filePath, 'gemini_cli');
75
- return 'gemini_cli';
76
- }
77
- if (normalized.includes('.codex/') || normalized.includes('/codex/')) {
78
- _sourceCache.set(filePath, 'codex_cli');
79
- return 'codex_cli';
80
- }
81
- if (normalized.includes('/Cursor/') || normalized.includes('/cursor/') || normalized.includes('.cursor/')) {
82
- _sourceCache.set(filePath, 'cursor');
83
- return 'cursor';
84
- }
85
-
86
- // Read first 10KB of the file to find entrypoint or IDE markers
87
- let source = 'cli'; // default
88
- try {
89
- const fd = fs.openSync(filePath, 'r');
90
- const buf = Buffer.alloc(Math.min(10240, fs.fstatSync(fd).size));
91
- fs.readSync(fd, buf, 0, buf.length, 0);
92
- fs.closeSync(fd);
93
- const header = buf.toString('utf8');
94
-
95
- if (header.includes('"entrypoint":"claude-desktop"') || header.includes('"entrypoint": "claude-desktop"')) {
96
- source = 'desktop_app';
97
- } else if (header.includes('ide_opened_file') || header.includes('claude-vscode') || header.includes('"entrypoint":"vscode"') || header.includes('"entrypoint": "vscode"')) {
98
- source = 'vscode';
99
- }
100
- // else remains 'cli'
101
- } catch {}
102
-
103
- _sourceCache.set(filePath, source);
104
- return source;
105
- }
106
-
107
- // ---------------------------------------------------------------------------
108
- // JSONL parsing — extract usage from new bytes in a transcript file
109
- // ---------------------------------------------------------------------------
110
-
111
- function extractNewUsage(filePath) {
112
- let stat;
113
- try { stat = fs.statSync(filePath); } catch { return []; }
114
-
115
- const currentSize = stat.size;
116
- const lastOffset = getOffset(filePath);
117
-
118
- if (currentSize <= lastOffset) return [];
119
-
120
- // Read only the new bytes
121
- const fd = fs.openSync(filePath, 'r');
122
- const buf = Buffer.alloc(currentSize - lastOffset);
123
- fs.readSync(fd, buf, 0, buf.length, lastOffset);
124
- fs.closeSync(fd);
125
-
126
- const text = buf.toString('utf8');
127
- const lines = text.split('\n');
128
-
129
- // If we're reading from mid-file (offset > 0), the first line may be partial
130
- if (lastOffset > 0 && lines.length > 0) lines.shift();
131
-
132
- const usageEvents = [];
133
- let lineOffset = lastOffset;
134
- let pendingThinkingChars = 0; // Track thinking chars from streaming progress messages
135
-
136
- for (const line of lines) {
137
- const trimmed = line.trim();
138
- lineOffset += Buffer.byteLength(line + '\n', 'utf8');
139
- if (!trimmed) continue;
140
-
141
- let obj;
142
- try { obj = JSON.parse(trimmed); } catch { continue; }
143
-
144
- // Normalize entry: support both direct assistant messages and progress-wrapped
145
- // sub-agent messages (where haiku calls appear as type="progress" with the
146
- // real message at obj.data.message.message).
147
- let msg = null;
148
- let msgId = null; // truthy only for progress entries (used for dedup)
149
-
150
- if (obj.type === 'assistant' && obj.message && obj.message.usage) {
151
- msg = obj.message;
152
- } else if (
153
- obj.type === 'progress' &&
154
- obj.data && obj.data.message && obj.data.message.message &&
155
- obj.data.message.message.usage
156
- ) {
157
- msg = obj.data.message.message;
158
- msgId = msg.id; // progress fires multiple times; dedup by message ID
159
- }
160
-
161
- if (!msg) {
162
- // ── Codex CLI format ──────────────────────────────────────────
163
- // Codex events have { type: "event", payload: { type: "token_count", ... } }
164
- // with cumulative input_tokens, output_tokens, reasoning_tokens, cached_input_tokens
165
- if (obj.type === 'event' && obj.payload?.type === 'token_count') {
166
- const p = obj.payload;
167
- const codexModel = obj.turn_context?.model || 'codex';
168
- // Token counts are cumulative per session; we store deltas
169
- // Use a per-file tracker for the previous cumulative values
170
- const prevKey = filePath;
171
- const prev = _codexCumulative[prevKey] || { input: 0, output: 0, reasoning: 0, cached: 0 };
172
- const deltaInput = (p.input_tokens || 0) - prev.input;
173
- const deltaOutput = (p.output_tokens || 0) - prev.output;
174
- const deltaReasoning = (p.reasoning_tokens || 0) - prev.reasoning;
175
-
176
- _codexCumulative[prevKey] = {
177
- input: p.input_tokens || 0,
178
- output: p.output_tokens || 0,
179
- reasoning: p.reasoning_tokens || 0,
180
- cached: p.cached_input_tokens || 0,
181
- };
182
-
183
- // Skip if no new tokens (duplicate or first read)
184
- if (deltaInput <= 0 && deltaOutput <= 0) continue;
185
-
186
- const hashKey = `${filePath}:${lineOffset}:codex:${p.input_tokens}:${p.output_tokens}`;
187
- const hash = crypto.createHash('md5').update(hashKey).digest('hex');
188
- if (isDuplicate(hash)) continue;
189
-
190
- usageEvents.push({
191
- provider: 'openai',
192
- model: codexModel,
193
- source: 'codex_cli',
194
- inputTokens: deltaInput,
195
- outputTokens: deltaOutput,
196
- thinkingTokens: deltaReasoning,
197
- cacheReadTokens: 0,
198
- cacheWriteTokens: 0,
199
- });
200
- continue;
201
- }
202
-
203
- // ── Copilot CLI format ────────────────────────────────────────
204
- // Copilot events: { type: "token_usage", input_tokens, output_tokens, reasoning_tokens, model }
205
- if (obj.type === 'token_usage' && (obj.input_tokens !== undefined || obj.output_tokens !== undefined)) {
206
- const copilotModel = obj.model || 'copilot';
207
- const hashKey = `${filePath}:${lineOffset}:copilot:${obj.input_tokens || 0}:${obj.output_tokens || 0}`;
208
- const hash = crypto.createHash('md5').update(hashKey).digest('hex');
209
- if (isDuplicate(hash)) continue;
210
-
211
- usageEvents.push({
212
- provider: 'github',
213
- model: copilotModel,
214
- source: 'copilot_cli',
215
- inputTokens: obj.input_tokens || 0,
216
- outputTokens: obj.output_tokens || 0,
217
- thinkingTokens: obj.reasoning_tokens || 0,
218
- cacheReadTokens: 0,
219
- cacheWriteTokens: 0,
220
- });
221
- continue;
222
- }
223
-
224
- // ── Gemini CLI format ─────────────────────────────────────────
225
- // Gemini events may contain usageMetadata: { promptTokenCount, candidatesTokenCount, totalTokenCount }
226
- if (obj.usageMetadata && (obj.usageMetadata.promptTokenCount || obj.usageMetadata.candidatesTokenCount)) {
227
- const um = obj.usageMetadata;
228
- const geminiModel = obj.modelVersion || obj.model || 'gemini';
229
- const hashKey = `${filePath}:${lineOffset}:gemini:${um.promptTokenCount || 0}:${um.candidatesTokenCount || 0}`;
230
- const hash = crypto.createHash('md5').update(hashKey).digest('hex');
231
- if (isDuplicate(hash)) continue;
232
-
233
- usageEvents.push({
234
- provider: 'google',
235
- model: geminiModel,
236
- source: 'gemini_cli',
237
- inputTokens: um.promptTokenCount || 0,
238
- outputTokens: um.candidatesTokenCount || 0,
239
- thinkingTokens: um.thoughtsTokenCount || 0,
240
- cacheReadTokens: um.cachedContentTokenCount || 0,
241
- cacheWriteTokens: 0,
242
- });
243
- continue;
244
- }
245
-
246
- continue;
247
- }
248
-
249
- // Skip synthetic/internal messages
250
- if (msg.model === '<synthetic>') continue;
251
-
252
- const model = msg.model || '';
253
-
254
- // Check content blocks for thinking tokens and completion status
255
- const contentBlocks = msg.content || [];
256
- const hasTextContent = contentBlocks.some(b => b.type === 'text' || b.type === 'tool_use');
257
- for (const block of contentBlocks) {
258
- if (block.type === 'thinking' && block.thinking) {
259
- pendingThinkingChars = Math.max(pendingThinkingChars, block.thinking.length);
260
- }
261
- }
262
-
263
- // Skip streaming in-progress messages:
264
- // Null stop_reason with ONLY thinking content = streaming reasoning still in progress.
265
- // Null stop_reason WITH text/tool_use content = complete response (e.g., haiku sub-agent
266
- // calls that never receive a stop_reason in the JSONL but are finished).
267
- if (!msg.stop_reason && !hasTextContent) continue;
268
-
269
- const u = msg.usage;
270
-
271
- // Estimate thinking tokens: ~4 chars per token (conservative estimate)
272
- // The API doesn't separate thinking_tokens in the JSONL usage field
273
- const estimatedThinkingTokens = pendingThinkingChars > 0
274
- ? Math.ceil(pendingThinkingChars / 4)
275
- : 0;
276
- pendingThinkingChars = 0; // Reset for next turn
277
-
278
- // Build dedup hash.
279
- // For entries with a message ID (progress or assistant), use ONLY the message
280
- // ID so the same call is counted once even if it appears in both the parent
281
- // session file (as progress) and the subagent file (as assistant).
282
- // For entries without an ID, fall back to file+offset.
283
- const effectiveId = msgId || msg.id;
284
- const hashKey = effectiveId
285
- ? `msgid:${effectiveId}`
286
- : `${filePath}:${lineOffset}:${model}:${u.input_tokens || 0}:${u.output_tokens || 0}`;
287
- const hash = crypto.createHash('md5').update(hashKey).digest('hex');
288
-
289
- if (isDuplicate(hash)) continue;
290
-
291
- usageEvents.push({
292
- provider: 'anthropic',
293
- model,
294
- source: detectSource(filePath),
295
- inputTokens: u.input_tokens || 0,
296
- outputTokens: u.output_tokens || 0,
297
- thinkingTokens: estimatedThinkingTokens,
298
- cacheReadTokens: u.cache_read_input_tokens || 0,
299
- cacheWriteTokens: u.cache_creation_input_tokens || 0,
300
- });
301
- }
302
-
303
- // Update offset to current file size
304
- setOffset(filePath, currentSize);
305
-
306
- return usageEvents;
307
- }
308
-
309
- // ---------------------------------------------------------------------------
310
- // Cursor IDE — extract usage from SQLite state.vscdb
311
- // ---------------------------------------------------------------------------
312
-
313
- // Track last-seen composerData keys to detect new conversations
314
- const _cursorSeenKeys = new Set();
315
- let _cursorSqliteAvailable = null; // null = unchecked, true/false after first check
316
-
317
- function isSqliteAvailable() {
318
- if (_cursorSqliteAvailable !== null) return _cursorSqliteAvailable;
319
- try {
320
- execSync('sqlite3 --version', { stdio: 'pipe', timeout: 5000 });
321
- _cursorSqliteAvailable = true;
322
- } catch {
323
- _cursorSqliteAvailable = false;
324
- log('sqlite3 CLI not found — Cursor tracking disabled. Install sqlite3 to enable.');
325
- }
326
- return _cursorSqliteAvailable;
327
- }
328
-
329
- function extractCursorUsage(dbPath) {
330
- if (!isSqliteAvailable()) return [];
331
- if (!fs.existsSync(dbPath)) return [];
332
-
333
- // Check if the DB file has been modified since our last check
334
- let stat;
335
- try { stat = fs.statSync(dbPath); } catch { return []; }
336
- const currentMtime = stat.mtimeMs;
337
- const lastMtime = getOffset(dbPath + ':mtime');
338
- if (currentMtime <= lastMtime) return [];
339
-
340
- const usageEvents = [];
341
-
342
- try {
343
- // Query composerData entries from Cursor's KV store
344
- // Each entry has usageData with per-model cost and count
345
- const raw = execSync(
346
- `sqlite3 "${dbPath}" "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'composerData:%'"`,
347
- { timeout: 10000, maxBuffer: 50 * 1024 * 1024, encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }
348
- );
349
-
350
- if (!raw || !raw.trim()) {
351
- setOffset(dbPath + ':mtime', currentMtime);
352
- return [];
353
- }
354
-
355
- for (const line of raw.split('\n')) {
356
- if (!line.trim()) continue;
357
-
358
- // sqlite3 outputs "key|value" with pipe separator
359
- const sepIdx = line.indexOf('|');
360
- if (sepIdx < 0) continue;
361
-
362
- const key = line.substring(0, sepIdx);
363
- const valueStr = line.substring(sepIdx + 1);
364
-
365
- // Skip already-seen conversations
366
- if (_cursorSeenKeys.has(key)) continue;
367
-
368
- let data;
369
- try { data = JSON.parse(valueStr); } catch { continue; }
370
-
371
- // Extract usage from composerData.usageData
372
- // Format: { "model-name": { costInCents: 0.42, amount: 1 }, ... }
373
- const usage = data.usageData;
374
- if (!usage || typeof usage !== 'object') {
375
- _cursorSeenKeys.add(key);
376
- continue;
377
- }
378
-
379
- const composerId = data.composerId || key;
380
- const timestamp = data.lastUpdatedAt || data.createdAt;
381
-
382
- for (const [model, stats] of Object.entries(usage)) {
383
- if (!stats || typeof stats !== 'object') continue;
384
-
385
- const costCents = stats.costInCents || 0;
386
- const amount = stats.amount || 0;
387
- if (costCents <= 0 && amount <= 0) continue;
388
-
389
- const hashKey = `cursor:${composerId}:${model}:${costCents}:${amount}`;
390
- const hash = crypto.createHash('md5').update(hashKey).digest('hex');
391
- if (isDuplicate(hash)) continue;
392
-
393
- // Determine provider from model name
394
- let provider = 'openai'; // default Cursor uses OpenAI models primarily
395
- if (model.includes('claude') || model.includes('anthropic')) {
396
- provider = 'anthropic';
397
- } else if (model.includes('gemini')) {
398
- provider = 'google';
399
- }
400
-
401
- // Estimate tokens from cost (rough inverse of pricing)
402
- // costInCents is total cost. We don't have exact input/output split,
403
- // so we attribute it all as output tokens for a rough estimate.
404
- // Using mid-range pricing for estimation: ~$10/M tokens average
405
- const estimatedTokens = Math.round((costCents / 100) / (10 / 1_000_000));
406
-
407
- usageEvents.push({
408
- provider,
409
- model,
410
- source: 'cursor',
411
- inputTokens: 0,
412
- outputTokens: estimatedTokens > 0 ? estimatedTokens : amount * 500, // fallback: ~500 tokens per request
413
- thinkingTokens: 0,
414
- cacheReadTokens: 0,
415
- cacheWriteTokens: 0,
416
- });
417
- }
418
-
419
- _cursorSeenKeys.add(key);
420
- }
421
- } catch (err) {
422
- // SQLite might be locked by Cursor this is normal, retry next poll
423
- if (!err.message.includes('database is locked')) {
424
- logError('Cursor SQLite read error:', err.message);
425
- }
426
- }
427
-
428
- setOffset(dbPath + ':mtime', currentMtime);
429
- return usageEvents;
430
- }
431
-
432
- // ---------------------------------------------------------------------------
433
- // Report usage events to backend
434
- // ---------------------------------------------------------------------------
435
-
436
- async function sleep(ms) {
437
- return new Promise(r => setTimeout(r, ms));
438
- }
439
-
440
- async function reportEvents(events) {
441
- const apiKey = getApiKey();
442
- if (!apiKey) {
443
- logError('No API key configured. Run: aimeter setup');
444
- return;
445
- }
446
-
447
- for (const evt of events) {
448
- let attempt = 0;
449
- while (attempt < 4) {
450
- const result = await postUsage(apiKey, evt);
451
- if (result.ok) {
452
- log(`Reported: ${evt.source} ${evt.model} in=${evt.inputTokens} out=${evt.outputTokens} cache_r=${evt.cacheReadTokens}`);
453
- break;
454
- } else if (result.status === 429) {
455
- attempt++;
456
- const wait = attempt * 15_000; // 15s, 30s, 45s
457
- logError(`Rate limited (429). Retry ${attempt}/3 in ${wait / 1000}s...`);
458
- await sleep(wait);
459
- } else {
460
- logError(`Failed to report: HTTP ${result.status} ${result.error || ''}`);
461
- break;
462
- }
463
- }
464
- }
465
- }
466
-
467
- // ---------------------------------------------------------------------------
468
- // File watcher
469
- // ---------------------------------------------------------------------------
470
-
471
- /**
472
- * Process a single file: extract new usage and report it.
473
- * Called only from the poll loop — single-threaded, no races.
474
- */
475
- async function processFile(filePath) {
476
- try {
477
- const events = extractNewUsage(filePath);
478
- if (events.length > 0) {
479
- await reportEvents(events);
480
- saveState();
481
- }
482
- } catch (err) {
483
- logError(`Processing ${filePath}:`, err.message);
484
- }
485
- }
486
-
487
- /**
488
- * Recursively find all .jsonl files under a directory.
489
- */
490
- function findJsonlFiles(dir) {
491
- const results = [];
492
- let entries;
493
- try { entries = fs.readdirSync(dir, { withFileTypes: true }); } catch { return results; }
494
-
495
- for (const entry of entries) {
496
- const full = path.join(dir, entry.name);
497
- if (entry.isDirectory()) {
498
- results.push(...findJsonlFiles(full));
499
- } else if (entry.name.endsWith('.jsonl')) {
500
- results.push(full);
501
- }
502
- }
503
- return results;
504
- }
505
-
506
- /**
507
- * Find Cursor state.vscdb files in watch paths.
508
- */
509
- function findCursorDbs(dirs) {
510
- const results = [];
511
- for (const dir of dirs) {
512
- const vscdb = path.join(dir, 'state.vscdb');
513
- if (fs.existsSync(vscdb)) {
514
- results.push(vscdb);
515
- }
516
- }
517
- return results;
518
- }
519
-
520
- /**
521
- * Start watching all configured paths.
522
- * Returns a cleanup function.
523
- */
524
- function startWatching() {
525
- const watchPaths = getWatchPaths();
526
-
527
- if (watchPaths.length === 0) {
528
- logError('No AI tool directories found. Is Claude Code, Cursor, Codex, or Gemini CLI installed?');
529
- process.exit(1);
530
- }
531
-
532
- log('AIMeter Watcher starting...');
533
- log('Watching:', watchPaths.join(', '));
534
-
535
- const apiKey = getApiKey();
536
- if (!apiKey) {
537
- log('WARNING: No API key found. Usage will not be reported.');
538
- log('Run: aimeter setup');
539
- } else {
540
- log('API key:', apiKey.slice(0, 8) + '...' + apiKey.slice(-4));
541
- }
542
-
543
- // Initial scan: mark existing files as "already read" so we only report
544
- // NEW usage going forward. Without this, first run floods the backend.
545
- const { load: loadState } = require('./state');
546
- const state = loadState();
547
- const isFirstRun = Object.keys(state.fileOffsets || {}).length === 0;
548
-
549
- let filesMarked = 0;
550
- for (const watchPath of watchPaths) {
551
- const files = findJsonlFiles(watchPath);
552
- for (const file of files) {
553
- if (isFirstRun) {
554
- // First run: skip to end of all files
555
- try {
556
- const size = fs.statSync(file).size;
557
- setOffset(file, size);
558
- filesMarked++;
559
- } catch {}
560
- } else {
561
- // Subsequent runs: process new data since last offset
562
- const events = extractNewUsage(file);
563
- if (events.length > 0) {
564
- reportEvents(events);
565
- filesMarked += events.length;
566
- }
567
- }
568
- }
569
- }
570
- if (isFirstRun) {
571
- log(`First run: marked ${filesMarked} existing files as read. Only new usage will be reported.`);
572
- } else if (filesMarked > 0) {
573
- log(`Catch-up: processed ${filesMarked} new events since last run`);
574
- }
575
- saveState();
576
-
577
- // Poll every 5 seconds — simple, reliable, no race conditions.
578
- // fs.watch is unreliable on Windows for deeply nested dirs and fires duplicates.
579
- const POLL_INTERVAL = 5_000;
580
- let polling = false;
581
-
582
- // Find Cursor DBs once at startup
583
- const cursorDbs = findCursorDbs(watchPaths);
584
- if (cursorDbs.length > 0) {
585
- log('Cursor databases found:', cursorDbs.join(', '));
586
- // Mark existing Cursor data as seen on first run
587
- if (isFirstRun) {
588
- for (const dbPath of cursorDbs) {
589
- try {
590
- const mtime = fs.statSync(dbPath).mtimeMs;
591
- setOffset(dbPath + ':mtime', mtime);
592
- // Pre-populate seen keys so we don't report historical data
593
- extractCursorUsage(dbPath); // populates _cursorSeenKeys but we discard results
594
- } catch {}
595
- }
596
- log('Cursor: marked existing conversations as read');
597
- }
598
- }
599
-
600
- const pollInterval = setInterval(async () => {
601
- if (polling) return; // skip if previous poll still running
602
- polling = true;
603
- try {
604
- // Poll JSONL files (Claude, Codex, Gemini, etc.)
605
- for (const watchPath of watchPaths) {
606
- const files = findJsonlFiles(watchPath);
607
- for (const file of files) {
608
- try {
609
- const currentSize = fs.statSync(file).size;
610
- const lastOffset = getOffset(file);
611
- if (currentSize > lastOffset) {
612
- await processFile(file);
613
- }
614
- } catch {}
615
- }
616
- }
617
-
618
- // Poll Cursor SQLite databases
619
- for (const dbPath of cursorDbs) {
620
- try {
621
- const events = extractCursorUsage(dbPath);
622
- if (events.length > 0) {
623
- await reportEvents(events);
624
- saveState();
625
- }
626
- } catch (err) {
627
- logError('Cursor poll error:', err.message);
628
- }
629
- }
630
- } finally {
631
- polling = false;
632
- }
633
- }, POLL_INTERVAL);
634
- log(`Polling every ${POLL_INTERVAL / 1000}s`);
635
-
636
- // Periodic state save
637
- const saveInterval = setInterval(() => saveState(), 30_000);
638
-
639
- // Return cleanup
640
- return () => {
641
- clearInterval(saveInterval);
642
- clearInterval(pollInterval);
643
- saveState();
644
- log('Watcher stopped.');
645
- };
646
- }
647
-
648
- module.exports = { startWatching };
1
+ 'use strict';
2
+
3
+ const fs = require('fs');
4
+ const path = require('path');
5
+ const crypto = require('crypto');
6
+ const { execSync } = require('child_process');
7
+ const { getApiKey, getWatchPaths } = require('./config');
8
+ const { getOffset, setOffset, isDuplicate, save: saveState } = require('./state');
9
+ const { postUsage } = require('./reporter');
10
+
11
+ // ---------------------------------------------------------------------------
12
+ // Logging
13
+ // ---------------------------------------------------------------------------
14
+
15
+ const LOG_FILE = path.join(require('./config').AIMETER_DIR, 'watcher.log');
16
+
17
+ function log(...args) {
18
+ const ts = new Date().toISOString();
19
+ const msg = `[${ts}] ${args.join(' ')}`;
20
+ console.log(msg);
21
+ try {
22
+ fs.mkdirSync(path.dirname(LOG_FILE), { recursive: true });
23
+ fs.appendFileSync(LOG_FILE, msg + '\n');
24
+ } catch {}
25
+ }
26
+
27
+ function logError(...args) {
28
+ log('ERROR:', ...args);
29
+ }
30
+
31
+ // ---------------------------------------------------------------------------
32
+ // Source detection from file path
33
+ // ---------------------------------------------------------------------------
34
+
35
+ // Cache detected sources per file to avoid re-reading headers
36
+ const _sourceCache = new Map();
37
+
38
+ // Track cumulative token counts per file for Codex CLI (which reports cumulative, not delta)
39
+ const _codexCumulative = {};
40
+
41
+ function detectSource(filePath) {
42
+ if (_sourceCache.has(filePath)) return _sourceCache.get(filePath);
43
+
44
+ const normalized = filePath.replace(/\\/g, '/');
45
+ if (normalized.includes('local-agent-mode-sessions')) {
46
+ _sourceCache.set(filePath, 'desktop_app');
47
+ return 'desktop_app';
48
+ }
49
+
50
+ // For subagent files (e.g. .../ceeb9217.../subagents/agent-xxx.jsonl),
51
+ // inherit the parent session's source. Parent JSONL is at the grandparent dir level.
52
+ if (normalized.includes('/subagents/')) {
53
+ // Extract session UUID dir: .../projects/PROJECT/SESSION_UUID/subagents/...
54
+ const match = normalized.match(/(.+)\/([^/]+)\/subagents\//);
55
+ if (match) {
56
+ const projectDir = match[1]; // .../projects/PROJECT
57
+ const sessionUuid = match[2]; // SESSION_UUID
58
+ const parentFile = path.join(projectDir.replace(/\//g, path.sep), sessionUuid + '.jsonl');
59
+ try {
60
+ if (fs.existsSync(parentFile)) {
61
+ const parentSource = detectSource(parentFile);
62
+ _sourceCache.set(filePath, parentSource);
63
+ return parentSource;
64
+ }
65
+ } catch {}
66
+ }
67
+ }
68
+
69
+ if (normalized.includes('.copilot/') || normalized.includes('/copilot/')) {
70
+ _sourceCache.set(filePath, 'copilot_cli');
71
+ return 'copilot_cli';
72
+ }
73
+ if (normalized.includes('.gemini/') || normalized.includes('/gemini/')) {
74
+ _sourceCache.set(filePath, 'gemini_cli');
75
+ return 'gemini_cli';
76
+ }
77
+ if (normalized.includes('.codex/') || normalized.includes('/codex/')) {
78
+ _sourceCache.set(filePath, 'codex_cli');
79
+ return 'codex_cli';
80
+ }
81
+ if (normalized.includes('/Cursor/') || normalized.includes('/cursor/') || normalized.includes('.cursor/')) {
82
+ _sourceCache.set(filePath, 'cursor');
83
+ return 'cursor';
84
+ }
85
+
86
+ // Read first 10KB of the file to find entrypoint or IDE markers
87
+ let source = 'cli'; // default
88
+ try {
89
+ const fd = fs.openSync(filePath, 'r');
90
+ const buf = Buffer.alloc(Math.min(10240, fs.fstatSync(fd).size));
91
+ fs.readSync(fd, buf, 0, buf.length, 0);
92
+ fs.closeSync(fd);
93
+ const header = buf.toString('utf8');
94
+
95
+ if (header.includes('"entrypoint":"claude-desktop"') || header.includes('"entrypoint": "claude-desktop"')) {
96
+ source = 'desktop_app';
97
+ } else if (header.includes('ide_opened_file') || header.includes('claude-vscode') || header.includes('"entrypoint":"vscode"') || header.includes('"entrypoint": "vscode"')) {
98
+ source = 'vscode';
99
+ }
100
+ // else remains 'cli'
101
+ } catch {}
102
+
103
+ _sourceCache.set(filePath, source);
104
+ return source;
105
+ }
106
+
107
+ // ---------------------------------------------------------------------------
108
+ // JSONL parsing — extract usage from new bytes in a transcript file
109
+ // ---------------------------------------------------------------------------
110
+
111
+ function extractNewUsage(filePath) {
112
+ let stat;
113
+ try { stat = fs.statSync(filePath); } catch { return []; }
114
+
115
+ const currentSize = stat.size;
116
+ const lastOffset = getOffset(filePath);
117
+
118
+ if (currentSize <= lastOffset) return [];
119
+
120
+ // Read only the new bytes
121
+ const fd = fs.openSync(filePath, 'r');
122
+ const buf = Buffer.alloc(currentSize - lastOffset);
123
+ fs.readSync(fd, buf, 0, buf.length, lastOffset);
124
+ fs.closeSync(fd);
125
+
126
+ const text = buf.toString('utf8');
127
+ const lines = text.split('\n');
128
+
129
+ // If we're reading from mid-file (offset > 0), the first line may be partial
130
+ if (lastOffset > 0 && lines.length > 0) lines.shift();
131
+
132
+ const usageEvents = [];
133
+ let lineOffset = lastOffset;
134
+ let pendingThinkingChars = 0; // Track thinking chars from streaming progress messages
135
+
136
+ for (const line of lines) {
137
+ const trimmed = line.trim();
138
+ lineOffset += Buffer.byteLength(line + '\n', 'utf8');
139
+ if (!trimmed) continue;
140
+
141
+ let obj;
142
+ try { obj = JSON.parse(trimmed); } catch { continue; }
143
+
144
+ // Normalize entry: support both direct assistant messages and progress-wrapped
145
+ // sub-agent messages (where haiku calls appear as type="progress" with the
146
+ // real message at obj.data.message.message).
147
+ let msg = null;
148
+ let msgId = null; // truthy only for progress entries (used for dedup)
149
+
150
+ if (obj.type === 'assistant' && obj.message && obj.message.usage) {
151
+ msg = obj.message;
152
+ } else if (
153
+ obj.type === 'progress' &&
154
+ obj.data && obj.data.message && obj.data.message.message &&
155
+ obj.data.message.message.usage
156
+ ) {
157
+ msg = obj.data.message.message;
158
+ msgId = msg.id; // progress fires multiple times; dedup by message ID
159
+ }
160
+
161
+ if (!msg) {
162
+ // ── Codex CLI format ──────────────────────────────────────────
163
+ // Codex events have { type: "event", payload: { type: "token_count", ... } }
164
+ // with cumulative input_tokens, output_tokens, reasoning_tokens, cached_input_tokens
165
+ if (obj.type === 'event' && obj.payload?.type === 'token_count') {
166
+ const p = obj.payload;
167
+ const codexModel = obj.turn_context?.model || 'codex';
168
+ // Token counts are cumulative per session; we store deltas
169
+ // Use a per-file tracker for the previous cumulative values
170
+ const prevKey = filePath;
171
+ const prev = _codexCumulative[prevKey] || { input: 0, output: 0, reasoning: 0, cached: 0 };
172
+ const deltaInput = (p.input_tokens || 0) - prev.input;
173
+ const deltaOutput = (p.output_tokens || 0) - prev.output;
174
+ const deltaReasoning = (p.reasoning_tokens || 0) - prev.reasoning;
175
+
176
+ _codexCumulative[prevKey] = {
177
+ input: p.input_tokens || 0,
178
+ output: p.output_tokens || 0,
179
+ reasoning: p.reasoning_tokens || 0,
180
+ cached: p.cached_input_tokens || 0,
181
+ };
182
+
183
+ // Skip if no new tokens (duplicate or first read)
184
+ if (deltaInput <= 0 && deltaOutput <= 0) continue;
185
+
186
+ const hashKey = `${filePath}:${lineOffset}:codex:${p.input_tokens}:${p.output_tokens}`;
187
+ const hash = crypto.createHash('md5').update(hashKey).digest('hex');
188
+ if (isDuplicate(hash)) continue;
189
+
190
+ usageEvents.push({
191
+ provider: 'openai',
192
+ model: codexModel,
193
+ source: 'codex_cli',
194
+ inputTokens: deltaInput,
195
+ outputTokens: deltaOutput,
196
+ thinkingTokens: deltaReasoning,
197
+ cacheReadTokens: 0,
198
+ cacheWriteTokens: 0,
199
+ });
200
+ continue;
201
+ }
202
+
203
+ // ── Copilot CLI format ────────────────────────────────────────
204
+ // Copilot events: { type: "token_usage", input_tokens, output_tokens, reasoning_tokens, model }
205
+ if (obj.type === 'token_usage' && (obj.input_tokens !== undefined || obj.output_tokens !== undefined)) {
206
+ const copilotModel = obj.model || 'copilot';
207
+ const hashKey = `${filePath}:${lineOffset}:copilot:${obj.input_tokens || 0}:${obj.output_tokens || 0}`;
208
+ const hash = crypto.createHash('md5').update(hashKey).digest('hex');
209
+ if (isDuplicate(hash)) continue;
210
+
211
+ usageEvents.push({
212
+ provider: 'github',
213
+ model: copilotModel,
214
+ source: 'copilot_cli',
215
+ inputTokens: obj.input_tokens || 0,
216
+ outputTokens: obj.output_tokens || 0,
217
+ thinkingTokens: obj.reasoning_tokens || 0,
218
+ cacheReadTokens: 0,
219
+ cacheWriteTokens: 0,
220
+ });
221
+ continue;
222
+ }
223
+
224
+ // ── Gemini CLI format ─────────────────────────────────────────
225
+ // Gemini events may contain usageMetadata: { promptTokenCount, candidatesTokenCount, totalTokenCount }
226
+ if (obj.usageMetadata && (obj.usageMetadata.promptTokenCount || obj.usageMetadata.candidatesTokenCount)) {
227
+ const um = obj.usageMetadata;
228
+ const geminiModel = obj.modelVersion || obj.model || 'gemini';
229
+ const hashKey = `${filePath}:${lineOffset}:gemini:${um.promptTokenCount || 0}:${um.candidatesTokenCount || 0}`;
230
+ const hash = crypto.createHash('md5').update(hashKey).digest('hex');
231
+ if (isDuplicate(hash)) continue;
232
+
233
+ usageEvents.push({
234
+ provider: 'google',
235
+ model: geminiModel,
236
+ source: 'gemini_cli',
237
+ inputTokens: um.promptTokenCount || 0,
238
+ outputTokens: um.candidatesTokenCount || 0,
239
+ thinkingTokens: um.thoughtsTokenCount || 0,
240
+ cacheReadTokens: um.cachedContentTokenCount || 0,
241
+ cacheWriteTokens: 0,
242
+ });
243
+ continue;
244
+ }
245
+
246
+ continue;
247
+ }
248
+
249
+ // Skip synthetic/internal messages
250
+ if (msg.model === '<synthetic>') continue;
251
+
252
+ const model = msg.model || '';
253
+
254
+ // Check content blocks for thinking tokens and completion status
255
+ const contentBlocks = msg.content || [];
256
+ const hasTextContent = contentBlocks.some(b => b.type === 'text' || b.type === 'tool_use');
257
+ for (const block of contentBlocks) {
258
+ if (block.type === 'thinking' && block.thinking) {
259
+ pendingThinkingChars = Math.max(pendingThinkingChars, block.thinking.length);
260
+ }
261
+ }
262
+
263
+ // Skip streaming in-progress messages:
264
+ // Null stop_reason with ONLY thinking content = streaming reasoning still in progress.
265
+ // Null stop_reason WITH text/tool_use content = complete response (e.g., haiku sub-agent
266
+ // calls that never receive a stop_reason in the JSONL but are finished).
267
+ if (!msg.stop_reason && !hasTextContent) continue;
268
+
269
+ const u = msg.usage;
270
+
271
+ // Estimate thinking tokens: ~4 chars per token (conservative estimate)
272
+ // The API doesn't separate thinking_tokens in the JSONL usage field
273
+ const estimatedThinkingTokens = pendingThinkingChars > 0
274
+ ? Math.ceil(pendingThinkingChars / 4)
275
+ : 0;
276
+ pendingThinkingChars = 0; // Reset for next turn
277
+
278
+ // Build dedup hash.
279
+ // For entries with a message ID (progress or assistant), use ONLY the message
280
+ // ID so the same call is counted once even if it appears in both the parent
281
+ // session file (as progress) and the subagent file (as assistant).
282
+ // For entries without an ID, fall back to file+offset.
283
+ const effectiveId = msgId || msg.id;
284
+ const hashKey = effectiveId
285
+ ? `msgid:${effectiveId}`
286
+ : `${filePath}:${lineOffset}:${model}:${u.input_tokens || 0}:${u.output_tokens || 0}`;
287
+ const hash = crypto.createHash('md5').update(hashKey).digest('hex');
288
+
289
+ if (isDuplicate(hash)) continue;
290
+
291
+ usageEvents.push({
292
+ provider: 'anthropic',
293
+ model,
294
+ source: detectSource(filePath),
295
+ inputTokens: u.input_tokens || 0,
296
+ outputTokens: u.output_tokens || 0,
297
+ thinkingTokens: estimatedThinkingTokens,
298
+ cacheReadTokens: u.cache_read_input_tokens || 0,
299
+ cacheWriteTokens: u.cache_creation_input_tokens || 0,
300
+ });
301
+ }
302
+
303
+ // Update offset to current file size
304
+ setOffset(filePath, currentSize);
305
+
306
+ return usageEvents;
307
+ }
308
+
309
+ // ---------------------------------------------------------------------------
310
+ // Cursor IDE — extract usage from SQLite state.vscdb
311
+ // ---------------------------------------------------------------------------
312
+
313
// Track last-seen composerData keys to detect new conversations.
// Module-level so repeated polls of the same DB skip already-counted rows;
// reset only when the watcher process restarts.
const _cursorSeenKeys = new Set();
let _cursorSqliteAvailable = null; // null = unchecked, true/false after first check

// Locate sqlite3 binary — may be in PATH or in the WinGet install location.
// Populated by findSqlite3(); stays null until a probe succeeds.
let _sqlite3Path = null;
319
+
320
/**
 * Probe for a usable sqlite3 CLI binary and cache the result.
 *
 * Checks PATH first, then (on Windows) the WinGet package directory.
 * On success, records the binary location in _sqlite3Path.
 *
 * @returns {boolean} true if sqlite3 is available, false otherwise.
 */
function findSqlite3() {
  // Cached answer from a previous probe — only run detection once.
  if (_cursorSqliteAvailable !== null) return _cursorSqliteAvailable;

  const markFound = (binPath) => {
    _sqlite3Path = binPath;
    _cursorSqliteAvailable = true;
    return true;
  };

  // 1) sqlite3 reachable via PATH?
  try {
    execSync('sqlite3 --version', { stdio: 'pipe', timeout: 5000 });
    return markFound('sqlite3');
  } catch {}

  // 2) Windows fallback: scan the WinGet packages directory.
  if (process.platform === 'win32') {
    const wingetDir = path.join(process.env.LOCALAPPDATA || '', 'Microsoft', 'WinGet', 'Packages');
    let entries = [];
    try {
      entries = fs.readdirSync(wingetDir);
    } catch {}
    for (const entry of entries) {
      if (!entry.startsWith('SQLite.SQLite')) continue;
      const candidate = path.join(wingetDir, entry, 'sqlite3.exe');
      if (fs.existsSync(candidate)) return markFound(candidate);
    }
  }

  _cursorSqliteAvailable = false;
  log('sqlite3 CLI not found — Cursor tracking disabled. Install sqlite3 to enable.');
  return false;
}
353
+
354
/**
 * Run a SQL query against a SQLite database via the sqlite3 CLI.
 *
 * @param {string} dbPath - Path to the .vscdb/.sqlite file. Quoted for the
 *   shell; assumes the path contains no double quotes — TODO confirm against
 *   the watch-path config.
 * @param {string} query - A single SQL statement (terminator added here).
 * @returns {string} Raw stdout from sqlite3 ('|'-separated columns).
 * @throws If sqlite3 exits non-zero, times out (10s), or the DB is locked.
 */
function runSqlite(dbPath, query) {
  // Feed the query via stdin (execSync's `input` option) instead of the old
  // temp-file + shell `<` redirection. This removes the temp-file cleanup,
  // the Date.now() filename collision window, and any shell escaping issues
  // with the SQL text on Windows. `input` overrides stdio[0].
  return execSync(`"${_sqlite3Path}" "${dbPath}"`, {
    input: query + ';\n',
    timeout: 10000,
    maxBuffer: 50 * 1024 * 1024,
    encoding: 'utf8',
    stdio: ['pipe', 'pipe', 'pipe'],
    shell: true,
  });
}
370
+
371
/**
 * Extract estimated usage events from a Cursor IDE state.vscdb SQLite file.
 *
 * Cursor does not store real token counts (see schema note below), so this
 * ESTIMATES tokens from conversation text length (~4 chars/token). Results
 * are deduped three ways: a DB mtime gate, the module-level _cursorSeenKeys
 * set, and the global isDuplicate() hash store.
 *
 * @param {string} dbPath - Path to a Cursor state.vscdb file.
 * @returns {Array<object>} Usage event objects (possibly empty).
 */
function extractCursorUsage(dbPath) {
  if (!findSqlite3()) return [];
  if (!fs.existsSync(dbPath)) return [];

  // Check if the DB file has been modified since our last check
  let stat;
  try { stat = fs.statSync(dbPath); } catch { return []; }
  const currentMtime = stat.mtimeMs;
  const lastMtime = getOffset(dbPath + ':mtime');
  if (currentMtime <= lastMtime) return [];

  const usageEvents = [];

  try {
    // Cursor v11 schema: usageData is always empty, tokenCount is always 0.
    // Strategy: query composerData for model + timestamp, then count AI bubbles
    // and measure their text length to estimate tokens.
    //
    // Query: join composerData (for model) with bubble count + total text length
    // We use a simpler approach: get conversations, then get their bubble stats.

    // Step 1: Get conversations with model info
    const composerRaw = runSqlite(dbPath,
      "SELECT key, json_extract(value, '$.composerId'), json_extract(value, '$.modelConfig.modelName'), json_extract(value, '$.createdAt') FROM cursorDiskKV WHERE key LIKE 'composerData:%'"
    );

    if (!composerRaw || !composerRaw.trim()) {
      // Nothing stored yet — remember the mtime so we don't re-query.
      setOffset(dbPath + ':mtime', currentMtime);
      return [];
    }

    // Parse sqlite3's default '|'-separated output.
    // NOTE(review): assumes model names / keys contain no '|' — a value with
    // an embedded pipe would shift columns; verify against real Cursor data.
    const conversations = [];
    for (const line of composerRaw.split('\n')) {
      if (!line.trim()) continue;
      const parts = line.split('|');
      if (parts.length < 4) continue;
      const [key, composerId, modelName, createdAt] = parts;
      if (_cursorSeenKeys.has(key)) continue; // already counted this session
      conversations.push({ key, composerId, modelName: modelName || 'default', createdAt: parseInt(createdAt) || 0 });
    }

    if (conversations.length === 0) {
      setOffset(dbPath + ':mtime', currentMtime);
      return [];
    }

    // Step 2: For new conversations, count AI response bubbles and measure text
    for (const conv of conversations) {
      try {
        // Count AI responses (type=2) and sum their text lengths.
        // NOTE(review): conv.composerId is interpolated into the LIKE pattern;
        // it originates from the same DB, but confirm it is always a plain ID.
        const bubbleRaw = runSqlite(dbPath,
          `SELECT count(*), sum(length(json_extract(value, '$.text'))) FROM cursorDiskKV WHERE key LIKE 'bubbleId:${conv.composerId}:%' AND json_extract(value, '$.type') = 2`
        );

        if (!bubbleRaw || !bubbleRaw.trim()) {
          _cursorSeenKeys.add(conv.key);
          continue;
        }

        const [countStr, textLenStr] = bubbleRaw.trim().split('|');
        const responseCount = parseInt(countStr) || 0;
        const totalTextLen = parseInt(textLenStr) || 0;

        if (responseCount === 0) {
          // Conversation has no AI output yet — mark seen, report nothing.
          _cursorSeenKeys.add(conv.key);
          continue;
        }

        // Also count user messages (type=1) for input estimation
        const userRaw = runSqlite(dbPath,
          `SELECT sum(length(json_extract(value, '$.text'))) FROM cursorDiskKV WHERE key LIKE 'bubbleId:${conv.composerId}:%' AND json_extract(value, '$.type') = 1`
        );
        const userTextLen = parseInt((userRaw || '').trim()) || 0;

        // Estimate tokens: ~4 chars per token
        const estimatedInputTokens = Math.ceil(userTextLen / 4);
        const estimatedOutputTokens = Math.ceil(totalTextLen / 4);

        if (estimatedInputTokens === 0 && estimatedOutputTokens === 0) {
          _cursorSeenKeys.add(conv.key);
          continue;
        }

        // Global dedup hash: keyed on conversation + current bubble stats, so
        // a restart does not re-report an unchanged conversation.
        const hashKey = `cursor:${conv.composerId}:${responseCount}:${totalTextLen}`;
        const hash = crypto.createHash('md5').update(hashKey).digest('hex');
        if (isDuplicate(hash)) {
          _cursorSeenKeys.add(conv.key);
          continue;
        }

        // Determine provider from model name (substring heuristic;
        // anything unrecognized defaults to 'openai').
        let provider = 'openai';
        let model = conv.modelName;
        if (model === 'default') model = 'cursor-default';
        if (model.includes('claude') || model.includes('anthropic')) {
          provider = 'anthropic';
        } else if (model.includes('gemini')) {
          provider = 'google';
        }

        usageEvents.push({
          provider,
          model,
          source: 'cursor',
          inputTokens: estimatedInputTokens,
          outputTokens: estimatedOutputTokens,
          thinkingTokens: 0, // not derivable from Cursor's stored text
          cacheReadTokens: 0,
          cacheWriteTokens: 0,
        });
      } catch {} // per-conversation errors are best-effort; still mark seen

      _cursorSeenKeys.add(conv.key);
    }
  } catch (err) {
    // "database is locked" is expected while Cursor is writing — stay quiet.
    if (!err.message.includes('database is locked')) {
      logError('Cursor SQLite read error:', err.message);
    }
  }

  setOffset(dbPath + ':mtime', currentMtime);
  return usageEvents;
}
494
+
495
+ // ---------------------------------------------------------------------------
496
+ // Report usage events to backend
497
+ // ---------------------------------------------------------------------------
498
+
499
/**
 * Pause for the given duration.
 * @param {number} durationMs - Milliseconds to wait.
 * @returns {Promise<void>} Resolves after the delay elapses.
 */
async function sleep(durationMs) {
  return new Promise((resolve) => {
    setTimeout(resolve, durationMs);
  });
}
502
+
503
/**
 * Report usage events to the backend, one POST per event.
 *
 * On HTTP 429, retries up to 3 times with linear backoff (15s, 30s, 45s),
 * then gives up on that event. Other HTTP failures are logged and skipped
 * immediately. Never throws for delivery failures.
 *
 * Fixes an off-by-one in the previous retry loop, which logged "Retry 4/3",
 * slept a pointless 60s after the last allowed retry, and then gave up
 * silently.
 *
 * @param {Array<object>} events - Usage events (see extract* functions).
 */
async function reportEvents(events) {
  const apiKey = getApiKey();
  if (!apiKey) {
    logError('No API key configured. Run: aimeter setup');
    return;
  }

  const MAX_RETRIES = 3;

  for (const evt of events) {
    for (let attempt = 0; ; attempt++) {
      const result = await postUsage(apiKey, evt);

      if (result.ok) {
        log(`Reported: ${evt.source} ${evt.model} in=${evt.inputTokens} out=${evt.outputTokens} cache_r=${evt.cacheReadTokens}`);
        break;
      }

      if (result.status === 429 && attempt < MAX_RETRIES) {
        const wait = (attempt + 1) * 15_000; // 15s, 30s, 45s
        logError(`Rate limited (429). Retry ${attempt + 1}/${MAX_RETRIES} in ${wait / 1000}s...`);
        await sleep(wait);
        continue;
      }

      if (result.status === 429) {
        // Exhausted all retries — drop this event rather than block the poll loop.
        logError(`Rate limited (429). Giving up after ${MAX_RETRIES} retries.`);
      } else {
        logError(`Failed to report: HTTP ${result.status} ${result.error || ''}`);
      }
      break;
    }
  }
}
529
+
530
+ // ---------------------------------------------------------------------------
531
+ // File watcher
532
+ // ---------------------------------------------------------------------------
533
+
534
/**
 * Process a single file: extract new usage and report it.
 * Called only from the poll loop — single-threaded, no races.
 *
 * @param {string} filePath - JSONL file to read from its stored offset.
 */
async function processFile(filePath) {
  try {
    const newEvents = extractNewUsage(filePath);
    if (newEvents.length === 0) return; // nothing new since last offset

    await reportEvents(newEvents);
    saveState();
  } catch (err) {
    logError(`Processing ${filePath}:`, err.message);
  }
}
549
+
550
/**
 * Recursively find all .jsonl files under a directory.
 *
 * Unreadable directories (missing, permission denied) yield no entries
 * instead of throwing. Results follow readdir order, depth-first.
 *
 * @param {string} dir - Root directory to scan.
 * @returns {string[]} Absolute paths of all .jsonl files found.
 */
function findJsonlFiles(dir) {
  let entries;
  try {
    entries = fs.readdirSync(dir, { withFileTypes: true });
  } catch {
    return [];
  }

  const found = [];
  for (const entry of entries) {
    const fullPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      for (const nested of findJsonlFiles(fullPath)) {
        found.push(nested);
      }
    } else if (entry.name.endsWith('.jsonl')) {
      found.push(fullPath);
    }
  }
  return found;
}
568
+
569
/**
 * Find Cursor state.vscdb files in watch paths.
 *
 * @param {string[]} dirs - Candidate directories.
 * @returns {string[]} Existing state.vscdb paths, in input order.
 */
function findCursorDbs(dirs) {
  return dirs
    .map((dir) => path.join(dir, 'state.vscdb'))
    .filter((candidate) => fs.existsSync(candidate));
}
582
+
583
/**
 * Start watching all configured paths.
 *
 * Performs an initial scan (first run: mark everything as read; later runs:
 * catch up on data written while the watcher was down), then polls every 5s
 * for JSONL growth and Cursor DB changes.
 *
 * Fixes applied vs. previous version:
 *  1. First-run Cursor pre-population previously stamped the ':mtime' offset
 *     BEFORE calling extractCursorUsage(), whose mtime gate then returned
 *     early without populating _cursorSeenKeys — so every historical
 *     conversation would be reported the first time the DB changed. We now
 *     scan first (discarding results) and stamp the mtime afterwards.
 *  2. The catch-up reportEvents() call is intentionally not awaited (this
 *     function is synchronous for callers), but now carries a .catch so a
 *     rejected POST cannot become an unhandled rejection.
 *
 * @returns {Function} Cleanup function that stops polling and saves state.
 */
function startWatching() {
  const watchPaths = getWatchPaths();

  if (watchPaths.length === 0) {
    logError('No AI tool directories found. Is Claude Code, Cursor, Codex, or Gemini CLI installed?');
    process.exit(1);
  }

  log('AIMeter Watcher starting...');
  log('Watching:', watchPaths.join(', '));

  const apiKey = getApiKey();
  if (!apiKey) {
    log('WARNING: No API key found. Usage will not be reported.');
    log('Run: aimeter setup');
  } else {
    // Log a redacted fingerprint only — never the full key.
    log('API key:', apiKey.slice(0, 8) + '...' + apiKey.slice(-4));
  }

  // Initial scan: mark existing files as "already read" so we only report
  // NEW usage going forward. Without this, first run floods the backend.
  const { load: loadState } = require('./state');
  const state = loadState();
  const isFirstRun = Object.keys(state.fileOffsets || {}).length === 0;

  let filesMarked = 0;
  for (const watchPath of watchPaths) {
    const files = findJsonlFiles(watchPath);
    for (const file of files) {
      if (isFirstRun) {
        // First run: skip to end of all files
        try {
          const size = fs.statSync(file).size;
          setOffset(file, size);
          filesMarked++;
        } catch {}
      } else {
        // Subsequent runs: process new data since last offset.
        const events = extractNewUsage(file);
        if (events.length > 0) {
          // Fire-and-forget: startWatching() stays synchronous for callers.
          // Offsets are already advanced by extractNewUsage, so a failed
          // report only loses log output, not state.
          reportEvents(events).catch((err) => logError('Catch-up report failed:', err.message));
          filesMarked += events.length;
        }
      }
    }
  }
  if (isFirstRun) {
    log(`First run: marked ${filesMarked} existing files as read. Only new usage will be reported.`);
  } else if (filesMarked > 0) {
    log(`Catch-up: processed ${filesMarked} new events since last run`);
  }
  saveState();

  // Poll every 5 seconds — simple, reliable, no race conditions.
  // fs.watch is unreliable on Windows for deeply nested dirs and fires duplicates.
  const POLL_INTERVAL = 5_000;
  let polling = false;

  // Find Cursor DBs once at startup
  const cursorDbs = findCursorDbs(watchPaths);
  if (cursorDbs.length > 0) {
    log('Cursor databases found:', cursorDbs.join(', '));
    // Mark existing Cursor data as seen on first run
    if (isFirstRun) {
      for (const dbPath of cursorDbs) {
        try {
          // Scan FIRST so _cursorSeenKeys and the dedup hashes are populated
          // (extractCursorUsage skips scanning if the stored mtime is already
          // current, which is why the mtime is stamped after the scan).
          extractCursorUsage(dbPath); // results intentionally discarded
          const mtime = fs.statSync(dbPath).mtimeMs;
          setOffset(dbPath + ':mtime', mtime);
        } catch {}
      }
      log('Cursor: marked existing conversations as read');
    }
  }

  const pollInterval = setInterval(async () => {
    if (polling) return; // skip if previous poll still running
    polling = true;
    try {
      // Poll JSONL files (Claude, Codex, Gemini, etc.)
      for (const watchPath of watchPaths) {
        const files = findJsonlFiles(watchPath);
        for (const file of files) {
          try {
            const currentSize = fs.statSync(file).size;
            const lastOffset = getOffset(file);
            if (currentSize > lastOffset) {
              await processFile(file);
            }
          } catch {}
        }
      }

      // Poll Cursor SQLite databases
      for (const dbPath of cursorDbs) {
        try {
          const events = extractCursorUsage(dbPath);
          if (events.length > 0) {
            await reportEvents(events);
            saveState();
          }
        } catch (err) {
          logError('Cursor poll error:', err.message);
        }
      }
    } finally {
      polling = false;
    }
  }, POLL_INTERVAL);
  log(`Polling every ${POLL_INTERVAL / 1000}s`);

  // Periodic state save (offsets/dedup hashes) independent of poll activity.
  const saveInterval = setInterval(() => saveState(), 30_000);

  // Return cleanup
  return () => {
    clearInterval(saveInterval);
    clearInterval(pollInterval);
    saveState();
    log('Watcher stopped.');
  };
}
710
+
711
+ module.exports = { startWatching };