@ryandemelo/token-monitor 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +208 -0
- package/dist/src/adapters/antigravity.js +197 -0
- package/dist/src/adapters/claude-code.js +121 -0
- package/dist/src/adapters/codex.js +119 -0
- package/dist/src/adapters/copilot.js +147 -0
- package/dist/src/adapters/cursor.js +217 -0
- package/dist/src/adapters/gemini-cli.js +91 -0
- package/dist/src/adapters/index.js +20 -0
- package/dist/src/analyze.js +216 -0
- package/dist/src/classify.js +53 -0
- package/dist/src/cli.js +252 -0
- package/dist/src/deploy.js +161 -0
- package/dist/src/followthrough.js +111 -0
- package/dist/src/html.js +114 -0
- package/dist/src/metrics.js +89 -0
- package/dist/src/personas.js +94 -0
- package/dist/src/pricing.js +18 -0
- package/dist/src/protowire.js +102 -0
- package/dist/src/report.js +166 -0
- package/dist/src/sign.js +95 -0
- package/dist/src/store.js +78 -0
- package/dist/src/team.js +105 -0
- package/dist/src/types.js +8 -0
- package/package.json +50 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ryan
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# token-monitor
|
|
2
|
+
|
|
3
|
+
[CI status](https://github.com/ryandemelo/token-monitor/actions/workflows/ci.yml)
|
|
4
|
+
[MIT License](LICENSE)
|
|
5
|
+
|
|
6
|
+
Measure how effectively your team spends AI coding-agent tokens — locally, with zero setup.
|
|
7
|
+
|
|
8
|
+
Most token dashboards tell you *how much* you spent. token-monitor tells you *what you spent it on* — separating thinking and defining from actual coding, testing, and shipping — and what to change. It parses the session logs that Claude Code, Gemini CLI, Codex, Cursor, Antigravity, and Copilot Chat already write to your machine. No API keys, no server, no telemetry.
|
|
9
|
+
|
|
10
|
+
```
|
|
11
|
+
Where the tokens go (activity share of input+output)
|
|
12
|
+
|
|
13
|
+
thinking ████████░░░░░░░░░░░░░░░░ 31.5% 33.5M 18087
|
|
14
|
+
exploration ████░░░░░░░░░░░░░░░░░░░░ 18.0% 19.1M 19067
|
|
15
|
+
coding █████░░░░░░░░░░░░░░░░░░░ 21.6% 22.9M 7351
|
|
16
|
+
testing ░░░░░░░░░░░░░░░░░░░░░░░░ 1.2% 1.3M 1407
|
|
17
|
+
shipping █░░░░░░░░░░░░░░░░░░░░░░░ 2.7% 2.8M 2064
|
|
18
|
+
|
|
19
|
+
rework ratio 17.1% · think:code 2.30
|
|
20
|
+
|
|
21
|
+
Project Tokens Cost Cache Rework Persona
|
|
22
|
+
──────────── ────── ──────── ───── ────── ───────────
|
|
23
|
+
checkout-api 12.4M $2104 97% 13% 📐 Architect
|
|
24
|
+
etl-pipeline 5.0M $730 96% 60% 🚒 Firefighter
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Quick start
|
|
28
|
+
|
|
29
|
+
Requires Node.js ≥ 24 (uses the built-in `node:sqlite` — zero runtime dependencies).
|
|
30
|
+
|
|
31
|
+
```sh
|
|
32
|
+
npx @ryandemelo/token-monitor collect # scan local agent logs
|
|
33
|
+
npx @ryandemelo/token-monitor report # activity breakdown, personas, recommendations
|
|
34
|
+
npx @ryandemelo/token-monitor html # self-contained dashboard -> report.html
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Persistent install: `npm install -g @ryandemelo/token-monitor`, then `token-monitor <command>`. For development: clone, `npm install && npm test`.
|
|
38
|
+
|
|
39
|
+
### Or let your coding agent install it
|
|
40
|
+
|
|
41
|
+
Paste this into Claude Code, Gemini CLI, or any coding agent:
|
|
42
|
+
|
|
43
|
+
> Install token-monitor from https://github.com/ryandemelo/token-monitor (instructions in its AGENTS.md), run `collect` and `report`, and walk me through what my token usage says.
|
|
44
|
+
|
|
45
|
+
The repo ships [`AGENTS.md`](AGENTS.md) and [`llms.txt`](llms.txt) so agents can install and operate it without guesswork.
|
|
46
|
+
|
|
47
|
+
## What it measures
|
|
48
|
+
|
|
49
|
+
| Metric | Why it matters |
|
|
50
|
+
|---|---|
|
|
51
|
+
| **Activity breakdown** | Each turn is classified by its tool calls: *thinking/defining* (plan mode, reasoning-only turns), *exploration* (read/search), *coding* (edits), *testing* (test runners), *shipping* (commit/push/PR), *conversation*. |
|
|
52
|
+
| **Cache hit ratio** | Cache reads cost ~10% of fresh input — the single biggest cost lever. Low ratios point at session and prompt-structure problems. |
|
|
53
|
+
| **Rework ratio** | Share of tokens spent on code/test turns *after* the first failed turn in a session. High rework usually means skipped planning. Distinct from `analyze`'s **fix iterations**, which counts testing→coding transitions — sessions that barely test can have high rework but zero visible fix loops. User-declined permission prompts are *not* counted as failures. |
|
|
54
|
+
| **Think:code ratio** | Planning+exploration tokens per coding token. Too low correlates with high rework. |
|
|
55
|
+
| **Model mix** | Premium-model tokens on turns a cheaper tier would handle. |
|
|
56
|
+
| **Estimated cost** | API-equivalent USD from a built-in price table (`src/pricing.ts`). Non-Anthropic prices are placeholders marked `~` — edit to match your contract. |
|
|
57
|
+
|
|
58
|
+
## Personas
|
|
59
|
+
|
|
60
|
+
Aggregate metrics are assigned a behavioral archetype, each with tailored recommendations:
|
|
61
|
+
|
|
62
|
+
| Persona | Signature |
|
|
63
|
+
|---|---|
|
|
64
|
+
| 📐 **Architect** | Plans up front, low rework downstream |
|
|
65
|
+
| 🔪 **Surgeon** | High cache reuse, targeted exploration, minimal waste |
|
|
66
|
+
| 🧭 **Explorer** | Most tokens go to reading/searching before changes land |
|
|
67
|
+
| 🏃 **Sprinter** | Straight to code, minimal planning, rework eats the savings |
|
|
68
|
+
| 🚒 **Firefighter** | Heavy test-fail-fix loops |
|
|
69
|
+
| ⚖️ **Balanced** | No dominant pattern |
|
|
70
|
+
|
|
71
|
+
Personas are computed per-project and overall, so one expensive workflow can't hide in the average.
|
|
72
|
+
|
|
73
|
+
## Supported agents
|
|
74
|
+
|
|
75
|
+
| Agent | Source | Status |
|
|
76
|
+
|---|---|---|
|
|
77
|
+
| **Claude Code** | `~/.claude/projects/**/*.jsonl` | ✅ Verified — per-turn tokens, cache split, model, tools, git branch |
|
|
78
|
+
| **Gemini CLI** | `~/.gemini/tmp/*/chats/*.json` | ✅ Verified — per-turn tokens incl. thoughts, tool calls |
|
|
79
|
+
| **Codex CLI** | `~/.codex/sessions/**/rollout-*.jsonl` | ⚠️ Experimental — diffs cumulative `token_count` events; verify against Codex's own usage screens |
|
|
80
|
+
| **Cursor** | `Cursor/User/globalStorage/state.vscdb` (SQLite) | ✅ Verified — per-turn tokens on completed turns, tool calls, agent/chat sessions. Cursor doesn't persist cache tokens or the resolved backend model (Auto mode reports as `cursor-auto`) |
|
|
81
|
+
| **Antigravity CLI** | `~/.gemini/antigravity-cli/conversations/*.db` (SQLite + protobuf) | ✅ Verified — per-call prompt/cached/output tokens, per-row model, tool steps, workspace + branch. Vendor-internal format: fails soft if the schema changes |
|
|
82
|
+
| **Copilot Chat** (VS Code) | `Code/User/workspaceStorage/*/chatSessions/*` | ⚠️ Experimental — Copilot doesn't record token usage locally, so counts are **estimated** from text length (~4 chars/token) and models are suffixed `(est)`. Turn counts, timestamps, tools, and errors are real |
|
|
83
|
+
|
|
84
|
+
Adapters skip gracefully when a tool isn't installed. The Cursor adapter reads only composer/bubble keys — never the auth entries that live in the same database.
|
|
85
|
+
|
|
86
|
+
## Team usage
|
|
87
|
+
|
|
88
|
+
### Remote rollout (lead → team)
|
|
89
|
+
|
|
90
|
+
The lead hosts one config file anywhere (gist, internal wiki, S3) and sends one line — pasteable by the dev, an MDM/onboarding script, or their coding agent:
|
|
91
|
+
|
|
92
|
+
```sh
|
|
93
|
+
npx @ryandemelo/token-monitor init --from https://example.com/team-config.json
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
```jsonc
|
|
97
|
+
// team-config.json
|
|
98
|
+
{
|
|
99
|
+
"teamName": "acme-eng",
|
|
100
|
+
"push": { "type": "http", "url": "https://reports.example.com/token-monitor" },
|
|
101
|
+
// or: "push": { "type": "path", "dir": "/Volumes/shared/token-monitor" }
|
|
102
|
+
"scheduleHours": 24,
|
|
103
|
+
"windowDays": 30
|
|
104
|
+
}
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
`init` saves the config, generates the signing keypair, runs the first collection, installs the recurring collect+push job (launchd on macOS, cron on Linux), and prints the dev's fingerprint for the lead's `keys.json`. From then on signed exports arrive on schedule; the lead runs `merge <files> --verify --keys keys.json`. `token-monitor schedule --remove` uninstalls.
|
|
108
|
+
|
|
109
|
+
### Manual flow
|
|
110
|
+
|
|
111
|
+
Each developer exports locally; the JSON contains aggregate metrics only (no prompts, no code, no file paths beyond project basenames), so it's safe to share:
|
|
112
|
+
|
|
113
|
+
```sh
|
|
114
|
+
# each developer (exports/ is gitignored — keep real metrics out of repos)
|
|
115
|
+
mkdir -p exports
|
|
116
|
+
token-monitor collect && token-monitor report --json > exports/$(whoami).json
|
|
117
|
+
|
|
118
|
+
# team lead
|
|
119
|
+
cat > team.yaml <<'EOF'
|
|
120
|
+
alice: frontend
|
|
121
|
+
bob: backend
|
|
122
|
+
carol: data
|
|
123
|
+
EOF
|
|
124
|
+
token-monitor merge exports/*.json --team team.yaml
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
The team report shows per-discipline rollups: tokens, cost, cache hit, rework, think:code ratio, dominant activity, and persona — so you can see *which discipline* needs which intervention, not just a total bill.
|
|
128
|
+
|
|
129
|
+
## Deep analysis & LLM-powered recommendations
|
|
130
|
+
|
|
131
|
+
`token-monitor analyze` goes a level deeper than the report — which sessions and habits burn the tokens:
|
|
132
|
+
|
|
133
|
+
- **Most expensive sessions** — turns, fix loops, avg context per turn, duration, dominant activity
|
|
134
|
+
- **Fix-loop sessions** — testing→coding churn
|
|
135
|
+
- **Context-heavy sessions** — average tokens fed per turn (context-bloat proxy)
|
|
136
|
+
- **Tool error rates** — tools that keep failing
|
|
137
|
+
|
|
138
|
+
Add `--llm` and the aggregates go to a coding agent you already have installed (`claude`, `gemini`, or `codex` — auto-detected, override with `--agent`), which returns prioritized interventions with the evidence, the workflow change, and the metric to watch:
|
|
139
|
+
|
|
140
|
+
```sh
|
|
141
|
+
token-monitor analyze --llm
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
No API key management: it reuses your existing agent CLI and its subscription. The payload is the same aggregates-only data as `report --json` (token counts, ratios, tool names, project basenames — never prompts or code). It does leave your machine via that agent's provider, so skip `--llm` if even project names are sensitive.
|
|
145
|
+
|
|
146
|
+
## Follow-through
|
|
147
|
+
|
|
148
|
+
Recommendations are tracked, not just printed. The first time one fires, its target metric is recorded as a baseline; every later report re-measures and shows the delta:
|
|
149
|
+
|
|
150
|
+
```
|
|
151
|
+
Recommendation Metric Baseline Now Since Status
|
|
152
|
+
high-rework reworkRatio 24% 11% 2026-06-01 ✅ resolved
|
|
153
|
+
premium-model-overuse premiumShare 99% 97% 2026-06-12 — tracking
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Resolved findings re-open automatically if the metric regresses.
|
|
157
|
+
|
|
158
|
+
## CLI
|
|
159
|
+
|
|
160
|
+
```
|
|
161
|
+
token-monitor collect [--source claude-code|gemini-cli|codex|cursor|antigravity|copilot] [--db <path>]
|
|
162
|
+
token-monitor report [--days 30] [--project <name>] [--source <name>] [--json] [--db <path>]
|
|
163
|
+
token-monitor analyze [--days 30] [--llm] [--agent claude|gemini|codex] [--json] [--db <path>]
|
|
164
|
+
token-monitor html [--out report.html] [--days 30] [--db <path>]
|
|
165
|
+
token-monitor merge <export.json>... [--team team.yaml] [--verify] [--keys keys.json] [--json]
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
## Contributing
|
|
169
|
+
|
|
170
|
+
The most valuable contribution: an adapter for another agent CLI (Aider, OpenCode, Windsurf…). See [CONTRIBUTING.md](CONTRIBUTING.md) for the adapter guide, fixtures, and conventions. `npm test` runs the suite; CI covers Node 24/25 on Linux + macOS.
|
|
171
|
+
|
|
172
|
+
## Roadmap
|
|
173
|
+
|
|
174
|
+
- [x] Team rollups: `merge` command + `team.yaml` discipline mapping
|
|
175
|
+
- [x] Self-contained HTML dashboard
|
|
176
|
+
- [x] Follow-through tracking: baseline on first firing, delta on every later report
|
|
177
|
+
- [x] IDE coverage: Cursor, Antigravity, Copilot Chat adapters
|
|
178
|
+
- [ ] Adapters: Aider, OpenCode, Windsurf (needs a contributor with Windsurf — #12)
|
|
179
|
+
- [ ] VS Code-family extension: status-bar cost + dashboard webview (#13)
|
|
180
|
+
- [ ] Org-level cross-check via provider usage APIs
|
|
181
|
+
- [x] npm publish: `npx @ryandemelo/token-monitor`
|
|
182
|
+
|
|
183
|
+
## Integrity & threat model
|
|
184
|
+
|
|
185
|
+
Exports are **tamper-evident**. Each machine generates an Ed25519 keypair on first export (`~/.token-monitor/signing-key.pem`, mode 0600); `report --json` signs a canonical serialization of the payload. The team lead verifies on merge:
|
|
186
|
+
|
|
187
|
+
```sh
|
|
188
|
+
# dev, once: print fingerprint for enrollment
|
|
189
|
+
token-monitor fingerprint # e.g. 3f9a1c0b2d4e5f67
|
|
190
|
+
|
|
191
|
+
# lead: pin who may sign for whom (keys.json), then verify on every merge
|
|
192
|
+
echo '{"alice": "3f9a1c0b2d4e5f67"}' > keys.json
|
|
193
|
+
token-monitor merge exports/*.json --verify --keys keys.json
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
`--verify` rejects any export modified after signing or unsigned; `--keys` additionally rejects exports signed by a key not enrolled for that username (impersonation).
|
|
197
|
+
|
|
198
|
+
**What this does not cover — read before relying on it:** a developer controls their own machine, so someone determined to game metrics could edit the *source logs* before collection. Signing detects tampering after export, not dishonest inputs. The planned mitigation is reconciling team totals against the provider's billing/usage APIs (roadmap) — gamed numbers won't reconcile. Treat these metrics as a coaching instrument, not a performance-review weapon; the latter invites exactly the gaming this can't stop.
|
|
199
|
+
|
|
200
|
+
## Privacy
|
|
201
|
+
|
|
202
|
+
Everything stays on your machine by default. token-monitor reads log files locally, stores aggregate numbers in a local SQLite file, and makes no network requests unless you opt in to one of the features described below. Prompt and code content is never stored — only token counts, tool names, timestamps, and project/branch names.
|
|
203
|
+
|
|
204
|
+
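The opt-in exceptions: `analyze --llm` sends those aggregates to your own agent CLI's provider for analysis, and the team rollout (`init --from <url>` with an `http` push target) downloads the team config and sends signed aggregate exports to the URL your lead configured. Everything else is fully offline.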
|
|
205
|
+
|
|
206
|
+
## License
|
|
207
|
+
|
|
208
|
+
MIT
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
import { readdirSync, existsSync, copyFileSync, mkdtempSync, rmSync } from 'node:fs';
|
|
2
|
+
import { join, basename } from 'node:path';
|
|
3
|
+
import { homedir, tmpdir } from 'node:os';
|
|
4
|
+
import { DatabaseSync } from 'node:sqlite';
|
|
5
|
+
import { classify } from '../classify.js';
|
|
6
|
+
import { decodeMessage, intField, strField, msgField, msgFields } from './../protowire.js';
|
|
7
|
+
/**
|
|
8
|
+
* Antigravity CLI stores one SQLite db per conversation under
|
|
9
|
+
* ~/.gemini/antigravity-cli/conversations/<id>.db. The interesting table is
|
|
10
|
+
* gen_metadata: one protobuf blob per LLM call, with token usage, model id,
|
|
11
|
+
* and timestamps. The steps table records turn/tool events; a gen row's
|
|
12
|
+
* "last step index at request time" lets us attribute the tool steps each
|
|
13
|
+
* generation produced. Field numbers were mapped empirically (issue #10) —
|
|
14
|
+
* the schema is vendor-internal, so everything here fails soft.
|
|
15
|
+
*
|
|
16
|
+
* Privacy: only usage/model/timing fields are decoded. The blobs also carry
|
|
17
|
+
* full prompt/conversation snapshots (f1.1/f1.2/f1.8/f1.16) — those are
|
|
18
|
+
* never descended into.
|
|
19
|
+
*/
|
|
20
|
+
// Default location of the Antigravity CLI data directory.
const ROOT = join(homedir(), '.gemini', 'antigravity-cli');
// Observed steps.step_type enum values, mapped to the normalized tool name
// reported on events. Types missing here are reported as `step_<type>`.
const STEP_TOOLS = Object.fromEntries([
    [8, 'view_file'],
    [9, 'list_directory'],
    [21, 'run_command'],
    [23, 'update_plan'],
]);
// Step types that are not tool calls and are skipped during attribution.
const NON_TOOL_STEPS = new Set([
    14, // user input
    15, // planner response
    98, // history
]);
|
|
30
|
+
/**
 * Convert a decoded timestamp message into an ISO-8601 string.
 *
 * Field 1 is read as whole seconds and field 2 as nanoseconds (it is divided
 * by 1e6 to obtain milliseconds).
 *
 * @param msg decoded protobuf message, as accepted by `intField`
 * @returns the ISO string, or `undefined` when the seconds field is
 *   missing/zero or the resulting instant is outside the range `Date` can
 *   represent. The blobs are vendor-internal and mapped empirically, so a
 *   garbage value must not throw here: the caller does not catch, and one bad
 *   timestamp would otherwise discard the entire conversation.
 */
function tsToIso(msg) {
    const seconds = intField(msg, 1);
    if (!seconds)
        return undefined;
    // NOTE(review): intField appears to yield 0 for absent fields; `?? 0`
    // only guards the case where it yields null/undefined instead.
    const nanos = intField(msg, 2) ?? 0;
    const stamp = new Date(seconds * 1000 + Math.floor(nanos / 1e6));
    // toISOString() throws RangeError on an invalid Date — fail soft instead.
    return Number.isNaN(stamp.getTime()) ? undefined : stamp.toISOString();
}
|
|
36
|
+
/**
 * Read one Antigravity conversation database and append one usage event per
 * usable gen_metadata row to `events`.
 *
 * The database (plus any `-wal` / `-shm` sidecar files that exist) is copied
 * into `scratch` and only the copy is opened. Workspace/branch attribution and
 * the steps table are optional: failures there are swallowed and events are
 * still produced. A failure reading gen_metadata itself propagates to the
 * caller.
 *
 * @param dbPath  path to `<conversation-id>.db`
 * @param scratch directory that receives the temporary copy
 * @param events  output array, mutated in place
 */
function collectConversation(dbPath, scratch, events) {
    const convId = basename(dbPath, '.db');
    const workingCopy = join(scratch, `${convId}.db`);
    copyFileSync(dbPath, workingCopy);
    ['-wal', '-shm'].forEach((suffix) => {
        if (existsSync(dbPath + suffix)) {
            copyFileSync(dbPath + suffix, workingCopy + suffix);
        }
    });
    const db = new DatabaseSync(workingCopy);
    try {
        // --- Workspace / branch attribution (optional) ---
        let project = 'unknown';
        let gitBranch;
        try {
            const metaRow = db.prepare('SELECT data FROM trajectory_metadata_blob LIMIT 1').get();
            if (metaRow?.data) {
                const ctx = msgField(decodeMessage(metaRow.data), 1);
                const workspaceUri = strField(ctx, 1);
                if (workspaceUri) {
                    const workspacePath = decodeURIComponent(workspaceUri.replace(/^file:\/\//, ''));
                    project = basename(workspacePath);
                }
                gitBranch = strField(ctx, 4);
            }
        }
        catch {
            /* attribution is optional */
        }
        // --- Step events, used to attribute tools to each generation (optional) ---
        const steps = [];
        try {
            const stepRows = db
                .prepare('SELECT idx, step_type, status, step_payload, error_details FROM steps ORDER BY idx')
                .all();
            for (const stepRow of stepRows) {
                const step = { idx: stepRow.idx, type: stepRow.step_type, status: stepRow.status };
                // Only run_command steps (type 21) have their command text decoded.
                if (stepRow.step_type === 21 && stepRow.step_payload) {
                    try {
                        const payload = msgField(decodeMessage(stepRow.step_payload), 28);
                        step.command = strField(payload, 25) ?? strField(payload, 23);
                    }
                    catch { /* ignore */ }
                }
                // Status 7 rows may carry a short error string in field 2.
                if (stepRow.status === 7 && stepRow.error_details) {
                    try {
                        step.errorShort = strField(decodeMessage(stepRow.error_details), 2);
                    }
                    catch { /* ignore */ }
                }
                steps.push(step);
            }
        }
        catch {
            /* steps are optional — events still carry tokens */
        }
        // --- Generations: one protobuf blob per LLM call ---
        const generations = [];
        for (const genRow of db.prepare('SELECT idx, data FROM gen_metadata ORDER BY idx').all()) {
            if (!genRow.data)
                continue;
            try {
                const gen = msgField(decodeMessage(genRow.data), 1);
                if (!gen)
                    continue;
                // f20 is a repeated {1: key, 2: value} annotation list; its
                // last_step_index is authoritative, f6 + 1 is only the fallback.
                let lastStep = intField(gen, 6) + 1;
                for (const annotation of msgFields(gen, 20)) {
                    if (strField(annotation, 1) !== 'last_step_index')
                        continue;
                    const parsed = Number(strField(annotation, 2));
                    if (Number.isFinite(parsed))
                        lastStep = parsed;
                }
                generations.push({ idx: genRow.idx, lastStep, gen });
            }
            catch {
                continue; // unknown layout — fail soft per row
            }
        }
        for (let i = 0; i < generations.length; i++) {
            const { idx, lastStep, gen } = generations[i];
            const usage = msgField(gen, 4);
            if (!usage)
                continue;
            // A generation owns the steps after its own request snapshot, up to
            // the next generation's snapshot (unbounded for the final one).
            const following = generations[i + 1];
            const upper = following ? following.lastStep : Number.MAX_SAFE_INTEGER;
            const tools = [];
            const commands = [];
            let isError = false;
            for (const step of steps) {
                const inWindow = step.idx > lastStep && step.idx <= upper;
                if (!inWindow || NON_TOOL_STEPS.has(step.type))
                    continue;
                tools.push(STEP_TOOLS[step.type] ?? `step_${step.type}`);
                if (step.command)
                    commands.push(step.command);
                // status 7 covers both failures and user cancellations — a cancel is a choice, not an error.
                const cancelled = /cancel/i.test(step.errorShort ?? '');
                if (step.status === 7 && !cancelled)
                    isError = true;
            }
            const timing = msgField(gen, 9);
            const timestamp = tsToIso(msgField(timing, 4)) ?? new Date(0).toISOString();
            const model = strField(gen, 19) ?? strField(gen, 21) ?? 'antigravity';
            const ev = {
                source: 'antigravity',
                eventKey: `${convId}:${idx}`,
                sessionId: convId,
                project,
                timestamp,
                model,
                inputTokens: intField(usage, 2),
                outputTokens: intField(usage, 3),
                cacheReadTokens: intField(usage, 5),
                cacheCreationTokens: 0,
                thinkingTokens: 0,
                tools,
                commands,
                hasThinking: false,
                isError,
                gitBranch,
            };
            ev.activity = classify(ev);
            events.push(ev);
        }
    }
    finally {
        db.close();
    }
}
|
|
162
|
+
/**
 * Scan `<root>/conversations/*.db` and collect usage events from every
 * conversation database that can be read.
 *
 * Databases are copied into a temporary scratch directory before being opened;
 * the scratch directory is always removed afterwards. A conversation that
 * throws while being read is counted and skipped rather than aborting the scan.
 *
 * @param root Antigravity CLI data directory (defaults to the module ROOT)
 * @returns `{ events, result }`, where `result` summarizes the scan; `note`
 *   is set when the directory is missing or some databases were skipped.
 */
export function collectAntigravity(root = ROOT) {
    const events = [];
    const convDir = join(root, 'conversations');
    if (!existsSync(convDir)) {
        return { events, result: { source: 'antigravity', filesScanned: 0, eventsFound: 0, eventsInserted: 0, note: `${convDir} not found — Antigravity not detected` } };
    }
    const scratch = mkdtempSync(join(tmpdir(), 'tm-antigravity-'));
    const tally = { scanned: 0, failed: 0 };
    try {
        const dbFiles = readdirSync(convDir).filter((name) => name.endsWith('.db'));
        for (const name of dbFiles) {
            tally.scanned += 1;
            try {
                collectConversation(join(convDir, name), scratch, events);
            }
            catch {
                tally.failed += 1; // vendor-internal format — skip conversations we can't read
            }
        }
    }
    finally {
        rmSync(scratch, { recursive: true, force: true });
    }
    const note = tally.failed > 0
        ? `${tally.failed} conversation db(s) skipped (unreadable or unknown format)`
        : undefined;
    return {
        events,
        result: {
            source: 'antigravity',
            filesScanned: tally.scanned,
            eventsFound: events.length,
            eventsInserted: 0,
            note,
        },
    };
}
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import { readdirSync, readFileSync, existsSync } from 'node:fs';
|
|
2
|
+
import { join, basename } from 'node:path';
|
|
3
|
+
import { homedir } from 'node:os';
|
|
4
|
+
import { classify } from '../classify.js';
|
|
5
|
+
// Default location of Claude Code's per-project session transcripts.
const ROOT = join(homedir(), '.claude', 'projects');
/**
 * Phrasings the harness uses when the user rejected or interrupted a tool
 * call. Such results arrive flagged is_error, but they are choices rather than
 * failures — counting them would poison tool-error and rework metrics.
 */
const DECLINED_RE = /user doesn't want to proceed|tool use was rejected|user rejected|request interrupted by user|user declined/i;
/**
 * Report whether a tool_result's content reads as a user declination.
 *
 * @param content either a string, or an array of blocks whose string `text`
 *   properties are joined with single spaces; any other value never matches
 * @returns true when the text matches one of the declination phrasings
 */
export function isDeclination(content) {
    if (typeof content === 'string') {
        return DECLINED_RE.test(content);
    }
    if (!Array.isArray(content)) {
        return false;
    }
    const pieces = [];
    for (const block of content) {
        pieces.push(typeof block?.text === 'string' ? block.text : '');
    }
    return DECLINED_RE.test(pieces.join(' '));
}
|
|
20
|
+
/**
 * Parse Claude Code session transcripts: ~/.claude/projects/<dir>/<session>.jsonl
 *
 * Each `assistant` line that has a `uuid` and a `usage` object with at least
 * one non-zero token counter becomes one event. A later `user` line carrying
 * an `is_error` tool_result marks the event that issued the matching tool_use
 * as failed, unless the result is a user declination.
 *
 * Unreadable directories/files and malformed lines are skipped: a line that
 * is not valid JSON, or that parses to something other than an object (for
 * example `null`), never aborts the scan.
 *
 * @param {string} [root] directory containing one sub-directory per project
 * @returns {{events: object[], result: object}} collected events plus a scan
 *   summary (`note` is set when `root` does not exist)
 */
export function collectClaudeCode(root = ROOT) {
    const events = [];
    let filesScanned = 0;
    if (!existsSync(root)) {
        return { events, result: { source: 'claude-code', filesScanned: 0, eventsFound: 0, eventsInserted: 0, note: `${root} not found` } };
    }
    for (const dir of readdirSync(root)) {
        const dirPath = join(root, dir);
        let files;
        try {
            files = readdirSync(dirPath).filter((f) => f.endsWith('.jsonl'));
        }
        catch {
            continue;
        }
        for (const file of files) {
            filesScanned++;
            let text;
            try {
                text = readFileSync(join(dirPath, file), 'utf8');
            }
            catch {
                continue;
            }
            // tool_use id -> event, so a failed tool_result can flag its turn
            const byToolUseId = new Map();
            for (const line of text.split('\n')) {
                if (!line.trim())
                    continue;
                let d;
                try {
                    d = JSON.parse(line);
                }
                catch {
                    continue;
                }
                // Valid JSON is not necessarily a record: `null` would throw on
                // property access below and take the whole collection down.
                if (d === null || typeof d !== 'object')
                    continue;
                if (d.type === 'user' && Array.isArray(d.message?.content)) {
                    for (const block of d.message.content) {
                        if (block?.type === 'tool_result' &&
                            block.is_error &&
                            block.tool_use_id &&
                            !isDeclination(block.content)) {
                            const ev = byToolUseId.get(block.tool_use_id);
                            if (ev)
                                ev.isError = true;
                        }
                    }
                    continue;
                }
                if (d.type !== 'assistant' || !d.message?.usage || !d.uuid)
                    continue;
                const u = d.message.usage;
                const total = (u.input_tokens ?? 0) + (u.output_tokens ?? 0);
                // Skip turns that consumed nothing at all (no fresh and no cached tokens).
                if (total === 0 && !(u.cache_read_input_tokens || u.cache_creation_input_tokens))
                    continue;
                const tools = [];
                const commands = [];
                let hasThinking = false;
                const toolUseIds = [];
                // Only an array of blocks carries tool/thinking information;
                // anything else (string, missing, malformed) contributes none.
                const blocks = Array.isArray(d.message.content) ? d.message.content : [];
                for (const block of blocks) {
                    if (block?.type === 'tool_use' && block.name) {
                        tools.push(block.name);
                        if (block.id)
                            toolUseIds.push(block.id);
                        if (typeof block.input?.command === 'string')
                            commands.push(block.input.command);
                    }
                    else if (block?.type === 'thinking') {
                        hasThinking = true;
                    }
                }
                const ev = {
                    source: 'claude-code',
                    eventKey: d.uuid,
                    // Fall back to the file name with only its trailing
                    // extension removed.
                    sessionId: d.sessionId ?? basename(file, '.jsonl'),
                    project: d.cwd ? basename(d.cwd) : dir,
                    timestamp: d.timestamp ?? new Date(0).toISOString(),
                    model: d.message.model ?? 'unknown',
                    inputTokens: u.input_tokens ?? 0,
                    outputTokens: u.output_tokens ?? 0,
                    cacheReadTokens: u.cache_read_input_tokens ?? 0,
                    cacheCreationTokens: u.cache_creation_input_tokens ?? 0,
                    thinkingTokens: 0,
                    tools,
                    commands,
                    hasThinking,
                    isError: false,
                    gitBranch: d.gitBranch,
                };
                ev.activity = classify(ev);
                events.push(ev);
                for (const id of toolUseIds)
                    byToolUseId.set(id, ev);
            }
        }
    }
    return {
        events,
        result: { source: 'claude-code', filesScanned, eventsFound: events.length, eventsInserted: 0 },
    };
}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import { readdirSync, readFileSync, existsSync, statSync } from 'node:fs';
|
|
2
|
+
import { join, basename } from 'node:path';
|
|
3
|
+
import { homedir } from 'node:os';
|
|
4
|
+
import { classify } from '../classify.js';
|
|
5
|
+
// Default location of Codex CLI session rollouts.
const ROOT = join(homedir(), '.codex', 'sessions');
/**
 * Recursively yield the path of every `rollout-*.jsonl` file under `dir`.
 *
 * Entries that cannot be inspected (dangling symlinks, permission errors,
 * files removed mid-scan) and directories that cannot be listed are skipped
 * instead of throwing, so a single bad entry cannot abort the whole scan.
 *
 * @param {string} dir directory to search
 * @yields {string} path of each matching rollout file
 */
function* walk(dir) {
    let entries;
    try {
        entries = readdirSync(dir);
    }
    catch {
        return; // unreadable or vanished directory — nothing to yield
    }
    for (const entry of entries) {
        const p = join(dir, entry);
        let isDirectory;
        try {
            // statSync follows symlinks, so linked session directories are still walked.
            isDirectory = statSync(p).isDirectory();
        }
        catch {
            continue; // dangling symlink or unreadable entry
        }
        if (isDirectory)
            yield* walk(p);
        else if (entry.startsWith('rollout-') && entry.endsWith('.jsonl'))
            yield p;
    }
}
|
|
15
|
+
/** Counters read from a Codex `token_count` usage object. */
const CODEX_USAGE_FIELDS = ['input_tokens', 'cached_input_tokens', 'output_tokens', 'reasoning_output_tokens'];
/** Per-counter `current - previous`, floored at zero so a counter that goes backwards never yields negative usage. */
function codexUsageDelta(current, previous) {
    return Object.fromEntries(CODEX_USAGE_FIELDS.map((field) => [field, Math.max(0, (current[field] ?? 0) - (previous[field] ?? 0))]));
}
/** Per-counter `a + b`, treating missing counters as zero. */
function codexUsageSum(a, b) {
    return Object.fromEntries(CODEX_USAGE_FIELDS.map((field) => [field, (a[field] ?? 0) + (b[field] ?? 0)]));
}
/**
 * Parse Codex CLI rollout files (`rollout-*.jsonl` under `root`) into events.
 *
 * One event is emitted per `token_count` message. Tool calls and shell
 * commands seen since the previous `token_count` are attached to it.
 * Token usage is derived as follows:
 *  - `total_token_usage` is treated as a cumulative snapshot and diffed
 *    against the previous running total;
 *  - when only `last_token_usage` is present it is used as this event's usage
 *    directly (it is not diffed), and the running total is advanced by it so a
 *    later cumulative snapshot is not double counted.
 * `inputTokens` excludes cached input and is never negative.
 *
 * Malformed lines (invalid JSON, or JSON that is not a record) and unreadable
 * files are skipped.
 *
 * @param {string} [root] Codex sessions directory
 * @returns {{events: object[], result: object}} collected events plus a scan summary
 */
export function collectCodex(root = ROOT) {
    const events = [];
    let filesScanned = 0;
    if (!existsSync(root)) {
        return { events, result: { source: 'codex', filesScanned: 0, eventsFound: 0, eventsInserted: 0, note: `${root} not found — Codex CLI not detected` } };
    }
    for (const file of walk(root)) {
        filesScanned++;
        let text;
        try {
            text = readFileSync(file, 'utf8');
        }
        catch {
            continue;
        }
        let sessionId = basename(file, '.jsonl');
        let project = 'unknown';
        let model = 'codex';
        let prev = null; // running cumulative usage for this file
        let pendingTools = [];
        let pendingCommands = [];
        let tick = 0; // 1-based ordinal of token_count events; part of eventKey
        for (const line of text.split('\n')) {
            if (!line.trim())
                continue;
            let d;
            try {
                d = JSON.parse(line);
            }
            catch {
                continue;
            }
            // `d` may be null or a primitive when the line is valid JSON but not a record.
            const p = d?.payload;
            if (!p)
                continue;
            if (d.type === 'session_meta' || p.type === 'session_meta') {
                if (p.id)
                    sessionId = p.id;
                if (p.cwd)
                    project = basename(p.cwd);
            }
            else if (p.type === 'turn_context' && p.model) {
                model = p.model;
            }
            else if (d.type === 'response_item' && p.type === 'function_call' && p.name) {
                pendingTools.push(p.name);
                if (/shell|exec/.test(p.name) && typeof p.arguments === 'string') {
                    try {
                        const args = JSON.parse(p.arguments);
                        const cmd = Array.isArray(args.command) ? args.command.join(' ') : args.command;
                        if (typeof cmd === 'string')
                            pendingCommands.push(cmd);
                    }
                    catch { /* ignore */ }
                }
            }
            else if (d.type === 'event_msg' && p.type === 'token_count' && p.info) {
                const cumulative = p.info.total_token_usage;
                const lastOnly = p.info.last_token_usage;
                let delta;
                if (cumulative) {
                    delta = prev ? codexUsageDelta(cumulative, prev) : cumulative;
                    prev = cumulative;
                }
                else if (lastOnly) {
                    // Already per-turn usage: diffing it against a cumulative
                    // snapshot would under-count (usually to zero).
                    delta = lastOnly;
                    prev = codexUsageSum(prev ?? {}, lastOnly);
                }
                else {
                    continue;
                }
                tick++;
                const cached = delta.cached_input_tokens ?? 0;
                const reasoning = delta.reasoning_output_tokens ?? 0;
                const ev = {
                    source: 'codex',
                    eventKey: `${sessionId}:${tick}`,
                    sessionId,
                    project,
                    timestamp: d.timestamp ?? new Date(0).toISOString(),
                    model,
                    // The two counters are floored independently, so the
                    // difference needs its own floor.
                    inputTokens: Math.max(0, (delta.input_tokens ?? 0) - cached),
                    outputTokens: delta.output_tokens ?? 0,
                    cacheReadTokens: cached,
                    cacheCreationTokens: 0,
                    thinkingTokens: reasoning,
                    tools: pendingTools,
                    commands: pendingCommands,
                    hasThinking: reasoning > 0,
                    isError: false,
                };
                ev.activity = classify(ev);
                events.push(ev);
                pendingTools = [];
                pendingCommands = [];
            }
        }
    }
    return {
        events,
        result: {
            source: 'codex',
            filesScanned,
            eventsFound: events.length,
            eventsInserted: 0,
            note: filesScanned > 0 ? 'codex adapter is experimental — verify totals against `codex` usage screens' : undefined,
        },
    };
}
|