claude-roi 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +119 -0
- package/README.md +8 -1
- package/package.json +1 -1
- package/skills-lock.json +10 -0
- package/src/cache.js +1 -1
- package/src/claude-parser.js +56 -0
- package/src/dashboard.html +352 -2
- package/src/index.js +24 -1
- package/src/metrics.js +152 -8
- package/src/server.js +5 -0
package/CLAUDE.md
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# CLAUDE.md — Codelens-AI
|
|
2
|
+
|
|
3
|
+
## Project Overview
|
|
4
|
+
|
|
5
|
+
**Codelens AI** (`claude-roi` on npm) is a CLI tool that measures ROI from AI coding agents by correlating Claude Code token usage with git commit output. It parses Claude Code session files, analyzes git history, and serves an interactive dashboard at `http://localhost:3457`.
|
|
6
|
+
|
|
7
|
+
**Version:** 0.7.0
|
|
8
|
+
**License:** MIT
|
|
9
|
+
**npm package:** `claude-roi`
|
|
10
|
+
|
|
11
|
+
## Tech Stack
|
|
12
|
+
|
|
13
|
+
- **Runtime:** Node.js >= 18, ES modules (`"type": "module"`)
|
|
14
|
+
- **Backend:** Express.js 5.0.0
|
|
15
|
+
- **CLI:** Commander.js 13.0.0
|
|
16
|
+
- **Frontend:** Single-file HTML (`src/dashboard.html`) with vanilla JS + Chart.js 4.4.7
|
|
17
|
+
- **Testing:** Playwright (E2E)
|
|
18
|
+
- **Styling:** Inline CSS with CSS variables, glassmorphism design, dark/light theme
|
|
19
|
+
|
|
20
|
+
## Project Structure
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
src/
|
|
24
|
+
├── index.js # CLI entry point & orchestration (Commander)
|
|
25
|
+
├── claude-parser.js # Parses JSONL session files from ~/.claude/projects/
|
|
26
|
+
├── git-analyzer.js # Git log analysis, branch detection, diff stats
|
|
27
|
+
├── correlator.js # Matches sessions to commits via file overlap + time window
|
|
28
|
+
├── metrics.js # ROI calculations, grades, insights, heatmap, survival rate
|
|
29
|
+
├── server.js # Express REST API routes
|
|
30
|
+
├── cache.js # Smart caching with stale file detection
|
|
31
|
+
├── dashboard.html # Single-file SPA dashboard (3000+ lines)
|
|
32
|
+
└── agents/ # Agent integration stubs (claude/, cursor/)
|
|
33
|
+
|
|
34
|
+
tests/
|
|
35
|
+
└── dashboard.spec.js # Playwright E2E tests
|
|
36
|
+
|
|
37
|
+
.github/workflows/
|
|
38
|
+
├── ci.yml # CI: syntax check, Node 18/20/22 matrix
|
|
39
|
+
└── release.yml # npm publish on version tag push
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Data Flow
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
Claude Sessions (JSONL) → claude-parser.js → [Cache] → git-analyzer.js
|
|
46
|
+
→ correlator.js → metrics.js → server.js (REST API) → dashboard.html
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Key API Routes (server.js)
|
|
50
|
+
|
|
51
|
+
- `GET /` — dashboard HTML
|
|
52
|
+
- `GET /api/all` — full payload
|
|
53
|
+
- `GET /api/summary` — hero stats + insights
|
|
54
|
+
- `GET /api/timeline` — daily cost/output chart data
|
|
55
|
+
- `GET /api/sessions` — paginated sessions with sorting
|
|
56
|
+
- `GET /api/models` — model breakdown
|
|
57
|
+
- `GET /api/heatmap` — productivity heatmap
|
|
58
|
+
- `GET /api/tools` — tool usage breakdown
|
|
59
|
+
- `GET /api/survival` — line survival stats
|
|
60
|
+
- `GET /api/tokens` — detailed token analytics
|
|
61
|
+
- `POST /api/refresh` — force re-parse
|
|
62
|
+
|
|
63
|
+
## CLI Usage
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
npx claude-roi # defaults: 30 days, port 3457
|
|
67
|
+
npx claude-roi --days 90 # custom lookback
|
|
68
|
+
npx claude-roi --port 8080 # custom port
|
|
69
|
+
npx claude-roi --no-open # don't auto-open browser
|
|
70
|
+
npx claude-roi --json # dump raw JSON to stdout
|
|
71
|
+
npx claude-roi --project X # filter by project name
|
|
72
|
+
npx claude-roi --refresh # force full re-parse
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Development Commands
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
npm install # install dependencies
|
|
79
|
+
npm test # run Playwright E2E tests (needs fixtures)
|
|
80
|
+
node src/index.js # run locally
|
|
81
|
+
node --check src/*.js # syntax validation
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Key Design Decisions
|
|
85
|
+
|
|
86
|
+
- **Single-file dashboard** — no build step, served directly by Express
|
|
87
|
+
- **Zero-config** — auto-discovers `~/.claude/projects/`
|
|
88
|
+
- **Smart caching** — incremental parsing, only re-processes changed JSONL files (`~/.cache/agent-analytics/`)
|
|
89
|
+
- **File-first correlation** — sessions are matched to commits by file overlap, with a 2-hour temporal buffer
|
|
90
|
+
- **Privacy-first** — all data stays local, no telemetry
|
|
91
|
+
- **Version-aware pricing** — token costs reflect Anthropic's pricing tiers per model
|
|
92
|
+
|
|
93
|
+
## Coding Conventions
|
|
94
|
+
|
|
95
|
+
- ES module imports (`import`/`export`)
|
|
96
|
+
- No build tooling or transpilation
|
|
97
|
+
- Inline styles and scripts in dashboard.html (no external CSS/JS bundles)
|
|
98
|
+
- Express 5 (path params, async error handling)
|
|
99
|
+
- Functions and variables use camelCase
|
|
100
|
+
- Constants defined at module top
|
|
101
|
+
|
|
102
|
+
## Available Skills
|
|
103
|
+
|
|
104
|
+
Use these skills when working on this project:
|
|
105
|
+
|
|
106
|
+
- **`/simplify`** — Review changed code for reuse, quality, and efficiency, then fix issues found. Use after writing or modifying code.
|
|
107
|
+
- **`/frontend-design`** — Create distinctive, production-grade frontend interfaces. Use when modifying `dashboard.html` or building new UI components.
|
|
108
|
+
- **`/claude-developer-platform`** — Build apps with the Claude API or Anthropic SDK. Use when working on agent integrations in `src/agents/`.
|
|
109
|
+
- **`/find-skills`** — Discover and install new agent skills for extended capabilities.
|
|
110
|
+
- **`/keybindings-help`** — Configure keyboard shortcuts for Claude Code.
|
|
111
|
+
|
|
112
|
+
## Important Notes
|
|
113
|
+
|
|
114
|
+
- The dashboard is a single 3000+ line HTML file — changes should maintain the inline architecture
|
|
115
|
+
- Cache is stored at `~/.cache/agent-analytics/parsed-sessions.json`
|
|
116
|
+
- Session JSONL files are at `~/.claude/projects/`
|
|
117
|
+
- Token pricing is hardcoded in `claude-parser.js` — update when Anthropic changes pricing
|
|
118
|
+
- Playwright tests require Claude session fixtures to run (run locally, not in CI)
|
|
119
|
+
- CI runs syntax checks only; E2E tests are local-only
|
package/README.md
CHANGED
|
@@ -79,6 +79,11 @@ This parses your `~/.claude/projects/` session data, analyzes your git repos, an
|
|
|
79
79
|
| **Model Comparison** | Efficiency breakdown across Opus, Sonnet, and Haiku |
|
|
80
80
|
| **Branch Awareness** | What % of AI commits landed on production |
|
|
81
81
|
| **Peak Hours** | Hour-of-day x day-of-week productivity heatmap |
|
|
82
|
+
| **Autonomy Score** | Composite A-F grade measuring how independently the agent works |
|
|
83
|
+
| **Autopilot Ratio** | Assistant messages per user prompt (higher = more autonomous) |
|
|
84
|
+
| **Self-Heal Score** | % of bash calls that are test/lint commands (self-verification) |
|
|
85
|
+
| **Toolbelt Coverage** | % of available tools used per session (workflow breadth) |
|
|
86
|
+
| **Commit Velocity** | Tool calls per commit (lower = more efficient) |
|
|
82
87
|
|
|
83
88
|
## CLI Options
|
|
84
89
|
|
|
@@ -90,6 +95,7 @@ claude-roi --no-open # don't auto-open browser
|
|
|
90
95
|
claude-roi --json # dump all metrics as JSON to stdout
|
|
91
96
|
claude-roi --project techops # filter to a specific project
|
|
92
97
|
claude-roi --refresh # force full re-parse (ignore cache)
|
|
98
|
+
claude-roi --autonomy # print autonomy score to terminal and exit
|
|
93
99
|
```
|
|
94
100
|
|
|
95
101
|
## Dashboard
|
|
@@ -102,7 +108,8 @@ The dashboard includes:
|
|
|
102
108
|
- **Model comparison** — cost breakdown by Claude model
|
|
103
109
|
- **Session length analysis** — which session sizes have the best ROI
|
|
104
110
|
- **Productivity heatmap** — GitHub-style grid showing when you're most productive
|
|
105
|
-
- **
|
|
111
|
+
- **Agent Autonomy** — autonomy score badge, autopilot ratio, self-heal score, toolbelt coverage, commit velocity, and top verification commands
|
|
112
|
+
- **Sessions table** — sortable, expandable table with per-session metrics, matched commits, and autopilot ratio
|
|
106
113
|
|
|
107
114
|
## How It Works
|
|
108
115
|
|
package/package.json
CHANGED
package/skills-lock.json
ADDED
package/src/cache.js
CHANGED
|
@@ -4,7 +4,7 @@ import os from 'node:os';
|
|
|
4
4
|
|
|
5
5
|
const CACHE_DIR = path.join(os.homedir(), '.cache', 'agent-analytics');
|
|
6
6
|
const CACHE_FILE = path.join(CACHE_DIR, 'parsed-sessions.json');
|
|
7
|
-
const CACHE_VERSION =
|
|
7
|
+
const CACHE_VERSION = 2;
|
|
8
8
|
|
|
9
9
|
export function loadCache() {
|
|
10
10
|
if (!existsSync(CACHE_FILE)) {
|
package/src/claude-parser.js
CHANGED
|
@@ -96,6 +96,43 @@ function toRelativePath(absolutePath, repoPath) {
|
|
|
96
96
|
return absolutePath.split('/').pop();
|
|
97
97
|
}
|
|
98
98
|
|
|
99
|
+
// Commands that are clearly NOT verification even if they contain matching keywords.
// Every pattern is anchored to the start of the command, so it only works on a
// command whose `cd`/env-var prefixes have already been stripped.
const NON_VERIFICATION_PATTERNS = [
  /^\s*node\s+-e\b/,       // inline JS eval
  /^\s*find\s+/,           // file search
  /^\s*cat\s+/,            // file display
  /^\s*echo\s+/,           // printing
  /^\s*ls\b/,              // listing
  /^\s*rm\s+/,             // file deletion
];

// Patterns that identify test/lint/typecheck commands (for autonomy self-heal score).
// These are unanchored: a keyword anywhere in the command counts as a match.
const VERIFICATION_PATTERNS = [
  /\bnpm\s+(test|run\s+(test|lint|check|typecheck))\b/,
  /\b(pnpm|yarn|bun)\s+(run\s+)?(test|lint|check|typecheck)\b/,
  /\b(jest|vitest|mocha|ava)\b/,
  /\b(pytest|python\s+-m\s+(pytest|unittest))\b/,
  /\b(go\s+test|cargo\s+(test|clippy))\b/,
  /\b(eslint|biome|prettier\b.*--check)/,
  /\btsc(\s+--noEmit|\s+-p)\b/,
  /\b(mypy|ruff|flake8|pylint|rubocop)\b/,
  /\bnode\s+--check\b/,
  /\bmake\s+(test|check|lint)\b/,
];

// Matches exactly ONE leading prefix: `cd <path> &&`, `cd <path>;` (path may be
// single- or double-quoted) or an environment assignment `NAME=value `.
const COMMAND_PREFIX_PATTERN = /^(?:cd\s+(?:"[^"]*"|'[^']*'|\S+)\s*(?:&&|;)\s*|\w+=\S+\s+)/;

/**
 * Decide whether a shell command is a test/lint/typecheck invocation.
 *
 * The command is trimmed, then leading `cd ... &&`, `cd ...;` and `VAR=value`
 * prefixes are removed repeatedly (in any order and any number) so that the
 * anchored NON_VERIFICATION_PATTERNS see the real command. Stripping before
 * trimming, or in a fixed order, lets commands like `  cd app && cat jest.config.js`
 * slip past the exclusion list and be miscounted because `jest` matches.
 *
 * @param {*} command - Raw command text; anything that is not a non-empty string yields false.
 * @returns {boolean} true when the normalized command matches a verification
 *   pattern and does not match any exclusion pattern.
 */
function isVerificationCommand(command) {
  if (!command || typeof command !== 'string') return false;

  // Peel prefixes one at a time until nothing more can be removed. Each
  // successful replace shortens the string, so the loop always terminates.
  let core = command.trim();
  let stripped = core.replace(COMMAND_PREFIX_PATTERN, '');
  while (stripped !== core) {
    core = stripped;
    stripped = core.replace(COMMAND_PREFIX_PATTERN, '');
  }
  core = core.trim();

  // Exclusions win over keyword matches (e.g. `cat jest.config.js`).
  if (NON_VERIFICATION_PATTERNS.some((pattern) => pattern.test(core))) return false;
  return VERIFICATION_PATTERNS.some((pattern) => pattern.test(core));
}
|
|
135
|
+
|
|
99
136
|
function extractToolUse(session, msg) {
|
|
100
137
|
const content = msg.content;
|
|
101
138
|
if (!Array.isArray(content)) return;
|
|
@@ -108,6 +145,17 @@ function extractToolUse(session, msg) {
|
|
|
108
145
|
// Count tool calls
|
|
109
146
|
session.toolCalls[toolName] = (session.toolCalls[toolName] || 0) + 1;
|
|
110
147
|
|
|
148
|
+
// Track Bash commands for autonomy self-heal scoring
|
|
149
|
+
if (toolName === 'Bash') {
|
|
150
|
+
const command = block.input?.command || block.input?.content;
|
|
151
|
+
if (command) {
|
|
152
|
+
session.totalBashCalls++;
|
|
153
|
+
const isVerif = isVerificationCommand(command);
|
|
154
|
+
if (isVerif) session.verificationBashCalls++;
|
|
155
|
+
session.bashCommands.push({ command: command.slice(0, 200), isVerification: isVerif });
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
|
|
111
159
|
// Track files written/read
|
|
112
160
|
const filePath = block.input?.file_path;
|
|
113
161
|
if (!filePath) continue;
|
|
@@ -145,6 +193,9 @@ function createEmptySession(sessionId) {
|
|
|
145
193
|
filesRead: [],
|
|
146
194
|
userMessageCount: 0,
|
|
147
195
|
assistantMessageCount: 0,
|
|
196
|
+
bashCommands: [],
|
|
197
|
+
totalBashCalls: 0,
|
|
198
|
+
verificationBashCalls: 0,
|
|
148
199
|
};
|
|
149
200
|
}
|
|
150
201
|
|
|
@@ -380,6 +431,11 @@ function mergeSubagentIntoSession(parent, sub) {
|
|
|
380
431
|
parent.toolCalls[tool] = (parent.toolCalls[tool] || 0) + count;
|
|
381
432
|
}
|
|
382
433
|
|
|
434
|
+
// Merge bash command tracking
|
|
435
|
+
parent.totalBashCalls += sub.totalBashCalls;
|
|
436
|
+
parent.verificationBashCalls += sub.verificationBashCalls;
|
|
437
|
+
parent.bashCommands.push(...sub.bashCommands);
|
|
438
|
+
|
|
383
439
|
// Merge files
|
|
384
440
|
for (const f of sub.filesWritten) {
|
|
385
441
|
if (!parent.filesWritten.includes(f)) parent.filesWritten.push(f);
|
package/src/dashboard.html
CHANGED
|
@@ -1517,6 +1517,254 @@
|
|
|
1517
1517
|
.share-toast.visible {
|
|
1518
1518
|
opacity: 1;
|
|
1519
1519
|
}
|
|
1520
|
+
|
|
1521
|
+
/* ── Autonomy Section ───────────────────────── */
|
|
1522
|
+
@property --auto-deg {
|
|
1523
|
+
syntax: '<angle>';
|
|
1524
|
+
initial-value: 0deg;
|
|
1525
|
+
inherits: false;
|
|
1526
|
+
}
|
|
1527
|
+
.autonomy-section { margin-bottom: 48px; }
|
|
1528
|
+
.autonomy-section h2 {
|
|
1529
|
+
font-family: var(--font-display);
|
|
1530
|
+
font-size: 1rem;
|
|
1531
|
+
font-weight: 700;
|
|
1532
|
+
text-transform: uppercase;
|
|
1533
|
+
letter-spacing: 0.1em;
|
|
1534
|
+
color: var(--text-secondary);
|
|
1535
|
+
margin-bottom: 24px;
|
|
1536
|
+
display: flex;
|
|
1537
|
+
align-items: center;
|
|
1538
|
+
gap: 8px;
|
|
1539
|
+
}
|
|
1540
|
+
.autonomy-layout {
|
|
1541
|
+
display: grid;
|
|
1542
|
+
grid-template-columns: 220px 1fr;
|
|
1543
|
+
gap: 24px;
|
|
1544
|
+
align-items: stretch;
|
|
1545
|
+
}
|
|
1546
|
+
@media (max-width: 768px) {
|
|
1547
|
+
.autonomy-layout { grid-template-columns: 1fr; }
|
|
1548
|
+
.autonomy-score-hub { justify-self: center; max-width: 240px; }
|
|
1549
|
+
}
|
|
1550
|
+
|
|
1551
|
+
/* Score hub — the big badge */
|
|
1552
|
+
.autonomy-score-hub {
|
|
1553
|
+
display: flex;
|
|
1554
|
+
flex-direction: column;
|
|
1555
|
+
align-items: center;
|
|
1556
|
+
justify-content: center;
|
|
1557
|
+
gap: 12px;
|
|
1558
|
+
padding: 28px 20px;
|
|
1559
|
+
background: linear-gradient(145deg, var(--glass-bg-from), var(--glass-bg-to));
|
|
1560
|
+
backdrop-filter: blur(16px);
|
|
1561
|
+
border: 1px solid var(--glass-border);
|
|
1562
|
+
border-radius: var(--radius);
|
|
1563
|
+
position: relative;
|
|
1564
|
+
overflow: hidden;
|
|
1565
|
+
}
|
|
1566
|
+
.autonomy-score-hub::before {
|
|
1567
|
+
content: '';
|
|
1568
|
+
position: absolute;
|
|
1569
|
+
inset: 0;
|
|
1570
|
+
background: radial-gradient(circle at 50% 20%, var(--auto-glow, rgba(34, 211, 168, 0.08)), transparent 70%);
|
|
1571
|
+
pointer-events: none;
|
|
1572
|
+
}
|
|
1573
|
+
.autonomy-ring {
|
|
1574
|
+
width: 130px;
|
|
1575
|
+
height: 130px;
|
|
1576
|
+
border-radius: 50%;
|
|
1577
|
+
display: flex;
|
|
1578
|
+
align-items: center;
|
|
1579
|
+
justify-content: center;
|
|
1580
|
+
position: relative;
|
|
1581
|
+
flex-shrink: 0;
|
|
1582
|
+
}
|
|
1583
|
+
.autonomy-ring::before {
|
|
1584
|
+
content: '';
|
|
1585
|
+
position: absolute;
|
|
1586
|
+
inset: 0;
|
|
1587
|
+
border-radius: 50%;
|
|
1588
|
+
padding: 5px;
|
|
1589
|
+
background: conic-gradient(var(--auto-color) 0deg, var(--auto-color) var(--auto-deg, 0deg), var(--bg-hover) var(--auto-deg, 0deg));
|
|
1590
|
+
-webkit-mask: linear-gradient(#fff 0 0) content-box, linear-gradient(#fff 0 0);
|
|
1591
|
+
-webkit-mask-composite: xor;
|
|
1592
|
+
mask-composite: exclude;
|
|
1593
|
+
animation: autoRingReveal 1.5s cubic-bezier(0.25, 0.46, 0.45, 0.94) forwards;
|
|
1594
|
+
}
|
|
1595
|
+
@keyframes autoRingReveal {
|
|
1596
|
+
from { --auto-deg: 0deg; }
|
|
1597
|
+
}
|
|
1598
|
+
.autonomy-ring .auto-grade {
|
|
1599
|
+
font-family: var(--font-display);
|
|
1600
|
+
font-size: 3rem;
|
|
1601
|
+
font-weight: 800;
|
|
1602
|
+
line-height: 1;
|
|
1603
|
+
}
|
|
1604
|
+
.autonomy-score-label {
|
|
1605
|
+
font-family: var(--font-display);
|
|
1606
|
+
font-size: 0.75rem;
|
|
1607
|
+
color: var(--text-muted);
|
|
1608
|
+
text-transform: uppercase;
|
|
1609
|
+
letter-spacing: 0.1em;
|
|
1610
|
+
text-align: center;
|
|
1611
|
+
}
|
|
1612
|
+
.autonomy-score-num {
|
|
1613
|
+
font-family: var(--font-display);
|
|
1614
|
+
font-size: 1.1rem;
|
|
1615
|
+
font-weight: 700;
|
|
1616
|
+
color: var(--text-primary);
|
|
1617
|
+
}
|
|
1618
|
+
|
|
1619
|
+
/* Metric cards grid */
|
|
1620
|
+
.autonomy-cards {
|
|
1621
|
+
display: grid;
|
|
1622
|
+
grid-template-columns: repeat(2, 1fr);
|
|
1623
|
+
gap: 16px;
|
|
1624
|
+
}
|
|
1625
|
+
@media (max-width: 520px) {
|
|
1626
|
+
.autonomy-cards { grid-template-columns: 1fr; }
|
|
1627
|
+
}
|
|
1628
|
+
.auto-metric {
|
|
1629
|
+
background: linear-gradient(145deg, var(--glass-bg-from), var(--glass-bg-to));
|
|
1630
|
+
backdrop-filter: blur(16px);
|
|
1631
|
+
border: 1px solid var(--glass-border);
|
|
1632
|
+
border-radius: var(--radius);
|
|
1633
|
+
padding: 20px;
|
|
1634
|
+
display: flex;
|
|
1635
|
+
flex-direction: column;
|
|
1636
|
+
gap: 10px;
|
|
1637
|
+
transition: border-color 0.2s, box-shadow 0.2s;
|
|
1638
|
+
}
|
|
1639
|
+
.auto-metric:hover {
|
|
1640
|
+
border-color: var(--glass-border-hover);
|
|
1641
|
+
box-shadow: 0 4px 20px var(--shadow-card);
|
|
1642
|
+
}
|
|
1643
|
+
.auto-metric .metric-header {
|
|
1644
|
+
display: flex;
|
|
1645
|
+
justify-content: space-between;
|
|
1646
|
+
align-items: center;
|
|
1647
|
+
}
|
|
1648
|
+
.auto-metric .metric-name {
|
|
1649
|
+
font-family: var(--font-display);
|
|
1650
|
+
font-size: 0.7rem;
|
|
1651
|
+
text-transform: uppercase;
|
|
1652
|
+
letter-spacing: 0.08em;
|
|
1653
|
+
color: var(--text-muted);
|
|
1654
|
+
display: flex;
|
|
1655
|
+
align-items: center;
|
|
1656
|
+
gap: 6px;
|
|
1657
|
+
}
|
|
1658
|
+
.auto-metric .metric-value {
|
|
1659
|
+
font-family: var(--font-display);
|
|
1660
|
+
font-size: 1.6rem;
|
|
1661
|
+
font-weight: 800;
|
|
1662
|
+
line-height: 1;
|
|
1663
|
+
}
|
|
1664
|
+
.auto-metric .metric-bar {
|
|
1665
|
+
height: 6px;
|
|
1666
|
+
background: var(--overlay-medium);
|
|
1667
|
+
border-radius: 3px;
|
|
1668
|
+
overflow: hidden;
|
|
1669
|
+
}
|
|
1670
|
+
.auto-metric .metric-bar .fill {
|
|
1671
|
+
height: 100%;
|
|
1672
|
+
border-radius: 3px;
|
|
1673
|
+
transition: width 1s cubic-bezier(0.25, 0.46, 0.45, 0.94);
|
|
1674
|
+
}
|
|
1675
|
+
.auto-metric .metric-sub {
|
|
1676
|
+
font-size: 0.75rem;
|
|
1677
|
+
color: var(--text-muted);
|
|
1678
|
+
}
|
|
1679
|
+
|
|
1680
|
+
/* Breakdown bar */
|
|
1681
|
+
.autonomy-breakdown {
|
|
1682
|
+
margin-top: 20px;
|
|
1683
|
+
grid-column: 1 / -1;
|
|
1684
|
+
}
|
|
1685
|
+
.autonomy-breakdown .breakdown-label {
|
|
1686
|
+
font-family: var(--font-display);
|
|
1687
|
+
font-size: 0.7rem;
|
|
1688
|
+
text-transform: uppercase;
|
|
1689
|
+
letter-spacing: 0.08em;
|
|
1690
|
+
color: var(--text-muted);
|
|
1691
|
+
margin-bottom: 8px;
|
|
1692
|
+
}
|
|
1693
|
+
.breakdown-bar {
|
|
1694
|
+
display: flex;
|
|
1695
|
+
height: 10px;
|
|
1696
|
+
border-radius: 5px;
|
|
1697
|
+
overflow: hidden;
|
|
1698
|
+
background: var(--overlay-medium);
|
|
1699
|
+
}
|
|
1700
|
+
.breakdown-bar .seg {
|
|
1701
|
+
height: 100%;
|
|
1702
|
+
transition: width 0.8s ease;
|
|
1703
|
+
}
|
|
1704
|
+
.breakdown-legend {
|
|
1705
|
+
display: flex;
|
|
1706
|
+
flex-wrap: wrap;
|
|
1707
|
+
gap: 12px;
|
|
1708
|
+
margin-top: 8px;
|
|
1709
|
+
font-size: 0.7rem;
|
|
1710
|
+
color: var(--text-secondary);
|
|
1711
|
+
}
|
|
1712
|
+
.breakdown-legend span {
|
|
1713
|
+
display: flex;
|
|
1714
|
+
align-items: center;
|
|
1715
|
+
gap: 5px;
|
|
1716
|
+
}
|
|
1717
|
+
.breakdown-legend .ldot {
|
|
1718
|
+
width: 6px;
|
|
1719
|
+
height: 6px;
|
|
1720
|
+
border-radius: 50%;
|
|
1721
|
+
display: inline-block;
|
|
1722
|
+
}
|
|
1723
|
+
|
|
1724
|
+
/* Top commands */
|
|
1725
|
+
.auto-commands {
|
|
1726
|
+
margin-top: 16px;
|
|
1727
|
+
grid-column: 1 / -1;
|
|
1728
|
+
background: linear-gradient(145deg, var(--glass-bg-from), var(--glass-bg-to));
|
|
1729
|
+
border: 1px solid var(--glass-border);
|
|
1730
|
+
border-radius: var(--radius);
|
|
1731
|
+
padding: 16px 20px;
|
|
1732
|
+
}
|
|
1733
|
+
.auto-commands .cmd-title {
|
|
1734
|
+
font-family: var(--font-display);
|
|
1735
|
+
font-size: 0.7rem;
|
|
1736
|
+
text-transform: uppercase;
|
|
1737
|
+
letter-spacing: 0.08em;
|
|
1738
|
+
color: var(--text-muted);
|
|
1739
|
+
margin-bottom: 10px;
|
|
1740
|
+
}
|
|
1741
|
+
.auto-commands .cmd-list {
|
|
1742
|
+
display: flex;
|
|
1743
|
+
flex-wrap: wrap;
|
|
1744
|
+
gap: 8px;
|
|
1745
|
+
}
|
|
1746
|
+
.auto-commands .cmd-tag {
|
|
1747
|
+
font-family: var(--font-display);
|
|
1748
|
+
font-size: 0.72rem;
|
|
1749
|
+
padding: 4px 10px;
|
|
1750
|
+
border-radius: 4px;
|
|
1751
|
+
background: var(--overlay-strong);
|
|
1752
|
+
color: var(--text-secondary);
|
|
1753
|
+
border: 1px solid var(--overlay-intense);
|
|
1754
|
+
}
|
|
1755
|
+
.auto-commands .cmd-tag .cmd-count {
|
|
1756
|
+
color: var(--accent-green);
|
|
1757
|
+
font-weight: 600;
|
|
1758
|
+
margin-left: 4px;
|
|
1759
|
+
}
|
|
1760
|
+
|
|
1761
|
+
/* Autopilot badge in sessions table */
|
|
1762
|
+
.autopilot-badge {
|
|
1763
|
+
font-family: var(--font-display);
|
|
1764
|
+
font-size: 0.75rem;
|
|
1765
|
+
font-weight: 600;
|
|
1766
|
+
color: var(--accent-cyan);
|
|
1767
|
+
}
|
|
1520
1768
|
</style>
|
|
1521
1769
|
</head>
|
|
1522
1770
|
<body>
|
|
@@ -1803,6 +2051,7 @@ function render() {
|
|
|
1803
2051
|
<div id="heatmap-container"></div>
|
|
1804
2052
|
</div>
|
|
1805
2053
|
</div>
|
|
2054
|
+
<div class="scroll-reveal">${renderAutonomySection(d.autonomyMetrics)}</div>
|
|
1806
2055
|
<div class="scroll-reveal">${renderCacheEfficiency(t)}</div>
|
|
1807
2056
|
<div class="scroll-reveal">${renderSurvival(d.lineSurvival)}</div>
|
|
1808
2057
|
<div class="scroll-reveal">${renderSessionsTable(d.sessions)}</div>
|
|
@@ -2093,6 +2342,105 @@ function renderTokenFunFacts(facts) {
|
|
|
2093
2342
|
</div>`;
|
|
2094
2343
|
}
|
|
2095
2344
|
|
|
2345
|
+
/**
 * Build the HTML for the "Agent Autonomy" dashboard section.
 *
 * Reads from `am`: `overall.grade`, `overall.score`, `commitVelocity`,
 * `autopilotRatio`, `selfHealScore`, `totalVerificationCalls`, `totalBashCalls`,
 * `toolbeltCoverage`, `breakdown.{autopilotScore, selfHealWeighted,
 * toolbeltWeighted, velocityScore}` and `topVerificationCommands`
 * (items with `command` and `count`).
 *
 * @param {object|null|undefined} am - Autonomy metrics payload.
 * @returns {string} HTML markup, or '' when `am` is falsy.
 */
function renderAutonomySection(am) {
  if (!am) return '';

  // Command text originates from recorded shell commands and can contain
  // HTML metacharacters (redirections, quotes), so it must be escaped
  // before being placed into markup.
  const escapeHtml = (value) => String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');

  const g = am.overall.grade;
  const s = am.overall.score;
  const gradeColor = GRADE_VAR[g] || GRADE_VAR.F;
  // Score (0-100) mapped onto the 360-degree ring.
  const deg = Math.round((s / 100) * 360);

  // Glow color map
  const glowMap = { A: 'rgba(34,211,168,0.08)', B: 'rgba(59,130,246,0.08)', C: 'rgba(245,158,11,0.08)', D: 'rgba(240,136,62,0.08)', F: 'rgba(239,68,68,0.08)' };

  // Velocity bar: lower is better, cap at 100 steps.
  // `!= null` treats a missing (undefined) value like null instead of rendering NaN%.
  const hasVelocity = am.commitVelocity != null;
  const velPct = hasVelocity ? Math.min(100, Math.max(0, 100 - am.commitVelocity)) : 0;
  let velLabel = 'N/A';
  if (hasVelocity) {
    velLabel = am.commitVelocity <= 20 ? 'Blazing' : am.commitVelocity <= 50 ? 'Solid' : 'Heavy';
  }

  // Breakdown percentages: each component's share of the weighted total.
  const bd = am.breakdown;
  const autoPart = bd.autopilotScore * 0.25;
  const healPart = bd.selfHealWeighted * 0.30;
  const toolPart = bd.toolbeltWeighted * 0.20;
  const velPart = bd.velocityScore * 0.25;
  const bTotal = autoPart + healPart + toolPart + velPart;

  let bAuto = 0;
  let bHeal = 0;
  let bTool = 0;
  let bVel = 0;
  // With nothing to show (total is 0 or NaN) every segment stays empty rather
  // than the remainder being attributed to velocity.
  if (bTotal > 0) {
    bAuto = Math.round((autoPart / bTotal) * 100);
    bHeal = Math.round((healPart / bTotal) * 100);
    bTool = Math.round((toolPart / bTotal) * 100);
    // Velocity takes the remainder so the segments sum to 100; clamp because
    // rounding of the other three can overshoot by a point or two.
    bVel = velPart > 0 ? Math.max(0, 100 - bAuto - bHeal - bTool) : 0;
  }

  const commandTags = am.topVerificationCommands.slice(0, 6).map((c) =>
    `<span class="cmd-tag">${escapeHtml(c.command)}<span class="cmd-count">×${escapeHtml(c.count)}</span></span>`
  ).join('');

  return `<div class="autonomy-section">
    <h2>Agent Autonomy <i class="info-tip" data-tip="How independently your AI agent works. Measures autopilot ratio (actions per prompt), self-healing (test/lint usage), tool diversity, and commit efficiency.">i</i></h2>
    <div class="autonomy-layout">
      <div class="autonomy-score-hub" style="--auto-glow: ${glowMap[g] || glowMap.F};">
        <div class="autonomy-ring" style="--auto-color: ${gradeColor}; --auto-deg: ${deg}deg;">
          <div class="auto-grade" style="color: ${gradeColor};">${g}</div>
        </div>
        <div class="autonomy-score-num">${s} / 100</div>
        <div class="autonomy-score-label">Autonomy Score</div>
      </div>
      <div>
        <div class="autonomy-cards">
          <div class="auto-metric">
            <div class="metric-header">
              <span class="metric-name">Autopilot Ratio <i class="info-tip" data-tip="Agent responses per user message. Higher means the agent does more per prompt. 5x = max score.">i</i></span>
            </div>
            <div class="metric-value" style="color: var(--accent-cyan);">${am.autopilotRatio}x</div>
            <div class="metric-bar"><div class="fill" style="width: ${Math.min(am.autopilotRatio / 5 * 100, 100)}%; background: var(--accent-cyan);"></div></div>
            <div class="metric-sub">${am.autopilotRatio >= 4 ? 'High autonomy' : am.autopilotRatio >= 2 ? 'Moderate' : 'Low — agent needs more prompts'}</div>
          </div>
          <div class="auto-metric">
            <div class="metric-header">
              <span class="metric-name">Self-Heal Score <i class="info-tip" data-tip="Percentage of Bash commands that are tests, lints, or type checks. Higher means the agent verifies its own work.">i</i></span>
            </div>
            <div class="metric-value" style="color: var(--accent-green);">${am.selfHealScore}%</div>
            <div class="metric-bar"><div class="fill" style="width: ${am.selfHealScore}%; background: var(--accent-green);"></div></div>
            <div class="metric-sub">${am.totalVerificationCalls} of ${am.totalBashCalls} bash calls were tests/lints</div>
          </div>
          <div class="auto-metric">
            <div class="metric-header">
              <span class="metric-name">Toolbelt Coverage <i class="info-tip" data-tip="Percentage of available tools (Read, Write, Edit, Bash, Grep, Glob, etc.) the agent uses per session. Higher diversity suggests more complete autonomous workflow.">i</i></span>
            </div>
            <div class="metric-value" style="color: var(--accent-purple);">${am.toolbeltCoverage}%</div>
            <div class="metric-bar"><div class="fill" style="width: ${am.toolbeltCoverage}%; background: var(--accent-purple);"></div></div>
            <div class="metric-sub">${am.toolbeltCoverage >= 70 ? 'Full toolbelt utilization' : am.toolbeltCoverage >= 40 ? 'Moderate diversity' : 'Narrow tool usage'}</div>
          </div>
          <div class="auto-metric">
            <div class="metric-header">
              <span class="metric-name">Commit Velocity <i class="info-tip" data-tip="Average tool calls needed per commit. Lower = more efficient agent. Under 20 is blazing fast.">i</i></span>
            </div>
            <div class="metric-value" style="color: var(--accent-blue);">${hasVelocity ? am.commitVelocity : '—'}<span style="font-size:0.7rem;font-weight:500;color:var(--text-muted);margin-left:4px;">${hasVelocity ? 'steps/commit' : ''}</span></div>
            <div class="metric-bar"><div class="fill" style="width: ${velPct}%; background: var(--accent-blue);"></div></div>
            <div class="metric-sub">${velLabel}</div>
          </div>
        </div>
        <div class="autonomy-breakdown">
          <div class="breakdown-label">Score Composition</div>
          <div class="breakdown-bar">
            <div class="seg" style="width: ${bAuto}%; background: var(--accent-cyan);" title="Autopilot ${bd.autopilotScore}"></div>
            <div class="seg" style="width: ${bHeal}%; background: var(--accent-green);" title="Self-Heal ${bd.selfHealWeighted}"></div>
            <div class="seg" style="width: ${bTool}%; background: var(--accent-purple);" title="Toolbelt ${bd.toolbeltWeighted}"></div>
            <div class="seg" style="width: ${bVel}%; background: var(--accent-blue);" title="Velocity ${bd.velocityScore}"></div>
          </div>
          <div class="breakdown-legend">
            <span><span class="ldot" style="background: var(--accent-cyan);"></span>Autopilot 25%</span>
            <span><span class="ldot" style="background: var(--accent-green);"></span>Self-Heal 30%</span>
            <span><span class="ldot" style="background: var(--accent-purple);"></span>Toolbelt 20%</span>
            <span><span class="ldot" style="background: var(--accent-blue);"></span>Velocity 25%</span>
          </div>
        </div>
        ${am.topVerificationCommands.length > 0 ? `
        <div class="auto-commands">
          <div class="cmd-title">Top Verification Commands</div>
          <div class="cmd-list">
            ${commandTags}
          </div>
        </div>` : ''}
      </div>
    </div>
  </div>`;
}
|
|
2443
|
+
|
|
2096
2444
|
function renderCacheEfficiency(t) {
|
|
2097
2445
|
const hitRate = t.cacheHitRate;
|
|
2098
2446
|
const accentColor = hitRate >= 60 ? 'var(--accent-blue)' : hitRate >= 30 ? 'var(--accent-orange)' : 'var(--accent-red)';
|
|
@@ -2142,6 +2490,7 @@ function renderSessionsTable(sessions) {
|
|
|
2142
2490
|
<th onclick="sortTable('projectName')" class="${sortCol === 'projectName' ? 'sorted' : ''}">Project${thArrow('projectName')}</th>
|
|
2143
2491
|
<th onclick="sortTable('model')" class="${sortCol === 'model' ? 'sorted' : ''}">Model${thArrow('model')} <i class="info-tip" data-tip="Primary model used in the session. Models marked (sub) are subagents spawned for background tasks like code search and exploration.">i</i></th>
|
|
2144
2492
|
<th onclick="sortTable('msgCount')" class="${sortCol === 'msgCount' ? 'sorted' : ''}">Msgs${thArrow('msgCount')} <i class="info-tip" data-tip="Total messages in the session (your messages + Claude's responses).">i</i></th>
|
|
2493
|
+
<th onclick="sortTable('autopilotRatio')" class="${sortCol === 'autopilotRatio' ? 'sorted' : ''}">Autopilot${thArrow('autopilotRatio')} <i class="info-tip" data-tip="Agent responses per user message. Higher = more autonomous. 5x is excellent.">i</i></th>
|
|
2145
2494
|
<th onclick="sortTable('cost.totalCost')" class="${sortCol === 'cost.totalCost' ? 'sorted' : ''}">Cost${thArrow('cost.totalCost')} <i class="info-tip" data-tip="Estimated cost based on token usage and Anthropic's pricing. Calculated from input, output, cache read (90% discount), and cache write (25% premium) tokens.">i</i></th>
|
|
2146
2495
|
<th onclick="sortTable('commitCount')" class="${sortCol === 'commitCount' ? 'sorted' : ''}">Commits${thArrow('commitCount')} <i class="info-tip" data-tip="Git commits that touch files Claude edited in this session. Falls back to time-window for chat-only sessions.">i</i></th>
|
|
2147
2496
|
<th onclick="sortTable('linesAdded')" class="${sortCol === 'linesAdded' ? 'sorted' : ''}">Lines${thArrow('linesAdded')} <i class="info-tip" data-tip="Lines added / lines deleted in matched commits.">i</i></th>
|
|
@@ -2168,13 +2517,14 @@ function renderSessionsTable(sessions) {
|
|
|
2168
2517
|
<td>${s.projectName || '—'}</td>
|
|
2169
2518
|
<td>${modelDisplay}</td>
|
|
2170
2519
|
<td>${s.userMessageCount + s.assistantMessageCount}</td>
|
|
2520
|
+
<td><span class="autopilot-badge">${s.autopilotRatio > 0 ? s.autopilotRatio + 'x' : '—'}</span></td>
|
|
2171
2521
|
<td>$${s.cost.totalCost.toFixed(2)}</td>
|
|
2172
2522
|
<td>${s.commitCount}</td>
|
|
2173
2523
|
<td><span style="color:var(--accent-green)">+${s.linesAdded.toLocaleString()}</span> / <span style="color:var(--accent-red)">-${s.linesDeleted.toLocaleString()}</span></td>
|
|
2174
2524
|
<td><span class="grade-badge" style="background:${gradeBg};color:${gradeColor};">${s.grade}</span></td>
|
|
2175
2525
|
</tr>
|
|
2176
2526
|
<tr class="expand-row" id="expand-${idx}">
|
|
2177
|
-
<td colspan="
|
|
2527
|
+
<td colspan="9">
|
|
2178
2528
|
<div class="expand-content">
|
|
2179
2529
|
${s.commits.length > 0 ? s.commits.map(c => `
|
|
2180
2530
|
<div class="commit-item">
|
|
@@ -2556,7 +2906,7 @@ function bindEvents() {
|
|
|
2556
2906
|
});
|
|
2557
2907
|
|
|
2558
2908
|
// Animate bar fills
|
|
2559
|
-
document.querySelectorAll('.survival-bar .fill, .waste-bar .fill').forEach(el => {
|
|
2909
|
+
document.querySelectorAll('.survival-bar .fill, .waste-bar .fill, .metric-bar .fill').forEach(el => {
|
|
2560
2910
|
const targetWidth = el.style.width;
|
|
2561
2911
|
el.style.width = '0%';
|
|
2562
2912
|
requestAnimationFrame(() => {
|
package/src/index.js
CHANGED
|
@@ -90,7 +90,8 @@ async function main() {
|
|
|
90
90
|
.option('--no-open', 'do not auto-open browser')
|
|
91
91
|
.option('--json', 'output raw JSON to stdout instead of starting server')
|
|
92
92
|
.option('--project <name>', 'filter to specific project')
|
|
93
|
-
.option('--refresh', 'force full re-parse, ignore cache')
|
|
93
|
+
.option('--refresh', 'force full re-parse, ignore cache')
|
|
94
|
+
.option('--autonomy', 'print autonomy metrics table to stdout and exit');
|
|
94
95
|
|
|
95
96
|
program.parse();
|
|
96
97
|
const opts = program.opts();
|
|
@@ -115,6 +116,28 @@ async function main() {
|
|
|
115
116
|
process.exit(0);
|
|
116
117
|
}
|
|
117
118
|
|
|
119
|
+
if (opts.autonomy) {
|
|
120
|
+
const am = payload.autonomyMetrics;
|
|
121
|
+
const GRADE_COLOR = { A: '\x1b[32m', B: '\x1b[36m', C: '\x1b[33m', D: '\x1b[33m', F: '\x1b[31m' };
|
|
122
|
+
const gc = GRADE_COLOR[am.overall.grade] || '\x1b[0m';
|
|
123
|
+
const line = '\u2500'.repeat(35);
|
|
124
|
+
console.log('');
|
|
125
|
+
console.log(` ${gc}Autonomy Score: ${am.overall.grade}\x1b[0m (${am.overall.score}/100)`);
|
|
126
|
+
console.log(` ${line}`);
|
|
127
|
+
console.log(` Autopilot Ratio ${am.autopilotRatio}x`);
|
|
128
|
+
console.log(` Self-Heal Score ${am.selfHealScore}%`);
|
|
129
|
+
console.log(` Toolbelt Coverage ${am.toolbeltCoverage}%`);
|
|
130
|
+
console.log(` Commit Velocity ${am.commitVelocity !== null ? am.commitVelocity + ' steps/commit' : 'N/A'}`);
|
|
131
|
+
console.log(` ${line}`);
|
|
132
|
+
if (am.topVerificationCommands.length > 0) {
|
|
133
|
+
const top3 = am.topVerificationCommands.slice(0, 3)
|
|
134
|
+
.map(c => `${c.command} (${c.count})`).join(', ');
|
|
135
|
+
console.log(` Top Tests: ${top3}`);
|
|
136
|
+
}
|
|
137
|
+
console.log('');
|
|
138
|
+
process.exit(0);
|
|
139
|
+
}
|
|
140
|
+
|
|
118
141
|
// Start server — pass a rebuild function so /api/refresh can re-run the pipeline
|
|
119
142
|
const rebuild = () => buildPayload(claudeDir, days, opts.project, true);
|
|
120
143
|
const app = createServer(payload, rebuild);
|
package/src/metrics.js
CHANGED
|
@@ -149,6 +149,110 @@ function computeLineSurvival(commitsByRepo) {
|
|
|
149
149
|
return { totalAdded, totalChurned, surviving, survivalRate };
|
|
150
150
|
}
|
|
151
151
|
|
|
152
|
+
// ---- Autonomy metrics ----
|
|
153
|
+
const KNOWN_TOOLS = [
  'Bash', 'Read', 'Write', 'Edit', 'MultiEdit', 'Glob', 'Grep', 'LS',
  'WebFetch', 'WebSearch', 'NotebookEdit', 'NotebookRead', 'TodoWrite', 'Agent',
];
// Denominator for toolbelt coverage: the number of tool names listed above.
const TOTAL_AVAILABLE_TOOLS = KNOWN_TOOLS.length; // 14

/**
 * Convert a composite autonomy score into a letter grade.
 *
 * @param {number} score - Composite score, expected in the 0-100 range.
 * @returns {'A'|'B'|'C'|'D'|'F'} 'A' for >= 80, 'B' for >= 60, 'C' for >= 40,
 *   'D' for >= 20, otherwise 'F'.
 */
function computeAutonomyGrade(score) {
  if (score >= 80) return 'A';
  if (score >= 60) return 'B';
  if (score >= 40) return 'C';
  if (score >= 20) return 'D';
  return 'F';
}

/**
 * Compute per-session and aggregate autonomy metrics.
 *
 * Metrics:
 *  - autopilotRatio: assistant messages per user message (2 decimals).
 *  - selfHealScore: percentage of bash calls flagged as verification.
 *  - toolbeltCoverage: distinct tools used as a percentage of
 *    TOTAL_AVAILABLE_TOOLS, capped at 100.
 *  - commitVelocity: tool calls per commit, or null when there are no commits.
 *
 * Sessions that lack `toolCalls`, `bashCommands`, or any of the numeric
 * counters are treated as having empty/zero values instead of throwing or
 * producing NaN.
 *
 * @param {Array<object>} correlatedSessions - Sessions carrying `sessionId`,
 *   `userMessageCount`, `assistantMessageCount`, `totalBashCalls`,
 *   `verificationBashCalls`, `toolCalls` (tool name -> call count),
 *   `commitCount` and `bashCommands` ({ command, isVerification } entries).
 * @returns {object} Aggregate metrics, the composite `overall` score/grade,
 *   the top verification commands, the `perSession` list and the score
 *   `breakdown`.
 */
function computeAutonomyMetrics(correlatedSessions) {
  const perSession = correlatedSessions.map(s => {
    const userMessages = s.userMessageCount || 0;
    const assistantMessages = s.assistantMessageCount || 0;
    const autopilotRatio = userMessages > 0
      ? Math.round((assistantMessages / userMessages) * 100) / 100
      : 0;

    // Guard the counters the same way the aggregate pass below does.
    const bashCalls = s.totalBashCalls || 0;
    const verificationCalls = s.verificationBashCalls || 0;
    const selfHealScore = bashCalls > 0
      ? Math.round((verificationCalls / bashCalls) * 100)
      : 0;

    const toolCalls = s.toolCalls || {};
    const uniqueTools = Object.keys(toolCalls).length;
    // A session may use more distinct tool names than KNOWN_TOOLS lists;
    // cap at 100 so the composite score stays within 0-100.
    const toolbeltCoverage = Math.min(
      100,
      Math.round((uniqueTools / TOTAL_AVAILABLE_TOOLS) * 100),
    );

    const totalToolCalls = Object.values(toolCalls).reduce((sum, c) => sum + c, 0);
    const commitVelocity = s.commitCount > 0 ? Math.round(totalToolCalls / s.commitCount) : null;

    return { sessionId: s.sessionId, autopilotRatio, selfHealScore, toolbeltCoverage, commitVelocity };
  });

  // Aggregates
  const totalUser = correlatedSessions.reduce((s, c) => s + (c.userMessageCount || 0), 0);
  const totalAssistant = correlatedSessions.reduce((s, c) => s + (c.assistantMessageCount || 0), 0);
  const autopilotRatio = totalUser > 0
    ? Math.round((totalAssistant / totalUser) * 100) / 100
    : 0;

  const totalBash = correlatedSessions.reduce((s, c) => s + (c.totalBashCalls || 0), 0);
  const totalVerif = correlatedSessions.reduce((s, c) => s + (c.verificationBashCalls || 0), 0);
  const selfHealScore = totalBash > 0 ? Math.round((totalVerif / totalBash) * 100) : 0;

  // Coverage is the mean of the per-session percentages.
  const toolbeltCoverage = perSession.length > 0
    ? Math.round(perSession.reduce((s, a) => s + a.toolbeltCoverage, 0) / perSession.length)
    : 0;

  // Velocity is averaged only over sessions that produced commits.
  const withCommits = perSession.filter(a => a.commitVelocity !== null);
  const commitVelocity = withCommits.length > 0
    ? Math.round(withCommits.reduce((s, a) => s + a.commitVelocity, 0) / withCommits.length)
    : null;

  // Composite score (0-100): clamp and weight each component
  const autopilotScore = Math.round(Math.min(autopilotRatio / 5, 1) * 100); // 5x maps to 100
  const selfHealWeighted = selfHealScore;
  const toolbeltWeighted = toolbeltCoverage;
  // Fewer tool calls per commit scores higher; 100+ steps per commit scores 0.
  const velocityScore = commitVelocity !== null
    ? Math.round(Math.max(0, Math.min(1, 1 - (commitVelocity / 100))) * 100)
    : 50; // neutral when no commits

  const overallScore = Math.round(
    autopilotScore * 0.25 +
    selfHealWeighted * 0.30 +
    toolbeltWeighted * 0.20 +
    velocityScore * 0.25
  );

  // Top verification commands — extract the actual test/lint command, stripping cd/path prefixes.
  // A Map is used because keys come from command text, which could collide
  // with Object.prototype members on a plain object.
  const verifCounts = new Map();
  for (const s of correlatedSessions) {
    for (const bc of (s.bashCommands || [])) {
      if (!bc.isVerification) continue;
      // Strip "cd /path && ", "cd /path;", and "VAR=val " prefixes to get the real command
      const stripped = bc.command
        .replace(/^(?:cd\s+\S+\s*&&\s*)+/g, '')
        .replace(/^(?:cd\s+\S+\s*;\s*)+/g, '')
        .replace(/^(?:\w+=\S+\s+)+/g, '')
        .trim();
      // Group by the first three tokens; fall back to the raw command if stripping left nothing.
      const key = stripped.split(' ').slice(0, 3).join(' ') || bc.command.split(' ').slice(0, 3).join(' ');
      verifCounts.set(key, (verifCounts.get(key) || 0) + 1);
    }
  }
  const topVerificationCommands = [...verifCounts.entries()]
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10)
    .map(([command, count]) => ({ command, count }));

  return {
    overall: { score: overallScore, grade: computeAutonomyGrade(overallScore) },
    autopilotRatio,
    selfHealScore,
    toolbeltCoverage,
    commitVelocity,
    totalBashCalls: totalBash,
    totalVerificationCalls: totalVerif,
    topVerificationCommands,
    perSession,
    breakdown: { autopilotScore, selfHealWeighted, toolbeltWeighted, velocityScore },
  };
}
|
|
255
|
+
|
|
152
256
|
function computeEfficiencyGrade(costPerCommit, survivalRate) {
|
|
153
257
|
// Grade based on cost per commit (more meaningful than raw token count)
|
|
154
258
|
if (costPerCommit <= 2 && survivalRate >= 90) return 'A';
|
|
@@ -165,7 +269,7 @@ function computeSessionGrade(session) {
|
|
|
165
269
|
return computeEfficiencyGrade(costPerCommit, 80);
|
|
166
270
|
}
|
|
167
271
|
|
|
168
|
-
function generateInsights(summary, correlatedSessions, modelBreakdown, sessionBuckets, tokenAnalytics) {
|
|
272
|
+
function generateInsights(summary, correlatedSessions, modelBreakdown, sessionBuckets, tokenAnalytics, autonomyMetrics) {
|
|
169
273
|
const insights = [];
|
|
170
274
|
|
|
171
275
|
// Orphaned session rate
|
|
@@ -350,6 +454,34 @@ function generateInsights(summary, correlatedSessions, modelBreakdown, sessionBu
|
|
|
350
454
|
}
|
|
351
455
|
}
|
|
352
456
|
|
|
457
|
+
// ---- Autonomy insights ----
|
|
458
|
+
if (autonomyMetrics) {
|
|
459
|
+
const am = autonomyMetrics;
|
|
460
|
+
if (am.autopilotRatio >= 4) {
|
|
461
|
+
insights.push({
|
|
462
|
+
type: 'success',
|
|
463
|
+
text: `Your agent averaged ${am.autopilotRatio}x autopilot — it handled ${Math.round(am.autopilotRatio)} actions per prompt.`,
|
|
464
|
+
});
|
|
465
|
+
}
|
|
466
|
+
if (am.totalBashCalls > 5 && am.selfHealScore < 10) {
|
|
467
|
+
insights.push({
|
|
468
|
+
type: 'warning',
|
|
469
|
+
text: `Your agent ran ${am.totalBashCalls} bash commands but only ${am.selfHealScore}% were tests or lints — low self-healing.`,
|
|
470
|
+
});
|
|
471
|
+
} else if (am.selfHealScore >= 40) {
|
|
472
|
+
insights.push({
|
|
473
|
+
type: 'success',
|
|
474
|
+
text: `${am.selfHealScore}% of bash commands were tests/lints — your agent self-heals well.`,
|
|
475
|
+
});
|
|
476
|
+
}
|
|
477
|
+
if (am.toolbeltCoverage < 30 && correlatedSessions.length >= 3) {
|
|
478
|
+
insights.push({
|
|
479
|
+
type: 'tip',
|
|
480
|
+
text: `Your agent only used ${Math.round(am.toolbeltCoverage * TOTAL_AVAILABLE_TOOLS / 100)} of ${TOTAL_AVAILABLE_TOOLS} available tools — low toolbelt coverage.`,
|
|
481
|
+
});
|
|
482
|
+
}
|
|
483
|
+
}
|
|
484
|
+
|
|
353
485
|
return insights;
|
|
354
486
|
}
|
|
355
487
|
|
|
@@ -516,12 +648,6 @@ export function computeMetrics(correlatedSessions, organicCommits, commitsByRepo
|
|
|
516
648
|
mainBranchPct: p.commits > 0 ? Math.round((p.commitsOnMain / p.commits) * 100) : 0,
|
|
517
649
|
}));
|
|
518
650
|
|
|
519
|
-
// Add grades to sessions
|
|
520
|
-
const sessionsWithGrades = correlatedSessions.map(s => ({
|
|
521
|
-
...s,
|
|
522
|
-
grade: computeSessionGrade(s),
|
|
523
|
-
}));
|
|
524
|
-
|
|
525
651
|
// ---- Cost breakdown by time period ----
|
|
526
652
|
const now = new Date();
|
|
527
653
|
const todayStr = `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, '0')}-${String(now.getDate()).padStart(2, '0')}`;
|
|
@@ -567,7 +693,24 @@ export function computeMetrics(correlatedSessions, organicCommits, commitsByRepo
|
|
|
567
693
|
// ---- Token analytics ----
|
|
568
694
|
const tokenAnalytics = computeTokenAnalytics(correlatedSessions, lineSurvival, totalCommits, totalLinesAdded, modelBreakdown);
|
|
569
695
|
|
|
570
|
-
|
|
696
|
+
// ---- Autonomy metrics ----
|
|
697
|
+
const autonomyMetrics = computeAutonomyMetrics(correlatedSessions);
|
|
698
|
+
|
|
699
|
+
// Add grades + autonomy to sessions
|
|
700
|
+
const autonomyBySession = new Map(autonomyMetrics.perSession.map(a => [a.sessionId, a]));
|
|
701
|
+
const sessionsWithGrades = correlatedSessions.map(s => {
|
|
702
|
+
const a = autonomyBySession.get(s.sessionId);
|
|
703
|
+
return {
|
|
704
|
+
...s,
|
|
705
|
+
grade: computeSessionGrade(s),
|
|
706
|
+
autopilotRatio: a?.autopilotRatio ?? 0,
|
|
707
|
+
selfHealScore: a?.selfHealScore ?? 0,
|
|
708
|
+
toolbeltCoverage: a?.toolbeltCoverage ?? 0,
|
|
709
|
+
commitVelocity: a?.commitVelocity ?? null,
|
|
710
|
+
};
|
|
711
|
+
});
|
|
712
|
+
|
|
713
|
+
const insights = generateInsights(summary, correlatedSessions, modelBreakdown, sessionBuckets, tokenAnalytics, autonomyMetrics);
|
|
571
714
|
|
|
572
715
|
return {
|
|
573
716
|
meta: {
|
|
@@ -585,6 +728,7 @@ export function computeMetrics(correlatedSessions, organicCommits, commitsByRepo
|
|
|
585
728
|
},
|
|
586
729
|
summary,
|
|
587
730
|
tokenAnalytics,
|
|
731
|
+
autonomyMetrics,
|
|
588
732
|
insights,
|
|
589
733
|
daily,
|
|
590
734
|
projects,
|
package/src/server.js
CHANGED
|
@@ -134,5 +134,10 @@ export function createServer(initialPayload, rebuildFn) {
|
|
|
134
134
|
res.json(payload.tokenAnalytics);
|
|
135
135
|
});
|
|
136
136
|
|
|
137
|
+
// Autonomy metrics
|
|
138
|
+
app.get('/api/autonomy', (req, res) => {
|
|
139
|
+
res.json(payload.autonomyMetrics);
|
|
140
|
+
});
|
|
141
|
+
|
|
137
142
|
return app;
|
|
138
143
|
}
|