@jeganwrites/claudash 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +35 -0
- package/LICENSE +21 -0
- package/README.md +261 -0
- package/analyzer.py +890 -0
- package/bin/claudash.js +121 -0
- package/claude_ai_tracker.py +358 -0
- package/cli.py +1034 -0
- package/config.py +100 -0
- package/db.py +1156 -0
- package/fix_tracker.py +539 -0
- package/insights.py +359 -0
- package/mcp_server.py +414 -0
- package/package.json +39 -0
- package/scanner.py +385 -0
- package/server.py +762 -0
- package/templates/accounts.html +936 -0
- package/templates/dashboard.html +1742 -0
- package/tools/get-derived-keys.py +112 -0
- package/tools/mac-sync.py +386 -0
- package/tools/oauth_sync.py +308 -0
- package/tools/setup-pm2.sh +53 -0
- package/waste_patterns.py +334 -0
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Claudash OAuth sync — push claude.ai usage to a Claudash server using
|
|
3
|
+
Claude Code's existing OAuth access token.
|
|
4
|
+
|
|
5
|
+
This is the recommended collector for anyone who uses Claude Code: it
|
|
6
|
+
reuses the token that `claude` already put in ~/.claude/.credentials.json
|
|
7
|
+
so you don't need to scrape cookies or decrypt a keychain entry.
|
|
8
|
+
|
|
9
|
+
For claude.ai browser-only users (no Claude Code install), use the
|
|
10
|
+
companion tools/mac-sync.py script instead.
|
|
11
|
+
|
|
12
|
+
Works on Linux, macOS, and Windows. Pure Python stdlib. Zero pip deps.
|
|
13
|
+
|
|
14
|
+
Usage:
|
|
15
|
+
1. On your Claudash server:
|
|
16
|
+
python3 cli.py keys
|
|
17
|
+
Copy the sync_token value.
|
|
18
|
+
2. Edit this file, set SYNC_TOKEN to that value, and VPS_IP to your
|
|
19
|
+
server (or "localhost" if you SSH-tunnel).
|
|
20
|
+
3. Run:
|
|
21
|
+
python3 oauth_sync.py
|
|
22
|
+
4. Add to cron for automatic syncing:
|
|
23
|
+
*/15 * * * * /usr/bin/python3 /path/to/oauth_sync.py >/dev/null 2>&1
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
import json
|
|
27
|
+
import os
|
|
28
|
+
import ssl
|
|
29
|
+
import subprocess
|
|
30
|
+
import sys
|
|
31
|
+
import time
|
|
32
|
+
from urllib.request import Request, urlopen
|
|
33
|
+
from urllib.error import HTTPError, URLError
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# ─── Configuration ───────────────────────────────────────────────
# Edit these three values.
VPS_IP = "localhost"  # Claudash server host ("localhost" when reached via an SSH tunnel)
VPS_PORT = 8080       # port the Claudash server listens on
SYNC_TOKEN = ""       # sync_token printed by `python3 cli.py keys` on the server; required (see main())

# Where Claude Code stores credentials. First hit wins per file; the
# script iterates all of them to support multi-account setups (one
# account_id per Claude install).
CREDENTIALS_PATHS = [
    "~/.claude/.credentials.json",
    "~/.claude-personal/.credentials.json",
    "~/.claude-work/.credentials.json",
]

# macOS keychain fallback
# Service/account pair under which Claude Code stores the same JSON
# payload as .credentials.json; read via `security find-generic-password`.
KEYCHAIN_SERVICE = "Claude Code-credentials"
KEYCHAIN_ACCOUNT = "Claude Code"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# ─── Credential sources ──────────────────────────────────────────
|
|
57
|
+
|
|
58
|
+
def _read_credentials_file(path):
|
|
59
|
+
"""Return the parsed .credentials.json or None."""
|
|
60
|
+
expanded = os.path.expanduser(path)
|
|
61
|
+
if not os.path.exists(expanded):
|
|
62
|
+
return None
|
|
63
|
+
try:
|
|
64
|
+
with open(expanded, "r", encoding="utf-8") as f:
|
|
65
|
+
data = json.load(f)
|
|
66
|
+
except (OSError, json.JSONDecodeError) as e:
|
|
67
|
+
print(f" {path}: could not parse — {e}", file=sys.stderr)
|
|
68
|
+
return None
|
|
69
|
+
oauth = data.get("claudeAiOauth") or {}
|
|
70
|
+
if not oauth.get("accessToken"):
|
|
71
|
+
return None
|
|
72
|
+
return {"source": expanded, "oauth": oauth}
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _read_macos_keychain():
|
|
76
|
+
"""Return the parsed credentials stored in the macOS keychain, or None.
|
|
77
|
+
Keychain holds the same shape as .credentials.json. Only tries on
|
|
78
|
+
macOS — returns None on Linux/Windows."""
|
|
79
|
+
if sys.platform != "darwin":
|
|
80
|
+
return None
|
|
81
|
+
try:
|
|
82
|
+
raw = subprocess.check_output(
|
|
83
|
+
["security", "find-generic-password", "-w",
|
|
84
|
+
"-s", KEYCHAIN_SERVICE, "-a", KEYCHAIN_ACCOUNT],
|
|
85
|
+
stderr=subprocess.DEVNULL,
|
|
86
|
+
).decode("utf-8", errors="replace").strip()
|
|
87
|
+
except (subprocess.CalledProcessError, FileNotFoundError):
|
|
88
|
+
return None
|
|
89
|
+
if not raw:
|
|
90
|
+
return None
|
|
91
|
+
try:
|
|
92
|
+
data = json.loads(raw)
|
|
93
|
+
except json.JSONDecodeError:
|
|
94
|
+
return None
|
|
95
|
+
oauth = data.get("claudeAiOauth") or {}
|
|
96
|
+
if not oauth.get("accessToken"):
|
|
97
|
+
return None
|
|
98
|
+
return {"source": "macOS keychain", "oauth": oauth}
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def collect_credentials():
    """Yield each distinct credential source (files first, then keychain).

    A source whose accessToken was already yielded is skipped, so the
    same account synced from two places is only reported once.
    """
    emitted = set()

    def _fresh(candidate):
        # True when the candidate exists and its token hasn't been yielded yet.
        return bool(candidate) and candidate["oauth"].get("accessToken") not in emitted

    for cred_path in CREDENTIALS_PATHS:
        candidate = _read_credentials_file(cred_path)
        if _fresh(candidate):
            emitted.add(candidate["oauth"]["accessToken"])
            yield candidate

    # The keychain is consulted last; the de-dup above keeps it from
    # double-reporting an account already found in a file.
    candidate = _read_macos_keychain()
    if _fresh(candidate):
        emitted.add(candidate["oauth"]["accessToken"])
        yield candidate
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# ─── claude.ai API calls (OAuth Bearer) ──────────────────────────
|
|
117
|
+
|
|
118
|
+
def _bearer_request(url, access_token, timeout=15):
|
|
119
|
+
"""Authenticated GET to claude.ai using the OAuth access token.
|
|
120
|
+
Returns (data_dict, None) on success, (None, error_str) on failure."""
|
|
121
|
+
req = Request(url)
|
|
122
|
+
req.add_header("Authorization", f"Bearer {access_token}")
|
|
123
|
+
req.add_header("Accept", "application/json")
|
|
124
|
+
req.add_header("User-Agent", "Claudash-oauth-sync/1.0")
|
|
125
|
+
ctx = ssl.create_default_context()
|
|
126
|
+
try:
|
|
127
|
+
with urlopen(req, timeout=timeout, context=ctx) as resp:
|
|
128
|
+
body = resp.read().decode("utf-8", errors="replace")
|
|
129
|
+
return json.loads(body), None
|
|
130
|
+
except HTTPError as e:
|
|
131
|
+
if e.code in (401, 403):
|
|
132
|
+
return None, "expired"
|
|
133
|
+
return None, f"http_{e.code}"
|
|
134
|
+
except (URLError, OSError, json.JSONDecodeError, ValueError) as e:
|
|
135
|
+
return None, f"network_error:{type(e).__name__}"
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def fetch_account(access_token):
    """GET /api/account → (email, org_id, plan, err).

    On success err is None. On failure the first three slots are None
    and err carries the _bearer_request error string ("expired",
    "http_<code>", or "network_error:*"). `plan` defaults to "max" and
    is only downgraded to "pro" when the org capability strings mention
    "pro" but not "max".
    """
    data, err = _bearer_request("https://claude.ai/api/account", access_token)
    if err or not data:
        return None, None, None, err
    # The account payload's field names have varied; try both spellings.
    email = data.get("email_address") or data.get("email") or ""
    org_id = ""
    plan = "max"
    memberships = data.get("memberships") or data.get("organizations") or []
    if isinstance(memberships, list) and memberships:
        first = memberships[0]
        org = first.get("organization") if isinstance(first, dict) else None
        if isinstance(org, dict):
            # Membership wraps a nested organization object.
            org_id = org.get("uuid") or ""
            caps = org.get("capabilities") or []
            if isinstance(caps, list):
                # Best-effort plan inference from capability strings.
                joined = " ".join(str(c).lower() for c in caps)
                if "max" in joined:
                    plan = "max"
                elif "pro" in joined:
                    plan = "pro"
        elif isinstance(first, dict):
            # Membership entry IS the organization object itself.
            org_id = first.get("uuid") or first.get("id") or ""
    return email, org_id, plan, None
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _reset_epoch(resets_at):
    """Convert an ISO-8601 reset timestamp to epoch seconds (0 on failure).

    A trailing "Z" is rewritten to "+00:00" for fromisoformat; naive
    timestamps are assumed to be UTC.
    """
    if not isinstance(resets_at, str):
        return 0
    try:
        from datetime import datetime, timezone as _tz
        parsed = datetime.fromisoformat(resets_at.replace("Z", "+00:00"))
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=_tz.utc)
        return int(parsed.timestamp())
    except Exception:
        return 0


def fetch_usage(access_token, org_id):
    """GET /api/organizations/{org_id}/usage → (usage_dict, None).

    On failure returns (None, err) where err comes from _bearer_request,
    or "no_org_id" when org_id is empty. The usage dict normalizes the
    raw payload into the fields the Claudash server expects.
    """
    if not org_id:
        return None, "no_org_id"
    data, err = _bearer_request(
        f"https://claude.ai/api/organizations/{org_id}/usage", access_token)
    if err or not data:
        return None, err

    five_hour = data.get("five_hour") or {}
    seven_day = data.get("seven_day") or {}
    extra = data.get("extra_usage") or {}
    pct_used = float(five_hour.get("utilization") or 0)

    # The five-hour window end arrives as an ISO timestamp; the start is
    # derived by subtracting the 5 h (18000 s) window length.
    window_end = _reset_epoch(five_hour.get("resets_at") or five_hour.get("reset_at"))
    window_start = window_end - 18000 if window_end else 0

    usage = {
        "pct_used": round(pct_used, 2),
        "five_hour_utilization": pct_used,
        "seven_day_utilization": float(seven_day.get("utilization") or 0),
        "extra_credits_used": float(extra.get("used_credits") or 0),
        "extra_credits_limit": float(extra.get("monthly_limit") or 0),
        "window_start": window_start,
        "window_end": window_end,
        "tokens_used": int(pct_used * 10_000),  # normalized estimate
        "tokens_limit": 1_000_000,
        "messages_used": 0,
        "messages_limit": 0,
        "raw": json.dumps(data),
    }
    return usage, None
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
# ─── Push to Claudash server ─────────────────────────────────────
|
|
210
|
+
|
|
211
|
+
def push_to_claudash(access_token, org_id, email, usage, plan):
    """POST one account's snapshot to the Claudash server.

    Returns (ok, response_dict). `ok` is the server's "success" value;
    on HTTP or network failure it is False and the dict carries either
    the server's error body or a synthesized {"error": ...}.
    """
    payload = {
        "session_key": access_token,  # stored verbatim on the server
        "org_id": org_id,
        "browser": "oauth",
        "account_hint": email,
        "plan": plan,
    }
    if usage:
        payload["usage"] = usage

    endpoint = f"http://{VPS_IP}:{VPS_PORT}/api/claude-ai/sync"
    request = Request(
        endpoint,
        data=json.dumps(payload).encode("utf-8"),
        method="POST",
        headers={
            "Content-Type": "application/json",
            "X-Sync-Token": SYNC_TOKEN,
        },
    )
    try:
        with urlopen(request, timeout=15) as response:
            reply = json.loads(response.read().decode("utf-8", errors="replace"))
    except HTTPError as e:
        # Prefer the server's structured error body when it is JSON.
        try:
            return False, json.loads(e.read().decode("utf-8", errors="replace"))
        except Exception:
            return False, {"error": f"HTTP {e.code}"}
    except (URLError, OSError) as e:
        return False, {"error": f"network: {e}"}
    return reply.get("success", False), reply
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
# ─── Main ────────────────────────────────────────────────────────
|
|
241
|
+
|
|
242
|
+
def main():
    """Sync every discovered Claude Code account to the Claudash server.

    Exit codes: 1 = SYNC_TOKEN not configured, 2 = no credentials
    found, 3 = credentials found but nothing pushed, 0 = at least one
    account pushed.
    """
    # Hard requirement: the server rejects pushes without the sync token.
    if not SYNC_TOKEN:
        print("ERROR: SYNC_TOKEN is empty.", file=sys.stderr)
        print("", file=sys.stderr)
        print("Get your token on the Claudash server:", file=sys.stderr)
        print(" python3 cli.py keys", file=sys.stderr)
        print("", file=sys.stderr)
        print("Then edit this file and set SYNC_TOKEN at the top.", file=sys.stderr)
        sys.exit(1)

    sources = list(collect_credentials())
    if not sources:
        print("No Claude Code credentials found.", file=sys.stderr)
        print("", file=sys.stderr)
        print("Run 'claude' in your terminal to authenticate first,", file=sys.stderr)
        print("or edit CREDENTIALS_PATHS at the top of this file.", file=sys.stderr)
        sys.exit(2)

    pushed = 0
    for src in sources:
        oauth = src["oauth"]
        token = oauth.get("accessToken") or ""
        expires_at = oauth.get("expiresAt") or 0
        # Claude Code stores expiresAt in milliseconds
        # (values > 1e12 can only be ms; seconds would be year ~33658).
        if expires_at and expires_at > 1e12:
            expires_at_sec = expires_at / 1000.0
        else:
            expires_at_sec = expires_at or 0
        # Skip locally-expired tokens without hitting the network.
        if expires_at_sec and expires_at_sec < time.time():
            print(f" {src['source']}: token expired at "
                  f"{time.strftime('%Y-%m-%d %H:%M UTC', time.gmtime(expires_at_sec))}",
                  file=sys.stderr)
            continue

        # Resolve email/org/plan; "expired" means claude.ai rejected the token.
        email, org_id, plan, err = fetch_account(token)
        if err == "expired":
            print(f" {src['source']}: token rejected by claude.ai "
                  "(run `claude` to refresh)", file=sys.stderr)
            continue
        if err:
            print(f" {src['source']}: account lookup failed — {err}", file=sys.stderr)
            continue
        if not org_id:
            print(f" {src['source']}: no org_id in /api/account response", file=sys.stderr)
            continue

        # Usage is best-effort: a failed fetch still pushes the account
        # itself (with usage=None) so the server learns the session key.
        usage, usage_err = fetch_usage(token, org_id)
        if usage_err:
            print(f" {src['source']}: {email} ({plan}) — usage fetch failed ({usage_err})", file=sys.stderr)
            usage = None

        ok, resp = push_to_claudash(token, org_id, email, usage, plan)
        if ok:
            pct = (usage or {}).get("pct_used", 0)
            pct_str = f" — {pct:.1f}%" if usage else ""
            print(f" {src['source']}: {email or '(no email)'} ({plan}){pct_str} → pushed OK")
            pushed += 1
        else:
            print(f" {src['source']}: push failed — {resp.get('error') if isinstance(resp, dict) else resp}", file=sys.stderr)

    print()
    print(f"Claudash OAuth sync complete: {pushed}/{len(sources)} accounts pushed")
    sys.exit(0 if pushed > 0 else 3)
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
if __name__ == "__main__":
    # Allow running directly (e.g. from cron) without any packaging.
    main()
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
#!/bin/bash
# Sets up Claudash as a PM2 managed process
# PM2 auto-restarts on crash, survives VPS reboots

set -e

# Resolve the package root: this script lives in tools/, so go one level up.
SCRIPT_DIR="$(cd "$(dirname "$0")/.." && pwd)"

echo "Setting up Claudash with PM2..."

# Install PM2 if not present
which pm2 >/dev/null 2>&1 || npm install -g pm2

# Create PM2 ecosystem file
# NOTE: the heredoc delimiter is quoted ('PMEOF'), so nothing inside is
# expanded by the shell — __dirname is resolved by Node when PM2 loads it.
cat > "$SCRIPT_DIR/ecosystem.config.js" << 'PMEOF'
module.exports = {
  apps: [{
    name: 'claudash',
    script: 'cli.py',
    interpreter: 'python3',
    args: 'dashboard --skip-init --no-browser',
    cwd: __dirname,
    watch: false,
    autorestart: true,
    max_restarts: 10,
    min_uptime: '10s',
    restart_delay: 5000,
    error_file: '/tmp/claudash-error.log',
    out_file: '/tmp/claudash-out.log',
    log_date_format: 'YYYY-MM-DD HH:mm:ss',
    env: {
      PORT: 8080
    }
  }]
}
PMEOF

# Start with PM2
cd "$SCRIPT_DIR"
pm2 start ecosystem.config.js
pm2 save
# `pm2 startup` may need sudo or just print instructions — don't abort on it.
pm2 startup || true

echo ""
echo "Claudash is now managed by PM2."
echo "Commands:"
echo " pm2 status — see if running"
echo " pm2 logs claudash — see logs"
echo " pm2 restart claudash — restart"
echo " pm2 stop claudash — stop"
echo ""
echo "Dashboard: http://localhost:8080"
echo "On VPS: ssh -L 8080:localhost:8080 your-server"
|
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
"""Waste pattern detection — Claudash intelligence layer.
|
|
2
|
+
|
|
3
|
+
Detects four patterns of wasteful Claude Code usage:
|
|
4
|
+
|
|
5
|
+
1. FLOUNDERING — same tool name called >=4 times in a row
|
|
6
|
+
without any other tool, suggesting Claude
|
|
7
|
+
is stuck retrying.
|
|
8
|
+
2. REPEATED_READS — the same file is read via `Read` >=3 times
|
|
9
|
+
in one session (cache churn, re-fetching).
|
|
10
|
+
3. COST_OUTLIER — a single session's cost is >3x the 30-day
|
|
11
|
+
per-project average.
|
|
12
|
+
4. DEEP_CONTEXT_NO_COMPACT — session has >100 turns and zero compaction
|
|
13
|
+
events (`/compact` never fired).
|
|
14
|
+
|
|
15
|
+
Each detection is UPSERTed into `waste_events` keyed on
|
|
16
|
+
(session_id, pattern_type).
|
|
17
|
+
|
|
18
|
+
This module reads JSONL files directly via the scan_state table — it
|
|
19
|
+
does NOT require new columns on the sessions table for tool_use data.
|
|
20
|
+
That keeps the waste detection independent of the main ingestion path.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
import hashlib
|
|
24
|
+
import json
|
|
25
|
+
import os
|
|
26
|
+
import sqlite3
|
|
27
|
+
import time
|
|
28
|
+
from collections import defaultdict
|
|
29
|
+
|
|
30
|
+
from db import get_conn, insert_waste_event, clear_waste_events, get_setting, set_setting
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# ─── Parameters ──────────────────────────────────────────────────
# Detector tuning knobs — raise a threshold to reduce noise.

FLOUNDER_THRESHOLD = 4  # consecutive same-tool calls
REPEATED_READ_THRESHOLD = 3  # same file read N times in one session
COST_OUTLIER_MULTIPLIER = 3.0  # session cost > Nx project avg
DEEP_TURN_THRESHOLD = 100  # turns in a session
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ─── JSONL tool-use extraction ───────────────────────────────────
|
|
42
|
+
|
|
43
|
+
def _iter_assistant_tool_calls(filepath):
|
|
44
|
+
"""Yield (turn_index, tool_name, tool_input_dict) for every tool_use
|
|
45
|
+
block in the assistant messages of a Claude Code JSONL file.
|
|
46
|
+
|
|
47
|
+
Claude Code writes one JSON object per line. Assistant messages with
|
|
48
|
+
tool use have shape:
|
|
49
|
+
|
|
50
|
+
{"type": "assistant",
|
|
51
|
+
"message": {"role": "assistant",
|
|
52
|
+
"content": [{"type": "tool_use",
|
|
53
|
+
"name": "Bash",
|
|
54
|
+
"input": {"command": "..."}}]}}
|
|
55
|
+
"""
|
|
56
|
+
turn = 0
|
|
57
|
+
try:
|
|
58
|
+
with open(filepath, "r", errors="replace") as f:
|
|
59
|
+
for line in f:
|
|
60
|
+
line = line.strip()
|
|
61
|
+
if not line:
|
|
62
|
+
continue
|
|
63
|
+
try:
|
|
64
|
+
obj = json.loads(line)
|
|
65
|
+
except json.JSONDecodeError:
|
|
66
|
+
continue
|
|
67
|
+
turn += 1
|
|
68
|
+
if obj.get("type") != "assistant":
|
|
69
|
+
continue
|
|
70
|
+
msg = obj.get("message") or {}
|
|
71
|
+
if not isinstance(msg, dict):
|
|
72
|
+
continue
|
|
73
|
+
content = msg.get("content")
|
|
74
|
+
if not isinstance(content, list):
|
|
75
|
+
continue
|
|
76
|
+
for block in content:
|
|
77
|
+
if not isinstance(block, dict):
|
|
78
|
+
continue
|
|
79
|
+
if block.get("type") != "tool_use":
|
|
80
|
+
continue
|
|
81
|
+
name = block.get("name") or ""
|
|
82
|
+
inp = block.get("input") or {}
|
|
83
|
+
if isinstance(inp, dict):
|
|
84
|
+
yield turn, name, inp
|
|
85
|
+
except OSError:
|
|
86
|
+
return
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _file_session_id(filepath):
|
|
90
|
+
"""Return the first sessionId/session_id/uuid in the file, or None."""
|
|
91
|
+
try:
|
|
92
|
+
with open(filepath, "r", errors="replace") as f:
|
|
93
|
+
for line in f:
|
|
94
|
+
line = line.strip()
|
|
95
|
+
if not line:
|
|
96
|
+
continue
|
|
97
|
+
try:
|
|
98
|
+
obj = json.loads(line)
|
|
99
|
+
except json.JSONDecodeError:
|
|
100
|
+
continue
|
|
101
|
+
sid = obj.get("sessionId") or obj.get("session_id") or obj.get("uuid")
|
|
102
|
+
if sid:
|
|
103
|
+
return sid
|
|
104
|
+
except OSError:
|
|
105
|
+
return None
|
|
106
|
+
return None
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# ─── Pattern detectors ───────────────────────────────────────────
|
|
110
|
+
|
|
111
|
+
def _input_hash(inp):
|
|
112
|
+
"""Short hash of tool input for deduplication. Identical (tool, input)
|
|
113
|
+
pairs are intentional retries, not floundering."""
|
|
114
|
+
if not inp:
|
|
115
|
+
return ""
|
|
116
|
+
return hashlib.md5(str(inp)[:200].encode()).hexdigest()[:8]
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _detect_floundering(tool_calls):
    """Return (count, detail) for FLOUNDERING.

    A flounder run is >= FLOUNDER_THRESHOLD consecutive calls with the
    SAME (tool_name, input_hash) — i.e. Claude re-issuing the exact
    same call over and over, suggesting it is stuck retrying. Calling
    the same tool repeatedly with DIFFERENT inputs (e.g. editing
    several files with Edit) is NOT flagged, because each input hashes
    differently and breaks the run. (The previous docstring stated the
    inverse — that identical (tool, input) pairs are exempt — which
    contradicted this implementation.)

    `tool_calls` is an iterable of (turn, name, input) tuples. The
    returned count is the number of runs; detail lists each run's tool,
    length, and starting turn, plus the total number of floundering
    calls.
    """
    runs = []
    current_key = None
    current_name = None
    current_len = 0
    current_start = 0
    for turn, name, inp in tool_calls:
        key = (name, _input_hash(inp))
        if key == current_key:
            current_len += 1
        else:
            # Run ended — record it if it was long enough.
            if current_name and current_len >= FLOUNDER_THRESHOLD:
                runs.append({"tool": current_name, "length": current_len, "start_turn": current_start})
            current_key = key
            current_name = name
            current_len = 1
            current_start = turn
    # Flush the final run.
    if current_name and current_len >= FLOUNDER_THRESHOLD:
        runs.append({"tool": current_name, "length": current_len, "start_turn": current_start})
    return len(runs), {"runs": runs, "total_flounder_calls": sum(r["length"] for r in runs)}
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _detect_repeated_reads(tool_calls):
    """Return (count, detail) for REPEATED_READS.

    Counts how often each file path appears as the target of a `Read`
    tool call; paths read at least REPEATED_READ_THRESHOLD times are
    reported. Detail carries one {"path", "reads"} entry per offender.
    """
    counts = {}
    for _turn, tool_name, tool_input in tool_calls:
        if tool_name != "Read":
            continue
        target = (tool_input.get("file_path")
                  or tool_input.get("path")
                  or tool_input.get("filename"))
        if target:
            counts[target] = counts.get(target, 0) + 1
    offenders = [
        {"path": path, "reads": n}
        for path, n in counts.items()
        if n >= REPEATED_READ_THRESHOLD
    ]
    return len(offenders), {"files": offenders}
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# ─── Main detection pass ─────────────────────────────────────────
|
|
161
|
+
|
|
162
|
+
def detect_all(conn=None):
    """Run every detector against the latest scan and refresh waste_events.

    When `conn` is None a connection is opened via get_conn() and closed
    before returning. Incremental: the "last_waste_scan" setting gates
    which JSONL files are re-parsed; waste_events is only cleared on the
    first (full) run. insert_waste_event is expected to UPSERT on
    (session_id, pattern_type), so re-detections overwrite in place.

    Returns a dict with per-pattern counts for logging.

    NOTE(review): row access like info["project"] assumes get_conn()
    sets a mapping-style row factory (e.g. sqlite3.Row) — confirm in db.py.
    """
    should_close = False
    if conn is None:
        conn = get_conn()
        should_close = True

    # Incremental: only reprocess sessions newer than last waste scan
    last_waste_scan = get_setting(conn, "last_waste_scan")
    last_waste_ts = int(last_waste_scan) if last_waste_scan else 0

    # Only clear waste events on full re-scan (first run or reset)
    if last_waste_ts == 0:
        clear_waste_events(conn)

    # ── 1 & 2: per-file detectors (FLOUNDERING, REPEATED_READS) ──
    # These need the raw tool_use stream, so the JSONL files listed in
    # scan_state are re-read rather than querying the sessions table.
    if last_waste_ts > 0:
        file_rows = conn.execute(
            "SELECT file_path FROM scan_state WHERE last_scanned >= ? ORDER BY file_path",
            (last_waste_ts,),
        ).fetchall()
    else:
        file_rows = conn.execute("SELECT file_path FROM scan_state ORDER BY file_path").fetchall()
    flounder_count = 0
    repeated_count = 0

    for r in file_rows:
        filepath = r[0]
        # The transcript may have been deleted since it was scanned.
        if not os.path.isfile(filepath):
            continue
        sid = _file_session_id(filepath)
        if not sid:
            continue

        # Look up project/account/cost from sessions table
        info = conn.execute(
            "SELECT project, account, COALESCE(SUM(cost_usd), 0) AS cost, COUNT(*) AS turns "
            "FROM sessions WHERE session_id = ?",
            (sid,),
        ).fetchone()
        # Aggregate with no GROUP BY always yields one row; a NULL
        # project means the session isn't in the DB — skip it.
        if not info or not info["project"]:
            continue
        project, account = info["project"], info["account"]
        session_cost = info["cost"] or 0
        turn_count = info["turns"] or 0

        tool_calls = list(_iter_assistant_tool_calls(filepath))
        if not tool_calls:
            continue

        # FLOUNDERING
        n_flounder, flounder_detail = _detect_floundering(tool_calls)
        if n_flounder > 0:
            # Two or more stuck runs in one session escalates severity.
            severity = "red" if n_flounder >= 2 else "amber"
            insert_waste_event(
                conn, sid, project, account, "floundering", severity,
                turn_count, session_cost, flounder_detail,
            )
            flounder_count += 1

        # REPEATED_READS
        n_rep, rep_detail = _detect_repeated_reads(tool_calls)
        if n_rep > 0:
            severity = "amber"
            insert_waste_event(
                conn, sid, project, account, "repeated_reads", severity,
                turn_count, session_cost, rep_detail,
            )
            repeated_count += 1

    # ── 3: COST_OUTLIER — sessions whose cost is >3x project 30d avg ──
    # Runs over the full 30-day window every pass (not incremental);
    # UPSERT keying keeps re-detections from duplicating rows.
    outlier_count = 0
    proj_avgs = {
        r[0]: (r[1] or 0) for r in conn.execute(
            "SELECT project, AVG(session_cost) FROM "
            "(SELECT project, session_id, SUM(cost_usd) AS session_cost "
            " FROM sessions "
            " WHERE timestamp >= strftime('%s','now') - 30*86400 "
            " GROUP BY project, session_id) "
            "GROUP BY project"
        ).fetchall()
    }
    session_totals = conn.execute(
        "SELECT session_id, project, account, "
        " SUM(cost_usd) AS cost, COUNT(*) AS turns "
        "FROM sessions "
        "WHERE timestamp >= strftime('%s','now') - 30*86400 "
        "GROUP BY session_id, project, account"
    ).fetchall()
    for s in session_totals:
        avg = proj_avgs.get(s["project"], 0)
        if avg <= 0:
            continue
        if (s["cost"] or 0) > avg * COST_OUTLIER_MULTIPLIER:
            insert_waste_event(
                conn, s["session_id"], s["project"], s["account"],
                "cost_outlier", "amber", s["turns"], s["cost"],
                {"session_cost": round(s["cost"], 4),
                 "project_avg": round(avg, 4),
                 "multiplier": round(s["cost"] / avg, 1)},
            )
            outlier_count += 1

    # ── 4: DEEP_CONTEXT_NO_COMPACT — >100 turns with zero compaction ──
    deep_count = 0
    deep_sessions = conn.execute(
        "SELECT session_id, project, account, COUNT(*) AS turns, "
        " SUM(cost_usd) AS cost, MAX(compaction_detected) AS any_compact "
        "FROM sessions "
        "GROUP BY session_id "
        "HAVING turns > ? AND any_compact = 0",
        (DEEP_TURN_THRESHOLD,),
    ).fetchall()
    for s in deep_sessions:
        insert_waste_event(
            conn, s["session_id"], s["project"], s["account"],
            "deep_no_compact", "amber", s["turns"], s["cost"] or 0,
            {"turns": s["turns"]},
        )
        deep_count += 1

    # Record scan timestamp for incremental next run
    set_setting(conn, "last_waste_scan", str(int(time.time())))
    conn.commit()

    summary = {
        "floundering": flounder_count,
        "repeated_reads": repeated_count,
        "cost_outliers": outlier_count,
        "deep_no_compact": deep_count,
    }

    if should_close:
        conn.close()
    return summary
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def waste_summary_by_project(conn, days=7):
|
|
303
|
+
"""Aggregate waste_events by project for the last N days. Used by
|
|
304
|
+
analyzer.full_analysis → /api/data → dashboard UI."""
|
|
305
|
+
since = int(__import__("time").time()) - (days * 86400)
|
|
306
|
+
rows = conn.execute(
|
|
307
|
+
"SELECT project, pattern_type, COUNT(*) AS n, "
|
|
308
|
+
" SUM(token_cost) AS cost "
|
|
309
|
+
"FROM waste_events WHERE detected_at >= ? "
|
|
310
|
+
"GROUP BY project, pattern_type",
|
|
311
|
+
(since,),
|
|
312
|
+
).fetchall()
|
|
313
|
+
result = defaultdict(lambda: {
|
|
314
|
+
"floundering_sessions": 0,
|
|
315
|
+
"repeated_read_sessions": 0,
|
|
316
|
+
"cost_outliers": 0,
|
|
317
|
+
"deep_no_compact": 0,
|
|
318
|
+
"total_waste_cost_est": 0.0,
|
|
319
|
+
})
|
|
320
|
+
for r in rows:
|
|
321
|
+
proj = r["project"] or "Other"
|
|
322
|
+
pt = r["pattern_type"]
|
|
323
|
+
n = r["n"] or 0
|
|
324
|
+
cost = r["cost"] or 0
|
|
325
|
+
if pt == "floundering":
|
|
326
|
+
result[proj]["floundering_sessions"] = n
|
|
327
|
+
result[proj]["total_waste_cost_est"] += cost
|
|
328
|
+
elif pt == "repeated_reads":
|
|
329
|
+
result[proj]["repeated_read_sessions"] = n
|
|
330
|
+
elif pt == "cost_outlier":
|
|
331
|
+
result[proj]["cost_outliers"] = n
|
|
332
|
+
elif pt == "deep_no_compact":
|
|
333
|
+
result[proj]["deep_no_compact"] = n
|
|
334
|
+
return {p: dict(v) for p, v in result.items()}
|