@jeganwrites/claudash 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,308 @@
1
+ #!/usr/bin/env python3
2
+ """Claudash OAuth sync — push claude.ai usage to a Claudash server using
3
+ Claude Code's existing OAuth access token.
4
+
5
+ This is the recommended collector for anyone who uses Claude Code: it
6
+ reuses the token that `claude` already put in ~/.claude/.credentials.json
7
+ so you don't need to scrape cookies or decrypt a keychain entry.
8
+
9
+ For claude.ai browser-only users (no Claude Code install), use the
10
+ companion tools/mac-sync.py script instead.
11
+
12
+ Works on Linux, macOS, and Windows. Pure Python stdlib. Zero pip deps.
13
+
14
+ Usage:
15
+ 1. On your Claudash server:
16
+ python3 cli.py keys
17
+ Copy the sync_token value.
18
+ 2. Edit this file, set SYNC_TOKEN to that value, and VPS_IP to your
19
+ server (or "localhost" if you SSH-tunnel).
20
+ 3. Run:
21
+ python3 oauth_sync.py
22
+ 4. Add to cron for automatic syncing:
23
+ */15 * * * * /usr/bin/python3 /path/to/oauth_sync.py >/dev/null 2>&1
24
+ """
25
+
26
+ import json
27
+ import os
28
+ import ssl
29
+ import subprocess
30
+ import sys
31
+ import time
32
+ from urllib.request import Request, urlopen
33
+ from urllib.error import HTTPError, URLError
34
+
35
+
36
+ # ─── Configuration ───────────────────────────────────────────────
37
+ # Edit these three values.
38
+ VPS_IP = "localhost"
39
+ VPS_PORT = 8080
40
+ SYNC_TOKEN = ""
41
+
42
+ # Where Claude Code stores credentials. First hit wins per file; the
43
+ # script iterates all of them to support multi-account setups (one
44
+ # account_id per Claude install).
45
+ CREDENTIALS_PATHS = [
46
+ "~/.claude/.credentials.json",
47
+ "~/.claude-personal/.credentials.json",
48
+ "~/.claude-work/.credentials.json",
49
+ ]
50
+
51
+ # macOS keychain fallback
52
+ KEYCHAIN_SERVICE = "Claude Code-credentials"
53
+ KEYCHAIN_ACCOUNT = "Claude Code"
54
+
55
+
56
+ # ─── Credential sources ──────────────────────────────────────────
57
+
58
def _read_credentials_file(path):
    """Parse a Claude Code .credentials.json file.

    Returns {"source": <expanded path>, "oauth": <claudeAiOauth dict>}
    when the file exists, parses as JSON, and contains an accessToken;
    otherwise None. Parse failures are reported on stderr.
    """
    full_path = os.path.expanduser(path)
    if not os.path.exists(full_path):
        return None
    try:
        with open(full_path, "r", encoding="utf-8") as fh:
            parsed = json.load(fh)
    except (OSError, json.JSONDecodeError) as exc:
        print(f" {path}: could not parse — {exc}", file=sys.stderr)
        return None
    oauth_blob = parsed.get("claudeAiOauth") or {}
    if not oauth_blob.get("accessToken"):
        return None
    return {"source": full_path, "oauth": oauth_blob}
73
+
74
+
75
def _read_macos_keychain():
    """Fetch Claude Code credentials from the macOS keychain.

    The keychain item holds the same JSON shape as .credentials.json.
    Returns {"source": "macOS keychain", "oauth": ...} on success, or
    None. Always None on non-macOS platforms or when the `security`
    tool is missing, errors out, or returns something unusable.
    """
    if sys.platform != "darwin":
        return None
    lookup_cmd = [
        "security", "find-generic-password", "-w",
        "-s", KEYCHAIN_SERVICE, "-a", KEYCHAIN_ACCOUNT,
    ]
    try:
        raw_bytes = subprocess.check_output(lookup_cmd, stderr=subprocess.DEVNULL)
    except (subprocess.CalledProcessError, FileNotFoundError):
        return None
    raw_text = raw_bytes.decode("utf-8", errors="replace").strip()
    if not raw_text:
        return None
    try:
        parsed = json.loads(raw_text)
    except json.JSONDecodeError:
        return None
    oauth = parsed.get("claudeAiOauth") or {}
    if oauth.get("accessToken"):
        return {"source": "macOS keychain", "oauth": oauth}
    return None
99
+
100
+
101
def collect_credentials():
    """Yield every distinct credential source (files first, then keychain).

    Sources are de-duplicated by accessToken so the same account is never
    yielded twice even if it appears in multiple credential files and the
    macOS keychain.
    """
    yielded_tokens = set()

    def _dedupe(candidate):
        # Accept a candidate only the first time its token is seen.
        if not candidate:
            return None
        token = candidate["oauth"].get("accessToken")
        if token in yielded_tokens:
            return None
        yielded_tokens.add(token)
        return candidate

    for cred_path in CREDENTIALS_PATHS:
        hit = _dedupe(_read_credentials_file(cred_path))
        if hit:
            yield hit
    # Keychain is the secondary source; duplicates were filtered above.
    hit = _dedupe(_read_macos_keychain())
    if hit:
        yield hit
114
+
115
+
116
+ # ─── claude.ai API calls (OAuth Bearer) ──────────────────────────
117
+
118
def _bearer_request(url, access_token, timeout=15):
    """Authenticated GET against claude.ai using the OAuth access token.

    Returns (parsed_json, None) on success, or (None, error_code) where
    error_code is "expired" for HTTP 401/403, "http_<code>" for other
    HTTP errors, and "network_error:<ExceptionName>" for transport or
    JSON-parse failures.
    """
    request = Request(url)
    for header, value in (
        ("Authorization", f"Bearer {access_token}"),
        ("Accept", "application/json"),
        ("User-Agent", "Claudash-oauth-sync/1.0"),
    ):
        request.add_header(header, value)
    tls_ctx = ssl.create_default_context()
    # HTTPError is a subclass of URLError/OSError, so it must be caught first.
    try:
        with urlopen(request, timeout=timeout, context=tls_ctx) as response:
            payload = response.read().decode("utf-8", errors="replace")
            return json.loads(payload), None
    except HTTPError as http_err:
        if http_err.code in (401, 403):
            return None, "expired"
        return None, f"http_{http_err.code}"
    except (URLError, OSError, json.JSONDecodeError, ValueError) as other_err:
        return None, f"network_error:{type(other_err).__name__}"
136
+
137
+
138
def fetch_account(access_token):
    """GET https://claude.ai/api/account.

    Returns a 4-tuple (email, org_id, plan, err):
      * on success, err is None and the first three are best-effort
        strings pulled from the response; plan defaults to "max" when
        the org capabilities list is absent or mentions neither "max"
        nor "pro";
      * on failure, the first three are None and err is the error code
        from _bearer_request ("expired", "http_<code>", ...).
    """
    data, err = _bearer_request("https://claude.ai/api/account", access_token)
    if err or not data:
        return None, None, None, err
    # Both field names have been observed for the account email.
    email = data.get("email_address") or data.get("email") or ""
    org_id = ""
    plan = "max"  # default when capabilities are missing/unrecognized
    # The response may expose orgs under either key.
    memberships = data.get("memberships") or data.get("organizations") or []
    if isinstance(memberships, list) and memberships:
        first = memberships[0]
        org = first.get("organization") if isinstance(first, dict) else None
        if isinstance(org, dict):
            org_id = org.get("uuid") or ""
            caps = org.get("capabilities") or []
            if isinstance(caps, list):
                # Substring match against the joined capability names.
                joined = " ".join(str(c).lower() for c in caps)
                if "max" in joined:
                    plan = "max"
                elif "pro" in joined:
                    plan = "pro"
        elif isinstance(first, dict):
            # Flat membership object without a nested organization.
            org_id = first.get("uuid") or first.get("id") or ""
    return email, org_id, plan, None
162
+
163
+
164
def fetch_usage(access_token, org_id):
    """GET /api/organizations/{org_id}/usage and normalize the payload.

    Returns (usage_dict, None) on success, or (None, err) where err is
    "no_org_id" or an error code propagated from _bearer_request.
    """
    if not org_id:
        return None, "no_org_id"
    data, err = _bearer_request(
        f"https://claude.ai/api/organizations/{org_id}/usage", access_token
    )
    if err or not data:
        return None, err

    five_hour = data.get("five_hour") or {}
    seven_day = data.get("seven_day") or {}
    extra = data.get("extra_usage") or {}
    pct_used = float(five_hour.get("utilization") or 0)

    # Convert the ISO-8601 reset timestamp into a UTC epoch second.
    window_end = 0
    resets_at = five_hour.get("resets_at") or five_hour.get("reset_at")
    if isinstance(resets_at, str):
        try:
            from datetime import datetime, timezone as _tz
            parsed = datetime.fromisoformat(resets_at.replace("Z", "+00:00"))
            if parsed.tzinfo is None:
                parsed = parsed.replace(tzinfo=_tz.utc)
            window_end = int(parsed.timestamp())
        except Exception:
            window_end = 0  # best-effort: unparseable timestamp → no window
    # The five-hour window opened 5*3600 = 18000 seconds before it resets.
    window_start = (window_end - 18000) if window_end else 0

    normalized = {
        "pct_used": round(pct_used, 2),
        "five_hour_utilization": pct_used,
        "seven_day_utilization": float(seven_day.get("utilization") or 0),
        "extra_credits_used": float(extra.get("used_credits") or 0),
        "extra_credits_limit": float(extra.get("monthly_limit") or 0),
        "window_start": window_start,
        "window_end": window_end,
        "tokens_used": int(pct_used * 10_000),  # normalized estimate
        "tokens_limit": 1_000_000,
        "messages_used": 0,
        "messages_limit": 0,
        "raw": json.dumps(data),
    }
    return normalized, None
207
+
208
+
209
+ # ─── Push to Claudash server ─────────────────────────────────────
210
+
211
def push_to_claudash(access_token, org_id, email, usage, plan):
    """POST the account snapshot to the Claudash server.

    Returns (success_bool, response_dict); network and HTTP failures are
    reported in-band rather than raised.
    """
    endpoint = f"http://{VPS_IP}:{VPS_PORT}/api/claude-ai/sync"
    payload = {
        "session_key": access_token,  # stored verbatim on the server
        "org_id": org_id,
        "browser": "oauth",
        "account_hint": email,
        "plan": plan,
    }
    if usage:
        payload["usage"] = usage
    encoded = json.dumps(payload).encode("utf-8")
    request = Request(endpoint, data=encoded, method="POST")
    request.add_header("Content-Type", "application/json")
    request.add_header("X-Sync-Token", SYNC_TOKEN)
    try:
        with urlopen(request, timeout=15) as response:
            reply = json.loads(response.read().decode("utf-8", errors="replace"))
            return reply.get("success", False), reply
    except HTTPError as http_err:
        # Prefer the server's structured error body when it is valid JSON.
        try:
            return False, json.loads(http_err.read().decode("utf-8", errors="replace"))
        except Exception:
            return False, {"error": f"HTTP {http_err.code}"}
    except (URLError, OSError) as net_err:
        return False, {"error": f"network: {net_err}"}
238
+
239
+
240
+ # ─── Main ────────────────────────────────────────────────────────
241
+
242
def main():
    """Entry point: push usage for every discovered credential source.

    Exit codes: 0 at least one account pushed; 1 SYNC_TOKEN unset;
    2 no credentials found; 3 credentials found but nothing pushed.
    """
    # Refuse to run without a sync token — the server would reject us anyway.
    if not SYNC_TOKEN:
        print("ERROR: SYNC_TOKEN is empty.", file=sys.stderr)
        print("", file=sys.stderr)
        print("Get your token on the Claudash server:", file=sys.stderr)
        print(" python3 cli.py keys", file=sys.stderr)
        print("", file=sys.stderr)
        print("Then edit this file and set SYNC_TOKEN at the top.", file=sys.stderr)
        sys.exit(1)

    sources = list(collect_credentials())
    if not sources:
        print("No Claude Code credentials found.", file=sys.stderr)
        print("", file=sys.stderr)
        print("Run 'claude' in your terminal to authenticate first,", file=sys.stderr)
        print("or edit CREDENTIALS_PATHS at the top of this file.", file=sys.stderr)
        sys.exit(2)

    pushed = 0
    for src in sources:
        oauth = src["oauth"]
        token = oauth.get("accessToken") or ""
        expires_at = oauth.get("expiresAt") or 0
        # Claude Code stores expiresAt in milliseconds; values above 1e12
        # can only be ms-since-epoch, so scale them down to seconds.
        if expires_at and expires_at > 1e12:
            expires_at_sec = expires_at / 1000.0
        else:
            expires_at_sec = expires_at or 0
        # Skip locally-expired tokens without hitting the network.
        if expires_at_sec and expires_at_sec < time.time():
            print(f" {src['source']}: token expired at "
                  f"{time.strftime('%Y-%m-%d %H:%M UTC', time.gmtime(expires_at_sec))}",
                  file=sys.stderr)
            continue

        # Resolve the account; each failure mode gets its own message.
        email, org_id, plan, err = fetch_account(token)
        if err == "expired":
            print(f" {src['source']}: token rejected by claude.ai "
                  "(run `claude` to refresh)", file=sys.stderr)
            continue
        if err:
            print(f" {src['source']}: account lookup failed — {err}", file=sys.stderr)
            continue
        if not org_id:
            print(f" {src['source']}: no org_id in /api/account response", file=sys.stderr)
            continue

        # Usage fetch is best-effort: push account info even without usage.
        usage, usage_err = fetch_usage(token, org_id)
        if usage_err:
            print(f" {src['source']}: {email} ({plan}) — usage fetch failed ({usage_err})", file=sys.stderr)
            usage = None

        ok, resp = push_to_claudash(token, org_id, email, usage, plan)
        if ok:
            pct = (usage or {}).get("pct_used", 0)
            pct_str = f" — {pct:.1f}%" if usage else ""
            print(f" {src['source']}: {email or '(no email)'} ({plan}){pct_str} → pushed OK")
            pushed += 1
        else:
            print(f" {src['source']}: push failed — {resp.get('error') if isinstance(resp, dict) else resp}", file=sys.stderr)

    print()
    print(f"Claudash OAuth sync complete: {pushed}/{len(sources)} accounts pushed")
    sys.exit(0 if pushed > 0 else 3)
305
+
306
+
307
+ if __name__ == "__main__":
308
+ main()
@@ -0,0 +1,53 @@
1
#!/bin/bash
# Sets up Claudash as a PM2 managed process.
# PM2 auto-restarts on crash, survives VPS reboots.

set -e

# Repository root: this script lives one directory below it.
SCRIPT_DIR="$(cd "$(dirname "$0")/.." && pwd)"

echo "Setting up Claudash with PM2..."

# Install PM2 if not present. Use the POSIX builtin `command -v` instead
# of `which` — `which` is an external tool and not guaranteed to exist.
command -v pm2 >/dev/null 2>&1 || npm install -g pm2

# Create PM2 ecosystem file. Quoted 'PMEOF' prevents shell expansion so
# the JavaScript (including __dirname) is written verbatim.
cat > "$SCRIPT_DIR/ecosystem.config.js" << 'PMEOF'
module.exports = {
  apps: [{
    name: 'claudash',
    script: 'cli.py',
    interpreter: 'python3',
    args: 'dashboard --skip-init --no-browser',
    cwd: __dirname,
    watch: false,
    autorestart: true,
    max_restarts: 10,
    min_uptime: '10s',
    restart_delay: 5000,
    error_file: '/tmp/claudash-error.log',
    out_file: '/tmp/claudash-out.log',
    log_date_format: 'YYYY-MM-DD HH:mm:ss',
    env: {
      PORT: 8080
    }
  }]
}
PMEOF

# Start with PM2 and persist the process list across reboots.
cd "$SCRIPT_DIR"
pm2 start ecosystem.config.js
pm2 save
# `pm2 startup` may require sudo and print follow-up instructions;
# don't abort the script (set -e) if it exits non-zero.
pm2 startup || true

echo ""
echo "Claudash is now managed by PM2."
echo "Commands:"
echo " pm2 status — see if running"
echo " pm2 logs claudash — see logs"
echo " pm2 restart claudash — restart"
echo " pm2 stop claudash — stop"
echo ""
echo "Dashboard: http://localhost:8080"
echo "On VPS: ssh -L 8080:localhost:8080 your-server"
@@ -0,0 +1,334 @@
1
+ """Waste pattern detection — Claudash intelligence layer.
2
+
3
+ Detects four patterns of wasteful Claude Code usage:
4
+
5
+ 1. FLOUNDERING — same tool name called >=4 times in a row
6
+ without any other tool, suggesting Claude
7
+ is stuck retrying.
8
+ 2. REPEATED_READS — the same file is read via `Read` >=3 times
9
+ in one session (cache churn, re-fetching).
10
+ 3. COST_OUTLIER — a single session's cost is >3x the 30-day
11
+ per-project average.
12
+ 4. DEEP_CONTEXT_NO_COMPACT — session has >100 turns and zero compaction
13
+ events (`/compact` never fired).
14
+
15
+ Each detection is UPSERTed into `waste_events` keyed on
16
+ (session_id, pattern_type).
17
+
18
+ This module reads JSONL files directly via the scan_state table — it
19
+ does NOT require new columns on the sessions table for tool_use data.
20
+ That keeps the waste detection independent of the main ingestion path.
21
+ """
22
+
23
+ import hashlib
24
+ import json
25
+ import os
26
+ import sqlite3
27
+ import time
28
+ from collections import defaultdict
29
+
30
+ from db import get_conn, insert_waste_event, clear_waste_events, get_setting, set_setting
31
+
32
+
33
+ # ─── Parameters ──────────────────────────────────────────────────
34
+
35
+ FLOUNDER_THRESHOLD = 4 # consecutive same-tool calls
36
+ REPEATED_READ_THRESHOLD = 3 # same file read N times in one session
37
+ COST_OUTLIER_MULTIPLIER = 3.0 # session cost > Nx project avg
38
+ DEEP_TURN_THRESHOLD = 100 # turns in a session
39
+
40
+
41
+ # ─── JSONL tool-use extraction ───────────────────────────────────
42
+
43
def _iter_assistant_tool_calls(filepath):
    """Yield (turn_index, tool_name, tool_input_dict) for every tool_use
    block inside the assistant messages of a Claude Code JSONL file.

    turn_index counts every successfully-parsed JSONL record (user and
    assistant alike), so it reflects position within the conversation.
    Unreadable files yield nothing. Expected assistant-record shape:

        {"type": "assistant",
         "message": {"role": "assistant",
                     "content": [{"type": "tool_use",
                                  "name": "Bash",
                                  "input": {"command": "..."}}]}}
    """
    try:
        with open(filepath, "r", errors="replace") as handle:
            turn_index = 0
            for raw_line in handle:
                raw_line = raw_line.strip()
                if not raw_line:
                    continue
                try:
                    record = json.loads(raw_line)
                except json.JSONDecodeError:
                    continue  # tolerate corrupt/partial lines
                turn_index += 1
                if record.get("type") != "assistant":
                    continue
                message = record.get("message") or {}
                if not isinstance(message, dict):
                    continue
                content = message.get("content")
                if not isinstance(content, list):
                    continue
                for piece in content:
                    if not isinstance(piece, dict):
                        continue
                    if piece.get("type") != "tool_use":
                        continue
                    tool_input = piece.get("input") or {}
                    if isinstance(tool_input, dict):
                        yield turn_index, piece.get("name") or "", tool_input
    except OSError:
        return
87
+
88
+
89
def _file_session_id(filepath):
    """Return the first sessionId / session_id / uuid value found in a
    JSONL transcript, or None when the file is unreadable or no record
    carries an id."""
    try:
        with open(filepath, "r", errors="replace") as handle:
            for raw in handle:
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    record = json.loads(raw)
                except json.JSONDecodeError:
                    continue
                # Same precedence as the writers use: sessionId wins.
                for key in ("sessionId", "session_id", "uuid"):
                    candidate = record.get(key)
                    if candidate:
                        return candidate
    except OSError:
        return None
    return None
107
+
108
+
109
+ # ─── Pattern detectors ───────────────────────────────────────────
110
+
111
def _input_hash(inp):
    """Return an 8-hex-char fingerprint of a tool input (md5 of the first
    200 chars of its repr), or "" for an empty/None input. Used only for
    equality comparison between tool calls, not for security."""
    if not inp:
        return ""
    fingerprint_src = str(inp)[:200].encode()
    return hashlib.md5(fingerprint_src).hexdigest()[:8]
117
+
118
+
119
def _detect_floundering(tool_calls):
    """Return (count, detail) for FLOUNDERING.

    A "run" is >= FLOUNDER_THRESHOLD consecutive tool calls sharing the
    same (tool_name, input_hash) pair — i.e. the exact same tool invoked
    with the exact same (hashed) input back to back, with no other call
    in between. Returns (number_of_runs,
    {"runs": [{"tool", "length", "start_turn"}, ...],
     "total_flounder_calls": N}).

    NOTE(review): the companion comment on _input_hash says identical
    (tool, input) repeats are intentional retries and should NOT be
    flagged, but this code flags exactly those identical runs (a changed
    input produces a different key and resets the run). This docstring
    describes what the code actually does — confirm which semantics were
    intended.

    `tool_calls` is an iterable of (turn, name, input) tuples.
    """
    runs = []
    current_key = None
    current_name = None
    current_len = 0
    current_start = 0
    for turn, name, inp in tool_calls:
        key = (name, _input_hash(inp))
        if key == current_key:
            current_len += 1
        else:
            # Close out the previous run if it was long enough to flag.
            if current_name and current_len >= FLOUNDER_THRESHOLD:
                runs.append({"tool": current_name, "length": current_len, "start_turn": current_start})
            current_key = key
            current_name = name
            current_len = 1
            current_start = turn
    # Flush a trailing run that reached the end of the call list.
    if current_name and current_len >= FLOUNDER_THRESHOLD:
        runs.append({"tool": current_name, "length": current_len, "start_turn": current_start})
    return len(runs), {"runs": runs, "total_flounder_calls": sum(r["length"] for r in runs)}
144
+
145
+
146
def _detect_repeated_reads(tool_calls):
    """Return (count, detail) for REPEATED_READS: files fetched via the
    `Read` tool at least REPEATED_READ_THRESHOLD times in one session.
    `tool_calls` is an iterable of (turn, name, input) tuples."""
    reads_per_file = defaultdict(int)
    for _turn, tool_name, tool_input in tool_calls:
        if tool_name != "Read":
            continue
        # Read inputs have used several key names across versions.
        target = (tool_input.get("file_path")
                  or tool_input.get("path")
                  or tool_input.get("filename"))
        if target:
            reads_per_file[target] += 1
    flagged = [
        {"path": path, "reads": count}
        for path, count in reads_per_file.items()
        if count >= REPEATED_READ_THRESHOLD
    ]
    return len(flagged), {"files": flagged}
158
+
159
+
160
+ # ─── Main detection pass ─────────────────────────────────────────
161
+
162
def detect_all(conn=None):
    """Run every detector against the latest scan and refresh waste_events.

    Args:
        conn: optional open DB connection; when None a new one is taken
            from db.get_conn() and closed before returning.

    Returns a dict with per-pattern counts for logging:
    {"floundering", "repeated_reads", "cost_outliers", "deep_no_compact"}.

    NOTE(review): detectors 3 and 4 always scan all qualifying sessions,
    so they rely on insert_waste_event being an UPSERT (per the module
    docstring) to avoid duplicate rows on incremental runs.
    """
    should_close = False
    if conn is None:
        conn = get_conn()
        should_close = True

    # Incremental: only reprocess sessions newer than last waste scan
    last_waste_scan = get_setting(conn, "last_waste_scan")
    last_waste_ts = int(last_waste_scan) if last_waste_scan else 0

    # Only clear waste events on full re-scan (first run or reset)
    if last_waste_ts == 0:
        clear_waste_events(conn)

    # ── 1 & 2: per-file detectors (FLOUNDERING, REPEATED_READS) ──
    # File list comes from scan_state; on incremental runs only files
    # touched since the last waste scan are re-parsed.
    if last_waste_ts > 0:
        file_rows = conn.execute(
            "SELECT file_path FROM scan_state WHERE last_scanned >= ? ORDER BY file_path",
            (last_waste_ts,),
        ).fetchall()
    else:
        file_rows = conn.execute("SELECT file_path FROM scan_state ORDER BY file_path").fetchall()
    flounder_count = 0
    repeated_count = 0

    for r in file_rows:
        filepath = r[0]
        # JSONL file may have been deleted since it was scanned.
        if not os.path.isfile(filepath):
            continue
        sid = _file_session_id(filepath)
        if not sid:
            continue

        # Look up project/account/cost from sessions table
        info = conn.execute(
            "SELECT project, account, COALESCE(SUM(cost_usd), 0) AS cost, COUNT(*) AS turns "
            "FROM sessions WHERE session_id = ?",
            (sid,),
        ).fetchone()
        if not info or not info["project"]:
            continue
        project, account = info["project"], info["account"]
        session_cost = info["cost"] or 0
        turn_count = info["turns"] or 0

        tool_calls = list(_iter_assistant_tool_calls(filepath))
        if not tool_calls:
            continue

        # FLOUNDERING — two or more runs escalates severity to red.
        n_flounder, flounder_detail = _detect_floundering(tool_calls)
        if n_flounder > 0:
            severity = "red" if n_flounder >= 2 else "amber"
            insert_waste_event(
                conn, sid, project, account, "floundering", severity,
                turn_count, session_cost, flounder_detail,
            )
            flounder_count += 1

        # REPEATED_READS — always amber.
        n_rep, rep_detail = _detect_repeated_reads(tool_calls)
        if n_rep > 0:
            severity = "amber"
            insert_waste_event(
                conn, sid, project, account, "repeated_reads", severity,
                turn_count, session_cost, rep_detail,
            )
            repeated_count += 1

    # ── 3: COST_OUTLIER — sessions whose cost is >3x project 30d avg ──
    # Per-project average of per-session totals over the last 30 days.
    outlier_count = 0
    proj_avgs = {
        r[0]: (r[1] or 0) for r in conn.execute(
            "SELECT project, AVG(session_cost) FROM "
            "(SELECT project, session_id, SUM(cost_usd) AS session_cost "
            " FROM sessions "
            " WHERE timestamp >= strftime('%s','now') - 30*86400 "
            " GROUP BY project, session_id) "
            "GROUP BY project"
        ).fetchall()
    }
    session_totals = conn.execute(
        "SELECT session_id, project, account, "
        " SUM(cost_usd) AS cost, COUNT(*) AS turns "
        "FROM sessions "
        "WHERE timestamp >= strftime('%s','now') - 30*86400 "
        "GROUP BY session_id, project, account"
    ).fetchall()
    for s in session_totals:
        avg = proj_avgs.get(s["project"], 0)
        if avg <= 0:
            continue  # no baseline → cannot call anything an outlier
        if (s["cost"] or 0) > avg * COST_OUTLIER_MULTIPLIER:
            insert_waste_event(
                conn, s["session_id"], s["project"], s["account"],
                "cost_outlier", "amber", s["turns"], s["cost"],
                {"session_cost": round(s["cost"], 4),
                 "project_avg": round(avg, 4),
                 "multiplier": round(s["cost"] / avg, 1)},
            )
            outlier_count += 1

    # ── 4: DEEP_CONTEXT_NO_COMPACT — >100 turns with zero compaction ──
    deep_count = 0
    deep_sessions = conn.execute(
        "SELECT session_id, project, account, COUNT(*) AS turns, "
        " SUM(cost_usd) AS cost, MAX(compaction_detected) AS any_compact "
        "FROM sessions "
        "GROUP BY session_id "
        "HAVING turns > ? AND any_compact = 0",
        (DEEP_TURN_THRESHOLD,),
    ).fetchall()
    for s in deep_sessions:
        insert_waste_event(
            conn, s["session_id"], s["project"], s["account"],
            "deep_no_compact", "amber", s["turns"], s["cost"] or 0,
            {"turns": s["turns"]},
        )
        deep_count += 1

    # Record scan timestamp for incremental next run
    set_setting(conn, "last_waste_scan", str(int(time.time())))
    conn.commit()

    summary = {
        "floundering": flounder_count,
        "repeated_reads": repeated_count,
        "cost_outliers": outlier_count,
        "deep_no_compact": deep_count,
    }

    if should_close:
        conn.close()
    return summary
300
+
301
+
302
def waste_summary_by_project(conn, days=7):
    """Aggregate waste_events by project for the last N days.

    Used by analyzer.full_analysis → /api/data → dashboard UI.

    Args:
        conn: open DB connection whose rows support mapping access
            (e.g. sqlite3.Row).
        days: look-back window in days (default 7).

    Returns:
        {project: {"floundering_sessions", "repeated_read_sessions",
                   "cost_outliers", "deep_no_compact",
                   "total_waste_cost_est"}} — projects with no events in
        the window are absent. Only floundering events add to
        total_waste_cost_est (NOTE(review): costs of the other patterns
        are excluded — confirm this is intentional).
    """
    # Fix: use the module-level `time` import instead of the previous
    # redundant dynamic __import__("time") call.
    since = int(time.time()) - (days * 86400)
    rows = conn.execute(
        "SELECT project, pattern_type, COUNT(*) AS n, "
        " SUM(token_cost) AS cost "
        "FROM waste_events WHERE detected_at >= ? "
        "GROUP BY project, pattern_type",
        (since,),
    ).fetchall()
    result = defaultdict(lambda: {
        "floundering_sessions": 0,
        "repeated_read_sessions": 0,
        "cost_outliers": 0,
        "deep_no_compact": 0,
        "total_waste_cost_est": 0.0,
    })
    for r in rows:
        proj = r["project"] or "Other"  # NULL project → catch-all bucket
        pt = r["pattern_type"]
        n = r["n"] or 0
        cost = r["cost"] or 0
        if pt == "floundering":
            result[proj]["floundering_sessions"] = n
            result[proj]["total_waste_cost_est"] += cost
        elif pt == "repeated_reads":
            result[proj]["repeated_read_sessions"] = n
        elif pt == "cost_outlier":
            result[proj]["cost_outliers"] = n
        elif pt == "deep_no_compact":
            result[proj]["deep_no_compact"] = n
    # Plain dicts so the result is JSON-serializable without surprises.
    return {p: dict(v) for p, v in result.items()}