squads-cli 0.1.2 → 0.3.0

@@ -0,0 +1,1457 @@
+ """
+ Squads Telemetry Bridge
+ Receives OpenTelemetry metrics/logs from Claude Code.
+ Saves to PostgreSQL (durable), Redis (real-time), Langfuse (optional).
+ Forwards conversations to engram/mem0 for embeddings and graph storage.
+ """
+ import os
+ import json
+ import gzip
+ import threading
+ import time
+ import requests
+ from datetime import datetime, date
+ from collections import deque
+ from flask import Flask, request, jsonify
+ import psycopg2
+ from psycopg2.extras import RealDictCursor
+ import redis
+
+ app = Flask(__name__)
+
+ # Configuration
+ DEBUG_MODE = os.environ.get("DEBUG", "1") == "1"
+ LANGFUSE_ENABLED = os.environ.get("LANGFUSE_ENABLED", "false").lower() == "true"
+ DAILY_BUDGET = float(os.environ.get("SQUADS_DAILY_BUDGET", "200.0"))
+ recent_logs = deque(maxlen=50)
+
+ # Engram/mem0 configuration for memory extraction
+ ENGRAM_URL = os.environ.get("ENGRAM_URL", "http://host.docker.internal:8000")
+ ENGRAM_ENABLED = os.environ.get("ENGRAM_ENABLED", "false").lower() == "true"
+ ENGRAM_USER_ID = os.environ.get("ENGRAM_USER_ID", "local")
+
+ # PostgreSQL connection (durable storage)
+ DATABASE_URL = os.environ.get(
+     "DATABASE_URL",
+     "postgresql://squads:squads_local_dev@postgres:5432/squads"
+ )
+
+ # Redis connection (real-time cache)
+ REDIS_URL = os.environ.get("REDIS_URL", "redis://redis:6379/0")
+ redis_client = None
+ try:
+     redis_client = redis.from_url(REDIS_URL, decode_responses=True)
+     redis_client.ping()
+     print(f"Redis connected: {REDIS_URL}")
+ except Exception as e:
+     print(f"Redis unavailable (degraded mode): {e}")
+     redis_client = None
+
+ def get_db():
+     """Get database connection."""
+     return psycopg2.connect(DATABASE_URL)
+
+
+ # =============================================================================
+ # Redis Keys & Helpers
+ # =============================================================================
+ def redis_key(prefix: str, *parts) -> str:
+     """Build Redis key: prefix:part1:part2:..."""
+     return ":".join([prefix] + [str(p) for p in parts])
+
+ def today_str() -> str:
+     """Get today's date as string for Redis keys."""
+     return date.today().isoformat()
+
+ def incr_cost(squad: str, cost_usd: float, input_tokens: int, output_tokens: int):
+     """Increment real-time cost counters in Redis."""
+     if not redis_client:
+         return
+
+     today = today_str()
+     pipe = redis_client.pipeline()
+
+     # Global daily counters
+     pipe.incrbyfloat(redis_key("cost", "daily", today), cost_usd)
+     pipe.incrby(redis_key("tokens", "input", today), input_tokens)
+     pipe.incrby(redis_key("tokens", "output", today), output_tokens)
+     pipe.incr(redis_key("generations", today))
+
+     # Per-squad counters
+     pipe.incrbyfloat(redis_key("cost", "squad", squad, today), cost_usd)
+     pipe.incrby(redis_key("generations", "squad", squad, today), 1)
+
+     # Set expiry (48h) for all keys
+     for key in [
+         redis_key("cost", "daily", today),
+         redis_key("tokens", "input", today),
+         redis_key("tokens", "output", today),
+         redis_key("generations", today),
+         redis_key("cost", "squad", squad, today),
+         redis_key("generations", "squad", squad, today),
+     ]:
+         pipe.expire(key, 172800)  # 48 hours
+
+     pipe.execute()
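For reference, the key layout this produces can be previewed without a live Redis; the sketch below mirrors the redis_key/today_str logic above (the "growth" squad name is illustrative):

    from datetime import date

    def redis_key(prefix, *parts):
        return ":".join([prefix] + [str(p) for p in parts])

    today = date.today().isoformat()
    print(redis_key("cost", "daily", today))                   # cost:daily:<YYYY-MM-DD>
    print(redis_key("cost", "squad", "growth", today))         # cost:squad:growth:<YYYY-MM-DD>
    print(redis_key("generations", "squad", "growth", today))  # generations:squad:growth:<YYYY-MM-DD>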
+
+ def get_realtime_stats() -> dict | None:
+     """Get real-time stats from Redis (fast path); returns None when Redis is unavailable."""
+     if not redis_client:
+         return None
+
+     today = today_str()
+     try:
+         cost = float(redis_client.get(redis_key("cost", "daily", today)) or 0)
+         input_tokens = int(redis_client.get(redis_key("tokens", "input", today)) or 0)
+         output_tokens = int(redis_client.get(redis_key("tokens", "output", today)) or 0)
+         generations = int(redis_client.get(redis_key("generations", today)) or 0)
+
+         # Get per-squad costs
+         squad_keys = redis_client.keys(redis_key("cost", "squad", "*", today))
+         by_squad = []
+         for key in squad_keys:
+             parts = key.split(":")
+             squad_name = parts[2] if len(parts) > 2 else "unknown"
+             squad_cost = float(redis_client.get(key) or 0)
+             squad_gens = int(redis_client.get(redis_key("generations", "squad", squad_name, today)) or 0)
+             by_squad.append({
+                 "squad": squad_name,
+                 "cost_usd": squad_cost,
+                 "generations": squad_gens,
+             })
+
+         by_squad.sort(key=lambda x: x["cost_usd"], reverse=True)
+
+         return {
+             "cost_usd": cost,
+             "input_tokens": input_tokens,
+             "output_tokens": output_tokens,
+             "generations": generations,
+             "by_squad": by_squad,
+             "budget_remaining": DAILY_BUDGET - cost,
+             "budget_pct": (cost / DAILY_BUDGET) * 100 if DAILY_BUDGET > 0 else 0,
+         }
+     except Exception as e:
+         print(f"Redis stats error: {e}")
+         return None
+
+ def cache_session(session_id: str, squad: str, agent: str):
+     """Cache session info in Redis for fast lookups."""
+     if not redis_client:
+         return
+
+     key = redis_key("session", session_id)
+     redis_client.hset(key, mapping={"squad": squad, "agent": agent, "last_seen": datetime.now().isoformat()})
+     redis_client.expire(key, 86400)  # 24h
+
+ def get_cached_session(session_id: str) -> dict | None:
+     """Get cached session from Redis."""
+     if not redis_client:
+         return None
+
+     key = redis_key("session", session_id)
+     data = redis_client.hgetall(key)
+     return data if data else None
+
+
+ # =============================================================================
+ # Conversation Buffer (Redis -> Postgres + Engram)
+ # =============================================================================
+ CONV_BUFFER_KEY = "conversations:pending"
+ CONV_RECENT_KEY = "conversations:recent"
+
+ def buffer_conversation(conv_data: dict):
+     """Push conversation to Redis buffer for async processing."""
+     if not redis_client:
+         return False
+
+     try:
+         # Add to pending queue
+         redis_client.lpush(CONV_BUFFER_KEY, json.dumps(conv_data))
+
+         # Also add to recent (circular buffer of last 100)
+         redis_client.lpush(CONV_RECENT_KEY, json.dumps(conv_data))
+         redis_client.ltrim(CONV_RECENT_KEY, 0, 99)
+
+         if DEBUG_MODE:
+             print(f"[BUFFER] Queued conversation: {conv_data.get('role')} - {len(conv_data.get('content', ''))} chars")
+         return True
+     except Exception as e:
+         print(f"[BUFFER] Error: {e}")
+         return False
+
+ def forward_to_engram(conv_data: dict) -> bool:
+     """Forward conversation to engram/mem0 for extraction."""
+     if not ENGRAM_ENABLED:
+         return False
+
+     try:
+         # Format for mem0 API
+         payload = {
+             "messages": [{"role": conv_data.get("role", "user"), "content": conv_data.get("content", "")}],
+             "user_id": conv_data.get("user_id", ENGRAM_USER_ID),
+             "metadata": {
+                 "session_id": conv_data.get("session_id", ""),
+                 "type": conv_data.get("message_type", "message"),
+                 "importance": conv_data.get("importance", "normal"),
+                 "source": "squads-bridge",
+             }
+         }
+
+         response = requests.post(
+             f"{ENGRAM_URL}/memories",
+             json=payload,
+             timeout=30
+         )
+
+         if response.ok:
+             if DEBUG_MODE:
+                 print(f"[ENGRAM] Forwarded: {conv_data.get('role')} -> mem0")
+             return True
+         else:
+             print(f"[ENGRAM] Error {response.status_code}: {response.text[:100]}")
+             return False
+
+     except requests.exceptions.ConnectionError:
+         if DEBUG_MODE:
+             print(f"[ENGRAM] Not available at {ENGRAM_URL}")
+         return False
+     except Exception as e:
+         print(f"[ENGRAM] Error: {e}")
+         return False
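The mem0-style payload built above has the following shape; a minimal standalone sketch of the equivalent request (field names and the /memories path come from the code, the URL and values are illustrative):

    import requests

    payload = {
        "messages": [{"role": "user", "content": "Deployed v0.3.0 to staging"}],
        "user_id": "local",
        "metadata": {
            "session_id": "sess-123",  # illustrative value
            "type": "message",
            "importance": "normal",
            "source": "squads-bridge",
        },
    }
    # Assumes an engram/mem0 server is listening at this URL.
    requests.post("http://host.docker.internal:8000/memories", json=payload, timeout=30)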
+
+ def process_conversation_queue():
+     """Background worker to process conversation buffer."""
+     print("[WORKER] Conversation processor started")
+
+     while True:
+         try:
+             if not redis_client:
+                 time.sleep(5)
+                 continue
+
+             # Block-pop from queue (timeout 5s)
+             result = redis_client.brpop(CONV_BUFFER_KEY, timeout=5)
+             if not result:
+                 continue
+
+             _, conv_json = result
+             conv_data = json.loads(conv_json)
+
+             # 1. Save to local postgres (for CLI search)
+             try:
+                 conn = get_db()
+                 save_conversation(
+                     conn,
+                     conv_data.get("session_id", ""),
+                     conv_data.get("user_id", "local"),
+                     conv_data.get("role", "user"),
+                     conv_data.get("content", ""),
+                     conv_data.get("message_type", "message"),
+                     conv_data.get("importance", "normal"),
+                     conv_data.get("metadata", {})
+                 )
+                 conn.commit()
+                 conn.close()
+                 if DEBUG_MODE:
+                     print("[WORKER] Saved to postgres")
+             except Exception as e:
+                 print(f"[WORKER] Postgres error: {e}")
+
+             # 2. Forward to engram/mem0 (for vectors + graph)
+             if ENGRAM_ENABLED:
+                 forward_to_engram(conv_data)
+
+         except Exception as e:
+             print(f"[WORKER] Error: {e}")
+             time.sleep(1)
+
+ # Start background worker thread
+ conv_worker_thread = None
+
+ def start_conversation_worker():
+     """Start the background conversation processor."""
+     global conv_worker_thread
+     if conv_worker_thread is None or not conv_worker_thread.is_alive():
+         conv_worker_thread = threading.Thread(target=process_conversation_queue, daemon=True)
+         conv_worker_thread.start()
+         print("[WORKER] Background thread started")
+
+
+ # Optional Langfuse client
+ langfuse = None
+ if LANGFUSE_ENABLED:
+     try:
+         from langfuse import Langfuse
+         langfuse = Langfuse(
+             public_key=os.environ.get("LANGFUSE_PUBLIC_KEY"),
+             secret_key=os.environ.get("LANGFUSE_SECRET_KEY"),
+             host=os.environ.get("LANGFUSE_HOST", "https://cloud.langfuse.com"),
+         )
+         print(f"Langfuse forwarding enabled: {os.environ.get('LANGFUSE_HOST')}")
+     except Exception as e:
+         print(f"Langfuse initialization failed: {e}")
+         langfuse = None
+
+
+ def get_json_data():
+     """Get JSON data, handling gzip compression if present."""
+     raw_data = request.get_data()
+
+     # 0x1f 0x8b is the gzip magic number
+     if raw_data[:2] == b'\x1f\x8b':
+         try:
+             raw_data = gzip.decompress(raw_data)
+         except Exception as e:
+             print(f"Gzip decompress error: {e}")
+             return {}
+
+     try:
+         return json.loads(raw_data)
+     except json.JSONDecodeError as e:
+         print(f"JSON decode error: {e}")
+         return {}
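Because the body is sniffed for the gzip magic bytes rather than trusting a Content-Encoding header, a compressed client payload can be exercised like this (a minimal sketch; the localhost URL and empty payload are illustrative, 8080 is the default port from the __main__ block):

    import gzip
    import json
    import requests

    body = gzip.compress(json.dumps({"resourceLogs": []}).encode("utf-8"))
    resp = requests.post(
        "http://localhost:8080/v1/logs",  # assumes a local bridge on the default port
        data=body,
        headers={"Content-Type": "application/json"},
    )
    print(resp.status_code, resp.json())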
+
+
+ def extract_attributes(attrs_list):
+     """Extract attributes from OTel attribute list format."""
+     result = {}
+     for attr in attrs_list:
+         key = attr.get("key", "")
+         val = attr.get("value", {})
+         value = (
+             val.get("stringValue") or
+             val.get("intValue") or
+             val.get("doubleValue") or
+             val.get("boolValue") or
+             ""
+         )
+         result[key] = value
+     return result
+
+
+ def safe_int(val, default=0):
+     """Safely convert to int."""
+     try:
+         return int(val) if val else default
+     except (ValueError, TypeError):
+         return default
+
+
+ def safe_float(val, default=0.0):
+     """Safely convert to float."""
+     try:
+         return float(val) if val else default
+     except (ValueError, TypeError):
+         return default
+
+
+ def extract_token_data(attrs):
+     """Extract token counts from OTel attributes (handles multiple formats)."""
+     input_keys = [
+         "input_tokens", "usage.input_tokens", "prompt_tokens",
+         "usage.prompt_tokens", "inputTokens", "promptTokens",
+         "llm.usage.prompt_tokens", "gen_ai.usage.input_tokens"
+     ]
+     output_keys = [
+         "output_tokens", "usage.output_tokens", "completion_tokens",
+         "usage.completion_tokens", "outputTokens", "completionTokens",
+         "llm.usage.completion_tokens", "gen_ai.usage.output_tokens"
+     ]
+     cache_read_keys = [
+         "cache_read_tokens", "cache_read", "cacheReadTokens",
+         "usage.cache_read_tokens", "cache_read_input_tokens"
+     ]
+     cache_creation_keys = [
+         "cache_creation_tokens", "cache_creation", "cacheCreationTokens",
+         "usage.cache_creation_tokens", "cache_creation_input_tokens"
+     ]
+     cost_keys = [
+         "cost_usd", "cost", "total_cost", "usage.cost",
+         "llm.usage.cost", "gen_ai.usage.cost"
+     ]
+
+     def find_value(keys, default=0):
+         for key in keys:
+             if key in attrs and attrs[key]:
+                 return attrs[key]
+         return default
+
+     return {
+         "input_tokens": safe_int(find_value(input_keys)),
+         "output_tokens": safe_int(find_value(output_keys)),
+         "cache_read": safe_int(find_value(cache_read_keys)),
+         "cache_creation": safe_int(find_value(cache_creation_keys)),
+         "cost_usd": safe_float(find_value(cost_keys, 0.0)),
+     }
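To make the attribute plumbing concrete, a small sketch of both helpers on a hand-built OTel-style record (assumes this file is importable, e.g. saved as bridge.py; attribute values are illustrative):

    from bridge import extract_attributes, extract_token_data  # assumed module name

    record_attrs = [
        {"key": "event.name", "value": {"stringValue": "api_request"}},
        {"key": "model", "value": {"stringValue": "claude-sonnet"}},
        {"key": "input_tokens", "value": {"intValue": "512"}},
        {"key": "output_tokens", "value": {"intValue": "128"}},
        {"key": "cost_usd", "value": {"doubleValue": 0.0042}},
    ]
    attrs = extract_attributes(record_attrs)
    print(extract_token_data(attrs))
    # -> {'input_tokens': 512, 'output_tokens': 128, 'cache_read': 0,
    #     'cache_creation': 0, 'cost_usd': 0.0042}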
+
+
+ def ensure_session(conn, session_id, squad, agent, user_id):
+     """Create or update session record."""
+     with conn.cursor() as cur:
+         cur.execute("""
+             INSERT INTO squads.sessions (id, squad, agent, user_id, last_activity_at)
+             VALUES (%s, %s, %s, %s, NOW())
+             ON CONFLICT (id) DO UPDATE SET
+                 last_activity_at = NOW(),
+                 squad = COALESCE(EXCLUDED.squad, squads.sessions.squad),
+                 agent = COALESCE(EXCLUDED.agent, squads.sessions.agent)
+         """, (session_id, squad, agent, user_id or None))
+
+
+ def save_generation(conn, session_id, squad, agent, user_id, model, token_data):
+     """Save LLM generation to postgres + Redis."""
+     with conn.cursor() as cur:
+         cur.execute("""
+             INSERT INTO squads.llm_generations
+                 (session_id, squad, agent, user_id, model,
+                  input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens, cost_usd)
+             VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
+             RETURNING id
+         """, (
+             session_id, squad, agent, user_id or None, model,
+             token_data["input_tokens"], token_data["output_tokens"],
+             token_data["cache_read"], token_data["cache_creation"],
+             token_data["cost_usd"]
+         ))
+         gen_id = cur.fetchone()[0]
+
+         # Update session aggregates
+         cur.execute("""
+             UPDATE squads.sessions SET
+                 total_input_tokens = total_input_tokens + %s,
+                 total_output_tokens = total_output_tokens + %s,
+                 total_cost_usd = total_cost_usd + %s,
+                 generation_count = generation_count + 1,
+                 last_activity_at = NOW()
+             WHERE id = %s
+         """, (
+             token_data["input_tokens"], token_data["output_tokens"],
+             token_data["cost_usd"], session_id
+         ))
+
+     # Update Redis real-time counters
+     incr_cost(squad, token_data["cost_usd"], token_data["input_tokens"], token_data["output_tokens"])
+     cache_session(session_id, squad, agent)
+
+     return gen_id
+
+
+ def save_tool_execution(conn, session_id, squad, agent, tool_name, success, duration_ms):
+     """Save tool execution to postgres."""
+     with conn.cursor() as cur:
+         cur.execute("""
+             INSERT INTO squads.tool_executions
+                 (session_id, squad, agent, tool_name, success, duration_ms)
+             VALUES (%s, %s, %s, %s, %s, %s)
+             RETURNING id
+         """, (session_id, squad, agent, tool_name, success, duration_ms))
+         tool_id = cur.fetchone()[0]
+
+         # Update session tool count
+         cur.execute("""
+             UPDATE squads.sessions SET
+                 tool_count = tool_count + 1,
+                 last_activity_at = NOW()
+             WHERE id = %s
+         """, (session_id,))
+
+     return tool_id
+
+
+ @app.route("/v1/metrics", methods=["POST"])
+ def receive_metrics():
+     """Receive OTel metrics - acknowledge for now."""
+     try:
+         get_json_data()
+         return jsonify({"status": "ok"}), 200
+     except Exception as e:
+         print(f"Error processing metrics: {e}")
+         return jsonify({"error": str(e)}), 500
+
+
+ @app.route("/v1/logs", methods=["POST"])
+ def receive_logs():
+     """Receive OTel logs - save to postgres, optionally forward to Langfuse."""
+     try:
+         data = get_json_data()
+         conn = get_db()
+
+         for resource_log in data.get("resourceLogs", []):
+             resource_attrs = extract_attributes(
+                 resource_log.get("resource", {}).get("attributes", [])
+             )
+
+             service_name = resource_attrs.get("service.name", "claude-code")
+
+             # Detect squad/agent context
+             squad_name = (
+                 resource_attrs.get("squad") or
+                 resource_attrs.get("squads.squad") or
+                 os.environ.get("SQUADS_SQUAD") or
+                 "hq"
+             )
+             agent_name = (
+                 resource_attrs.get("agent") or
+                 resource_attrs.get("squads.agent") or
+                 os.environ.get("SQUADS_AGENT") or
+                 "coo"
+             )
+
+             for scope_log in resource_log.get("scopeLogs", []):
+                 for log_record in scope_log.get("logRecords", []):
+                     log_attrs = extract_attributes(log_record.get("attributes", []))
+
+                     event_name = log_attrs.get("event.name", "unknown")
+                     session_id = log_attrs.get("session.id", "unknown")
+                     user_id = log_attrs.get("user.id", "")
+
+                     # Debug logging
+                     if DEBUG_MODE:
+                         recent_logs.append({
+                             "timestamp": datetime.now().isoformat(),
+                             "event_name": event_name,
+                             "log_attrs": dict(log_attrs),
+                             "resource_attrs": dict(resource_attrs),
+                         })
+                         if event_name == "api_request":
+                             print(f"[DEBUG] api_request: session={session_id} squad={squad_name} agent={agent_name}")
+
+                     # Ensure session exists
+                     ensure_session(conn, session_id, squad_name, agent_name, user_id)
+
+                     # Handle LLM API requests
+                     if event_name == "api_request":
+                         model = log_attrs.get("model", "claude")
+                         token_data = extract_token_data(log_attrs)
+
+                         # Save to postgres (primary)
+                         gen_id = save_generation(
+                             conn, session_id, squad_name, agent_name,
+                             user_id, model, token_data
+                         )
+                         print(f"[PG] Generation #{gen_id}: {model} {token_data['input_tokens']}+{token_data['output_tokens']} tokens ${token_data['cost_usd']:.4f}")
+
+                         # Forward to Langfuse (optional)
+                         if langfuse:
+                             try:
+                                 trace = langfuse.trace(
+                                     name=f"llm:{model}",
+                                     user_id=user_id or None,
+                                     session_id=session_id,
+                                     metadata={
+                                         "squad": squad_name,
+                                         "agent": agent_name,
+                                         "service": service_name,
+                                     },
+                                 )
+                                 trace.generation(
+                                     name=f"llm:{model}",
+                                     model=model,
+                                     usage={
+                                         "input": token_data["input_tokens"],
+                                         "output": token_data["output_tokens"],
+                                         "total": token_data["input_tokens"] + token_data["output_tokens"],
+                                     },
+                                     metadata={
+                                         "cache_read": token_data["cache_read"],
+                                         "cache_creation": token_data["cache_creation"],
+                                         "cost_usd": token_data["cost_usd"],
+                                     },
+                                 )
+                             except Exception as e:
+                                 print(f"[Langfuse] Forward error: {e}")
+
+                     # Handle tool results
+                     elif event_name == "tool_result":
+                         tool_name = log_attrs.get("tool_name", "unknown")
+                         duration_ms = safe_int(log_attrs.get("duration_ms", 0))
+                         success = log_attrs.get("success", "true") in ["true", True, "1"]
+
+                         # Save to postgres (primary)
+                         tool_id = save_tool_execution(
+                             conn, session_id, squad_name, agent_name,
+                             tool_name, success, duration_ms
+                         )
+
+                         # Only log non-trivial tools
+                         if tool_name not in ["Read", "Glob", "Grep"]:
+                             print(f"[PG] Tool #{tool_id}: {tool_name} success={success}")
+
+                         # Forward to Langfuse (optional)
+                         if langfuse:
+                             try:
+                                 trace = langfuse.trace(
+                                     name=f"tool:{tool_name}",
+                                     session_id=session_id,
+                                     metadata={
+                                         "squad": squad_name,
+                                         "agent": agent_name,
+                                     },
+                                 )
+                                 trace.span(
+                                     name=f"tool:{tool_name}",
+                                     metadata={
+                                         "tool_name": tool_name,
+                                         "success": success,
+                                         "duration_ms": duration_ms,
+                                     },
+                                 )
+                             except Exception as e:
+                                 print(f"[Langfuse] Forward error: {e}")
+
+         conn.commit()
+         conn.close()
+
+         if langfuse:
+             langfuse.flush()
+
+         return jsonify({"status": "ok"}), 200
+
+     except Exception as e:
+         import traceback
+         print(f"Error processing logs: {e}")
+         traceback.print_exc()
+         return jsonify({"error": str(e)}), 500
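End to end, this route consumes the standard OTLP/JSON log shape. A minimal sketch of a synthetic api_request event (URL and attribute values illustrative; the field names mirror what the parser reads):

    import requests

    event = {
        "resourceLogs": [{
            "resource": {"attributes": [
                {"key": "service.name", "value": {"stringValue": "claude-code"}},
                {"key": "squad", "value": {"stringValue": "growth"}},
            ]},
            "scopeLogs": [{
                "logRecords": [{
                    "attributes": [
                        {"key": "event.name", "value": {"stringValue": "api_request"}},
                        {"key": "session.id", "value": {"stringValue": "sess-123"}},
                        {"key": "model", "value": {"stringValue": "claude-sonnet"}},
                        {"key": "input_tokens", "value": {"intValue": "512"}},
                        {"key": "output_tokens", "value": {"intValue": "128"}},
                        {"key": "cost_usd", "value": {"doubleValue": 0.0042}},
                    ]
                }]
            }]
        }]
    }
    print(requests.post("http://localhost:8080/v1/logs", json=event).json())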
+
+
+ @app.route("/health", methods=["GET"])
+ def health():
+     """Health check endpoint."""
+     status = {"status": "healthy"}
+
+     # Check Postgres
+     try:
+         conn = get_db()
+         with conn.cursor() as cur:
+             cur.execute("SELECT 1")
+         conn.close()
+         status["postgres"] = "connected"
+     except Exception as e:
+         status["postgres"] = f"error: {e}"
+         status["status"] = "degraded"
+
+     # Check Redis
+     if redis_client:
+         try:
+             redis_client.ping()
+             status["redis"] = "connected"
+         except Exception as e:
+             status["redis"] = f"error: {e}"
+             status["status"] = "degraded"
+     else:
+         status["redis"] = "disabled"
+
+     status["langfuse"] = "enabled" if langfuse else "disabled"
+
+     return jsonify(status), 200 if status["status"] == "healthy" else 503
+
+
+ @app.route("/stats", methods=["GET"])
+ def stats():
+     """Get telemetry statistics - Redis (fast) or Postgres (fallback)."""
+     # Try Redis first (real-time, fast)
+     realtime = get_realtime_stats()
+     if realtime:
+         return jsonify({
+             "status": "running",
+             "source": "redis",
+             "today": {
+                 "generations": realtime["generations"],
+                 "input_tokens": realtime["input_tokens"],
+                 "output_tokens": realtime["output_tokens"],
+                 "cost_usd": realtime["cost_usd"],
+             },
+             "budget": {
+                 "daily": DAILY_BUDGET,
+                 "used": realtime["cost_usd"],
+                 "remaining": realtime["budget_remaining"],
+                 "used_pct": realtime["budget_pct"],
+             },
+             "by_squad": realtime["by_squad"],
+             "langfuse_enabled": langfuse is not None,
+             "redis_enabled": True,
+         }), 200
+
+     # Fallback to Postgres
+     try:
+         conn = get_db()
+         with conn.cursor(cursor_factory=RealDictCursor) as cur:
+             # Session count
+             cur.execute("SELECT COUNT(*) as count FROM squads.sessions")
+             sessions = cur.fetchone()["count"]
+
+             # Today's generations
+             cur.execute("""
+                 SELECT
+                     COUNT(*) as count,
+                     COALESCE(SUM(input_tokens), 0) as input_tokens,
+                     COALESCE(SUM(output_tokens), 0) as output_tokens,
+                     COALESCE(SUM(cost_usd), 0) as cost_usd
+                 FROM squads.llm_generations
+                 WHERE created_at >= CURRENT_DATE
+             """)
+             today = cur.fetchone()
+
+             # By squad (today)
+             cur.execute("""
+                 SELECT
+                     squad,
+                     COUNT(*) as generations,
+                     COALESCE(SUM(cost_usd), 0) as cost_usd
+                 FROM squads.llm_generations
+                 WHERE created_at >= CURRENT_DATE
+                 GROUP BY squad
+                 ORDER BY cost_usd DESC
+             """)
+             by_squad = cur.fetchall()
+
+         conn.close()
+         cost_usd = float(today["cost_usd"])
+
+         return jsonify({
+             "status": "running",
+             "source": "postgres",
+             "sessions": sessions,
+             "today": {
+                 "generations": today["count"],
+                 "input_tokens": today["input_tokens"],
+                 "output_tokens": today["output_tokens"],
+                 "cost_usd": cost_usd,
+             },
+             "budget": {
+                 "daily": DAILY_BUDGET,
+                 "used": cost_usd,
+                 "remaining": DAILY_BUDGET - cost_usd,
+                 "used_pct": (cost_usd / DAILY_BUDGET) * 100 if DAILY_BUDGET > 0 else 0,
+             },
+             "by_squad": [dict(r) for r in by_squad],
+             "langfuse_enabled": langfuse is not None,
+             "redis_enabled": False,
+         }), 200
+
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
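Polling the fast path is a single GET (assuming a local bridge); the source field reports whether Redis or the Postgres fallback answered:

    import requests

    stats = requests.get("http://localhost:8080/stats").json()
    print(stats["source"], stats["today"]["cost_usd"], stats["budget"]["remaining"])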
+
+
+ @app.route("/api/cost/summary", methods=["GET"])
+ def cost_summary():
+     """Get cost summary for dashboard (replaces Langfuse MCP calls)."""
+     try:
+         period = request.args.get("period", "day")
+         squad = request.args.get("squad")
+
+         conn = get_db()
+         with conn.cursor(cursor_factory=RealDictCursor) as cur:
+             # Determine time filter
+             if period == "day":
+                 time_filter = "created_at >= CURRENT_DATE"
+             elif period == "week":
+                 time_filter = "created_at >= CURRENT_DATE - INTERVAL '7 days'"
+             else:  # month
+                 time_filter = "created_at >= CURRENT_DATE - INTERVAL '30 days'"
+
+             # Squad filter (parameterized to avoid SQL injection)
+             squad_filter = "AND squad = %s" if squad else ""
+             squad_params = [squad] if squad else []
+
+             # Aggregated stats
+             cur.execute(f"""
+                 SELECT
+                     COUNT(*) as generation_count,
+                     COALESCE(SUM(input_tokens), 0) as input_tokens,
+                     COALESCE(SUM(output_tokens), 0) as output_tokens,
+                     COALESCE(SUM(cost_usd), 0) as total_cost_usd
+                 FROM squads.llm_generations
+                 WHERE {time_filter} {squad_filter}
+             """, squad_params or None)
+             totals = cur.fetchone()
+
+             # By squad
+             cur.execute(f"""
+                 SELECT
+                     squad,
+                     COUNT(*) as generations,
+                     COALESCE(SUM(input_tokens), 0) as input_tokens,
+                     COALESCE(SUM(output_tokens), 0) as output_tokens,
+                     COALESCE(SUM(cost_usd), 0) as cost_usd
+                 FROM squads.llm_generations
+                 WHERE {time_filter}
+                 GROUP BY squad
+                 ORDER BY cost_usd DESC
+             """)
+             by_squad = cur.fetchall()
+
+             # By model
+             cur.execute(f"""
+                 SELECT
+                     model,
+                     COUNT(*) as generations,
+                     COALESCE(SUM(cost_usd), 0) as cost_usd
+                 FROM squads.llm_generations
+                 WHERE {time_filter} {squad_filter}
+                 GROUP BY model
+                 ORDER BY cost_usd DESC
+             """, squad_params or None)
+             by_model = cur.fetchall()
+
+         conn.close()
+
+         return jsonify({
+             "period": period,
+             "squad_filter": squad,
+             "totals": {
+                 "generations": totals["generation_count"],
+                 "input_tokens": totals["input_tokens"],
+                 "output_tokens": totals["output_tokens"],
+                 "cost_usd": float(totals["total_cost_usd"]),
+             },
+             "by_squad": [{
+                 "squad": r["squad"],
+                 "generations": r["generations"],
+                 "input_tokens": r["input_tokens"],
+                 "output_tokens": r["output_tokens"],
+                 "cost_usd": float(r["cost_usd"]),
+             } for r in by_squad],
+             "by_model": [{
+                 "model": r["model"],
+                 "generations": r["generations"],
+                 "cost_usd": float(r["cost_usd"]),
+             } for r in by_model],
+         }), 200
+
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
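For instance, a dashboard might pull the weekly rollup for one squad (URL and squad name illustrative):

    import requests

    summary = requests.get(
        "http://localhost:8080/api/cost/summary",
        params={"period": "week", "squad": "growth"},
    ).json()
    print(summary["totals"]["cost_usd"], summary["by_model"])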
+
+
+ @app.route("/api/rate-limits", methods=["GET"])
+ def get_rate_limits():
+     """Get current rate limits from Redis (captured by anthropic proxy)."""
+     if not redis_client:
+         return jsonify({"error": "Redis not available", "source": "none"}), 503
+
+     try:
+         # Get all rate limit keys
+         keys = redis_client.keys("ratelimit:latest:*")
+         limits = {}
+
+         for key in keys:
+             family = key.split(":")[-1]
+             data = redis_client.get(key)
+             if data:
+                 limits[family] = json.loads(data)
+
+         return jsonify({
+             "rate_limits": limits,
+             "source": "redis",
+             "fetched_at": datetime.now().isoformat(),
+         }), 200
+
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
+
+
+ @app.route("/api/telemetry", methods=["POST"])
+ def receive_cli_telemetry():
+     """Receive anonymous CLI telemetry events."""
+     try:
+         data = request.get_json()
+         if not data:
+             return jsonify({"error": "No JSON data"}), 400
+
+         events = data.get("events", [data])  # Support single event or batch
+
+         conn = get_db()
+         with conn.cursor() as cur:
+             for event in events:
+                 cur.execute("""
+                     INSERT INTO squads.cli_events
+                         (anonymous_id, event_name, cli_version, properties)
+                     VALUES (%s, %s, %s, %s)
+                 """, (
+                     event.get("properties", {}).get("anonymousId", ""),
+                     event.get("event", "unknown"),
+                     event.get("properties", {}).get("cliVersion", "unknown"),
+                     json.dumps(event.get("properties", {})),
+                 ))
+
+         conn.commit()
+         conn.close()
+
+         return jsonify({
+             "status": "ok",
+             "received": len(events),
+         }), 200
+
+     except Exception as e:
+         if DEBUG_MODE:
+             print(f"[TELEMETRY] Error: {e}")
+         return jsonify({"error": str(e)}), 500
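A batch submission matching the fields the insert reads; the anonymousId/cliVersion values and the extra command property are illustrative (properties is stored as free-form JSON):

    import requests

    batch = {"events": [
        {"event": "command_run",
         "properties": {"anonymousId": "a1b2c3", "cliVersion": "0.3.0", "command": "deploy"}},
        {"event": "command_run",
         "properties": {"anonymousId": "a1b2c3", "cliVersion": "0.3.0", "command": "status"}},
    ]}
    print(requests.post("http://localhost:8080/api/telemetry", json=batch).json())
    # -> {"status": "ok", "received": 2}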
+
+
+ # =============================================================================
+ # Conversations API - Captures from engram hook
+ # =============================================================================
+
+ def save_conversation(conn, session_id, user_id, role, content, message_type, importance, metadata):
+     """Save conversation message to postgres."""
+     with conn.cursor() as cur:
+         cur.execute("""
+             INSERT INTO squads.conversations
+                 (session_id, user_id, role, content, message_type, importance, metadata)
+             VALUES (%s, %s, %s, %s, %s, %s, %s)
+             RETURNING id
+         """, (
+             session_id, user_id or 'local', role, content,
+             message_type, importance, json.dumps(metadata or {})
+         ))
+         return cur.fetchone()[0]
+
+
+ @app.route("/api/conversations", methods=["POST"])
+ def receive_conversation():
+     """Receive conversation capture from engram hook.
+
+     Fast path: buffer to Redis and return immediately; the background
+     worker saves to Postgres and forwards to engram/mem0.
+
+     Supports two formats:
+     1. Direct format (new): {session_id, user_id, role, content, message_type, importance, metadata}
+     2. Legacy format (mem0): {messages: [{role, content}], user_id, metadata: {session_id, type, importance}}
+     """
+     try:
+         data = request.get_json()
+         if not data:
+             return jsonify({"error": "No JSON data"}), 400
+
+         queued = 0
+
+         # Check for direct format (has content field at top level)
+         if "content" in data:
+             # Direct format from updated engram hook
+             content = data.get("content", "")
+             if content and len(content) >= 5:
+                 conv_data = {
+                     "session_id": data.get("session_id", ""),
+                     "user_id": data.get("user_id", "local"),
+                     "role": data.get("role", "user"),
+                     "content": content,
+                     "message_type": data.get("message_type", "message"),
+                     "importance": data.get("importance", "normal"),
+                     "metadata": data.get("metadata", {}),
+                 }
+                 if data.get("working_dir"):
+                     conv_data["metadata"]["working_dir"] = data.get("working_dir")
+
+                 if buffer_conversation(conv_data):
+                     queued += 1
+         else:
+             # Legacy format (mem0 compatible)
+             messages = data.get("messages", [])
+             user_id = data.get("user_id", "local")
+             metadata = data.get("metadata", {})
+             session_id = metadata.get("session_id", "")
+             message_type = metadata.get("type", "message")
+             importance = metadata.get("importance", "normal")
+
+             for msg in messages:
+                 role = msg.get("role", "user")
+                 content = msg.get("content", "")
+
+                 if not content or len(content) < 5:
+                     continue
+
+                 conv_data = {
+                     "session_id": session_id,
+                     "user_id": user_id,
+                     "role": role,
+                     "content": content,
+                     "message_type": message_type,
+                     "importance": importance,
+                     "metadata": metadata,
+                 }
+
+                 if buffer_conversation(conv_data):
+                     queued += 1
+
+         return jsonify({
+             "status": "ok",
+             "queued": queued,
+         }), 200
+
+     except Exception as e:
+         import traceback
+         print(f"Error saving conversation: {e}")
+         traceback.print_exc()
+         return jsonify({"error": str(e)}), 500
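A direct-format capture, as the hook would send it (all values illustrative):

    import requests

    capture = {
        "session_id": "sess-123",
        "user_id": "local",
        "role": "user",
        "content": "Remember: staging deploys happen from the release branch.",
        "message_type": "message",
        "importance": "high",
        "working_dir": "/work/squads",
    }
    print(requests.post("http://localhost:8080/api/conversations", json=capture).json())
    # -> {"status": "ok", "queued": 1} when Redis is up; "queued": 0 in degraded mode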
+
+
+ @app.route("/api/conversations/search", methods=["GET"])
+ def search_conversations():
+     """Full-text search over conversations."""
+     try:
+         query = request.args.get("q", "")
+         limit = min(int(request.args.get("limit", 20)), 100)
+         user_id = request.args.get("user_id")
+         message_type = request.args.get("type")
+
+         if not query:
+             return jsonify({"error": "Query parameter 'q' required"}), 400
+
+         conn = get_db()
+         with conn.cursor(cursor_factory=RealDictCursor) as cur:
+             # Build WHERE clause
+             conditions = ["to_tsvector('english', content) @@ plainto_tsquery('english', %s)"]
+             params = [query]
+
+             if user_id:
+                 conditions.append("user_id = %s")
+                 params.append(user_id)
+
+             if message_type:
+                 conditions.append("message_type = %s")
+                 params.append(message_type)
+
+             where_clause = " AND ".join(conditions)
+             params.append(limit)
+
+             # The leading query param feeds ts_rank; the rest fill the WHERE clause and LIMIT
+             cur.execute(f"""
+                 SELECT
+                     id, session_id, user_id, role, content,
+                     message_type, importance, created_at,
+                     ts_rank(to_tsvector('english', content), plainto_tsquery('english', %s)) as rank
+                 FROM squads.conversations
+                 WHERE {where_clause}
+                 ORDER BY rank DESC, created_at DESC
+                 LIMIT %s
+             """, [query] + params)
+
+             results = cur.fetchall()
+
+         conn.close()
+
+         return jsonify({
+             "query": query,
+             "count": len(results),
+             "results": [{
+                 "id": r["id"],
+                 "session_id": r["session_id"],
+                 "role": r["role"],
+                 "content": r["content"][:500] + "..." if len(r["content"]) > 500 else r["content"],
+                 "type": r["message_type"],
+                 "importance": r["importance"],
+                 "created_at": r["created_at"].isoformat() if r["created_at"] else None,
+                 "rank": float(r["rank"]),
+             } for r in results],
+         }), 200
+
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
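Querying the index (local bridge assumed; search terms illustrative):

    import requests

    hits = requests.get(
        "http://localhost:8080/api/conversations/search",
        params={"q": "release branch", "limit": 5, "type": "message"},
    ).json()
    for hit in hits["results"]:
        print(round(hit["rank"], 3), hit["content"][:60])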
+
+
+ @app.route("/api/conversations/recent", methods=["GET"])
+ def recent_conversations():
+     """Get recent conversations (for debugging/review)."""
+     try:
+         limit = min(int(request.args.get("limit", 20)), 100)
+         user_id = request.args.get("user_id")
+
+         conn = get_db()
+         with conn.cursor(cursor_factory=RealDictCursor) as cur:
+             if user_id:
+                 cur.execute("""
+                     SELECT id, session_id, user_id, role, content, message_type, importance, created_at
+                     FROM squads.conversations
+                     WHERE user_id = %s
+                     ORDER BY created_at DESC
+                     LIMIT %s
+                 """, (user_id, limit))
+             else:
+                 cur.execute("""
+                     SELECT id, session_id, user_id, role, content, message_type, importance, created_at
+                     FROM squads.conversations
+                     ORDER BY created_at DESC
+                     LIMIT %s
+                 """, (limit,))
+
+             results = cur.fetchall()
+
+         conn.close()
+
+         return jsonify({
+             "count": len(results),
+             "conversations": [{
+                 "id": r["id"],
+                 "session_id": r["session_id"],
+                 "role": r["role"],
+                 "content": r["content"][:300] + "..." if len(r["content"]) > 300 else r["content"],
+                 "type": r["message_type"],
+                 "importance": r["importance"],
+                 "created_at": r["created_at"].isoformat() if r["created_at"] else None,
+             } for r in results],
+         }), 200
+
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
+
+
+ @app.route("/debug/logs", methods=["GET"])
+ def debug_logs():
+     """Get recent log attributes for debugging."""
+     if not DEBUG_MODE:
+         return jsonify({"error": "Debug mode disabled"}), 403
+     return jsonify({
+         "debug_mode": True,
+         "recent_logs": list(recent_logs),
+         "count": len(recent_logs),
+     }), 200
+
+
+ # =============================================================================
+ # Task Tracking API - Track task completion, retries, quality
+ # =============================================================================
+
+ @app.route("/api/tasks", methods=["POST"])
+ def create_or_update_task():
+     """Create or update a task."""
+     try:
+         data = request.get_json()
+         if not data:
+             return jsonify({"error": "No JSON data"}), 400
+
+         task_id = data.get("task_id")
+         if not task_id:
+             return jsonify({"error": "task_id required"}), 400
+
+         conn = get_db()
+         with conn.cursor(cursor_factory=RealDictCursor) as cur:
+             # Check if task exists
+             cur.execute("SELECT id, retry_count FROM squads.tasks WHERE task_id = %s", (task_id,))
+             existing = cur.fetchone()
+
+             if existing:
+                 # Update existing task
+                 retry_count = existing["retry_count"]
+                 if data.get("status") == "started" and data.get("is_retry"):
+                     retry_count += 1
+
+                 cur.execute("""
+                     UPDATE squads.tasks SET
+                         status = COALESCE(%s, status),
+                         success = COALESCE(%s, success),
+                         retry_count = %s,
+                         output_type = COALESCE(%s, output_type),
+                         output_ref = COALESCE(%s, output_ref),
+                         total_tokens = COALESCE(%s, total_tokens),
+                         total_cost_usd = COALESCE(%s, total_cost_usd),
+                         peak_context_tokens = GREATEST(peak_context_tokens, COALESCE(%s, 0)),
+                         context_utilization_pct = GREATEST(context_utilization_pct, COALESCE(%s, 0)),
+                         completed_at = CASE WHEN %s IN ('completed', 'failed', 'cancelled') THEN NOW() ELSE completed_at END,
+                         duration_ms = CASE WHEN %s IN ('completed', 'failed', 'cancelled')
+                                            THEN EXTRACT(EPOCH FROM (NOW() - started_at)) * 1000 ELSE duration_ms END,
+                         metadata = metadata || %s::jsonb
+                     WHERE task_id = %s
+                     RETURNING *
+                 """, (
+                     data.get("status"),
+                     data.get("success"),
+                     retry_count,
+                     data.get("output_type"),
+                     data.get("output_ref"),
+                     data.get("total_tokens"),
+                     data.get("total_cost_usd"),
+                     data.get("peak_context_tokens"),
+                     data.get("context_utilization_pct"),
+                     data.get("status"),
+                     data.get("status"),
+                     json.dumps(data.get("metadata", {})),
+                     task_id,
+                 ))
+                 result = cur.fetchone()
+                 action = "updated"
+             else:
+                 # Create new task
+                 cur.execute("""
+                     INSERT INTO squads.tasks
+                         (task_id, session_id, squad, agent, task_type, description,
+                          status, output_type, output_ref, total_tokens, total_cost_usd,
+                          peak_context_tokens, context_utilization_pct, metadata)
+                     VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
+                     RETURNING *
+                 """, (
+                     task_id,
+                     data.get("session_id"),
+                     data.get("squad", "hq"),
+                     data.get("agent"),
+                     data.get("task_type", "goal"),
+                     data.get("description"),
+                     data.get("status", "started"),
+                     data.get("output_type"),
+                     data.get("output_ref"),
+                     data.get("total_tokens", 0),
+                     data.get("total_cost_usd", 0),
+                     data.get("peak_context_tokens", 0),
+                     data.get("context_utilization_pct"),
+                     json.dumps(data.get("metadata", {})),
+                 ))
+                 result = cur.fetchone()
+                 action = "created"
+
+         conn.commit()
+         conn.close()
+
+         return jsonify({
+             "status": "ok",
+             "action": action,
+             "task": {
+                 "task_id": result["task_id"],
+                 "squad": result["squad"],
+                 "status": result["status"],
+                 "retry_count": result["retry_count"],
+             }
+         }), 200
+
+     except Exception as e:
+         import traceback
+         traceback.print_exc()
+         return jsonify({"error": str(e)}), 500
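A typical lifecycle is two calls against this endpoint: one to open the task, one to close it (task_id and metrics illustrative):

    import requests

    BRIDGE = "http://localhost:8080"  # assumed local bridge

    # Open the task
    requests.post(f"{BRIDGE}/api/tasks", json={
        "task_id": "task-42",
        "squad": "growth",
        "task_type": "goal",
        "description": "Draft launch email",
        "status": "started",
    })

    # Close it with final metrics; completed_at/duration_ms are filled in server-side
    print(requests.post(f"{BRIDGE}/api/tasks", json={
        "task_id": "task-42",
        "status": "completed",
        "success": True,
        "total_tokens": 18250,
        "total_cost_usd": 0.31,
    }).json())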
+
+
+ @app.route("/api/tasks/<task_id>/feedback", methods=["POST"])
+ def add_task_feedback(task_id):
+     """Add feedback for a task."""
+     try:
+         data = request.get_json()
+         if not data:
+             return jsonify({"error": "No JSON data"}), 400
+
+         conn = get_db()
+         with conn.cursor() as cur:
+             # Verify task exists
+             cur.execute("SELECT id FROM squads.tasks WHERE task_id = %s", (task_id,))
+             if not cur.fetchone():
+                 conn.close()
+                 return jsonify({"error": f"Task {task_id} not found"}), 404
+
+             # Insert feedback
+             tags = data.get("tags", [])
+             cur.execute("""
+                 INSERT INTO squads.task_feedback
+                     (task_id, quality_score, was_helpful, required_fixes, fix_description, tags, notes)
+                 VALUES (%s, %s, %s, %s, %s, %s, %s)
+                 RETURNING id
+             """, (
+                 task_id,
+                 data.get("quality_score"),
+                 data.get("was_helpful"),
+                 data.get("required_fixes", False),
+                 data.get("fix_description"),
+                 tags if tags else None,
+                 data.get("notes"),
+             ))
+             feedback_id = cur.fetchone()[0]
+
+         conn.commit()
+         conn.close()
+
+         return jsonify({
+             "status": "ok",
+             "feedback_id": feedback_id,
+         }), 200
+
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
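Recording a review against the task opened in the previous sketch (the score scale is whatever the squads.task_feedback schema defines; the value here is illustrative):

    import requests

    print(requests.post(
        "http://localhost:8080/api/tasks/task-42/feedback",
        json={
            "quality_score": 4,
            "was_helpful": True,
            "required_fixes": True,
            "fix_description": "Tightened the subject line",
            "tags": ["email", "copy"],
        },
    ).json())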
+
+
+ @app.route("/api/insights", methods=["GET"])
+ def get_insights():
+     """Get aggregated insights for dashboard."""
+     try:
+         squad = request.args.get("squad")
+         period = request.args.get("period", "week")  # day, week, month
+         days = {"day": 1, "week": 7, "month": 30}.get(period, 7)
+
+         conn = get_db()
+         with conn.cursor(cursor_factory=RealDictCursor) as cur:
+             # Squad filter
+             squad_filter = "AND t.squad = %s" if squad else ""
+             params = [days, squad] if squad else [days]
+
+             # Task completion metrics
+             cur.execute(f"""
+                 SELECT
+                     COALESCE(t.squad, 'all') as squad,
+                     COUNT(*) as tasks_total,
+                     COUNT(*) FILTER (WHERE t.status = 'completed') as tasks_completed,
+                     COUNT(*) FILTER (WHERE t.status = 'failed') as tasks_failed,
+                     COUNT(*) FILTER (WHERE t.success = true) as tasks_successful,
+                     ROUND(100.0 * COUNT(*) FILTER (WHERE t.success = true) / NULLIF(COUNT(*), 0), 1) as success_rate,
+                     SUM(t.retry_count) as total_retries,
+                     COUNT(*) FILTER (WHERE t.retry_count > 0) as tasks_with_retries,
+                     ROUND(AVG(t.retry_count)::numeric, 2) as avg_retries,
+                     ROUND(AVG(t.duration_ms)::numeric, 0) as avg_duration_ms,
+                     ROUND(AVG(t.total_tokens)::numeric, 0) as avg_tokens,
+                     ROUND(AVG(t.total_cost_usd)::numeric, 4) as avg_cost,
+                     ROUND(AVG(t.context_utilization_pct)::numeric, 1) as avg_context_pct,
+                     MAX(t.peak_context_tokens) as max_context_tokens
+                 FROM squads.tasks t
+                 WHERE t.started_at >= NOW() - INTERVAL '%s days' {squad_filter}
+                 GROUP BY t.squad
+                 ORDER BY tasks_total DESC
+             """, params)
+             task_metrics = cur.fetchall()
+
+             # Quality metrics from feedback
+             cur.execute(f"""
+                 SELECT
+                     COALESCE(t.squad, 'all') as squad,
+                     COUNT(f.id) as feedback_count,
+                     ROUND(AVG(f.quality_score)::numeric, 2) as avg_quality,
+                     ROUND(100.0 * COUNT(*) FILTER (WHERE f.was_helpful = true) / NULLIF(COUNT(*), 0), 1) as helpful_pct,
+                     ROUND(100.0 * COUNT(*) FILTER (WHERE f.required_fixes = true) / NULLIF(COUNT(*), 0), 1) as fix_required_pct
+                 FROM squads.tasks t
+                 LEFT JOIN squads.task_feedback f ON t.task_id = f.task_id
+                 WHERE t.started_at >= NOW() - INTERVAL '%s days' {squad_filter}
+                     AND f.id IS NOT NULL
+                 GROUP BY t.squad
+             """, params)
+             quality_metrics = cur.fetchall()
+
+             # Tool usage metrics
+             cur.execute("""
+                 SELECT
+                     tool_name,
+                     COUNT(*) as usage_count,
+                     ROUND(100.0 * COUNT(*) FILTER (WHERE success = true) / NULLIF(COUNT(*), 0), 1) as success_rate,
+                     ROUND(AVG(duration_ms)::numeric, 0) as avg_duration_ms
+                 FROM squads.tool_executions
+                 WHERE created_at >= NOW() - INTERVAL '%s days'
+                 GROUP BY tool_name
+                 ORDER BY usage_count DESC
+                 LIMIT 15
+             """, [days])
+             top_tools = cur.fetchall()
+
+             # Overall tool failure rate
+             cur.execute("""
+                 SELECT
+                     ROUND(100.0 * COUNT(*) FILTER (WHERE success = false) / NULLIF(COUNT(*), 0), 1) as failure_rate
+                 FROM squads.tool_executions
+                 WHERE created_at >= NOW() - INTERVAL '%s days'
+             """, [days])
+             tool_failure = cur.fetchone()
+
+             # Session efficiency
+             cur.execute(f"""
+                 SELECT
+                     COALESCE(squad, 'all') as squad,
+                     COUNT(*) as sessions,
+                     ROUND(AVG(total_cost_usd)::numeric, 4) as avg_session_cost,
+                     ROUND(AVG(generation_count)::numeric, 1) as avg_generations,
+                     ROUND(AVG(tool_count)::numeric, 1) as avg_tools
+                 FROM squads.sessions
+                 WHERE started_at >= NOW() - INTERVAL '%s days' {squad_filter.replace('t.', '')}
+                 GROUP BY squad
+             """, params)
+             session_metrics = cur.fetchall()
+
+         conn.close()
+
+         return jsonify({
+             "period": period,
+             "days": days,
+             "squad_filter": squad,
+             "task_metrics": [dict(r) for r in task_metrics],
+             "quality_metrics": [dict(r) for r in quality_metrics],
+             "top_tools": [dict(r) for r in top_tools],
+             "tool_failure_rate": float(tool_failure["failure_rate"] or 0) if tool_failure else 0,
+             "session_metrics": [dict(r) for r in session_metrics],
+         }), 200
+
+     except Exception as e:
+         import traceback
+         traceback.print_exc()
+         return jsonify({"error": str(e)}), 500
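Pulling a weekly report for one squad (URL and squad name illustrative):

    import requests

    report = requests.get(
        "http://localhost:8080/api/insights",
        params={"period": "week", "squad": "growth"},
    ).json()
    for row in report["task_metrics"]:
        print(row["squad"], row["success_rate"], row["avg_retries"])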
1373
+
1374
+
1375
+ @app.route("/api/insights/compute", methods=["POST"])
1376
+ def compute_insights():
1377
+ """Compute and cache insights into agent_insights table."""
1378
+ try:
1379
+ period = request.args.get("period", "day") # day, week, month
1380
+
1381
+ conn = get_db()
1382
+ with conn.cursor() as cur:
1383
+ # Compute for each squad
1384
+ cur.execute("""
1385
+ INSERT INTO squads.agent_insights
1386
+ (period, period_start, squad, agent,
1387
+ tasks_started, tasks_completed, tasks_failed, success_rate,
1388
+ total_retries, avg_retries_per_task, tasks_with_retries,
1389
+ avg_quality_score, feedback_count, helpful_pct, fix_required_pct,
1390
+ avg_duration_ms, avg_tokens_per_task, avg_cost_per_task, avg_context_utilization,
1391
+ top_tools, tool_failure_rate)
1392
+ SELECT
1393
+ %s as period,
1394
+ CURRENT_DATE as period_start,
1395
+ t.squad,
1396
+ t.agent,
1397
+ COUNT(*) as tasks_started,
1398
+ COUNT(*) FILTER (WHERE t.status = 'completed') as tasks_completed,
1399
+ COUNT(*) FILTER (WHERE t.status = 'failed') as tasks_failed,
1400
+ ROUND(100.0 * COUNT(*) FILTER (WHERE t.success = true) / NULLIF(COUNT(*), 0), 2),
1401
+ SUM(t.retry_count),
1402
+ ROUND(AVG(t.retry_count)::numeric, 2),
1403
+ COUNT(*) FILTER (WHERE t.retry_count > 0),
1404
+ (SELECT ROUND(AVG(f.quality_score)::numeric, 2) FROM squads.task_feedback f WHERE f.task_id = ANY(ARRAY_AGG(t.task_id))),
1405
+ (SELECT COUNT(*) FROM squads.task_feedback f WHERE f.task_id = ANY(ARRAY_AGG(t.task_id))),
1406
+ NULL, NULL,
1407
+ ROUND(AVG(t.duration_ms)::numeric, 0),
1408
+ ROUND(AVG(t.total_tokens)::numeric, 0),
1409
+ ROUND(AVG(t.total_cost_usd)::numeric, 6),
1410
+ ROUND(AVG(t.context_utilization_pct)::numeric, 2),
1411
+ '[]'::jsonb,
1412
+ NULL
1413
+ FROM squads.tasks t
1414
+ WHERE t.started_at >= CURRENT_DATE - INTERVAL '1 day' * %s
1415
+ GROUP BY t.squad, t.agent
1416
+ ON CONFLICT (period, period_start, squad, agent) DO UPDATE SET
1417
+ tasks_started = EXCLUDED.tasks_started,
1418
+ tasks_completed = EXCLUDED.tasks_completed,
1419
+ tasks_failed = EXCLUDED.tasks_failed,
1420
+ success_rate = EXCLUDED.success_rate,
1421
+ total_retries = EXCLUDED.total_retries,
1422
+ avg_retries_per_task = EXCLUDED.avg_retries_per_task,
1423
+ tasks_with_retries = EXCLUDED.tasks_with_retries,
1424
+ avg_quality_score = EXCLUDED.avg_quality_score,
1425
+ feedback_count = EXCLUDED.feedback_count,
1426
+ avg_duration_ms = EXCLUDED.avg_duration_ms,
1427
+ avg_tokens_per_task = EXCLUDED.avg_tokens_per_task,
1428
+ avg_cost_per_task = EXCLUDED.avg_cost_per_task,
1429
+ avg_context_utilization = EXCLUDED.avg_context_utilization,
1430
+ captured_at = NOW()
1431
+ """, (period, {"day": 1, "week": 7, "month": 30}.get(period, 7)))
1432
+
1433
+ conn.commit()
1434
+ conn.close()
1435
+
1436
+ return jsonify({"status": "ok", "period": period}), 200
1437
+
1438
+ except Exception as e:
1439
+ import traceback
1440
+ traceback.print_exc()
1441
+ return jsonify({"error": str(e)}), 500
1442
+
1443
+
1444
+ if __name__ == "__main__":
1445
+ port = int(os.environ.get("PORT", 8080))
1446
+ print(f"Starting Squads Bridge on port {port}")
1447
+ print(f" PostgreSQL: {DATABASE_URL.split('@')[1] if '@' in DATABASE_URL else DATABASE_URL}")
1448
+ print(f" Redis: {'connected' if redis_client else 'disabled'}")
1449
+ print(f" Langfuse: {'enabled' if LANGFUSE_ENABLED else 'disabled'}")
1450
+ print(f" Engram: {'enabled -> ' + ENGRAM_URL if ENGRAM_ENABLED else 'disabled'}")
1451
+ print(f" Budget: ${DAILY_BUDGET}/day")
1452
+
1453
+ # Start background conversation processor
1454
+ if redis_client:
1455
+ start_conversation_worker()
1456
+
1457
+ app.run(host="0.0.0.0", port=port)