brainlayer 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. brainlayer/__init__.py +3 -0
  2. brainlayer/cli/__init__.py +1545 -0
  3. brainlayer/cli/wizard.py +132 -0
  4. brainlayer/cli_new.py +151 -0
  5. brainlayer/client.py +164 -0
  6. brainlayer/clustering.py +736 -0
  7. brainlayer/daemon.py +1105 -0
  8. brainlayer/dashboard/README.md +129 -0
  9. brainlayer/dashboard/__init__.py +5 -0
  10. brainlayer/dashboard/app.py +151 -0
  11. brainlayer/dashboard/search.py +229 -0
  12. brainlayer/dashboard/views.py +230 -0
  13. brainlayer/embeddings.py +131 -0
  14. brainlayer/engine.py +550 -0
  15. brainlayer/index_new.py +87 -0
  16. brainlayer/mcp/__init__.py +1558 -0
  17. brainlayer/migrate.py +205 -0
  18. brainlayer/paths.py +43 -0
  19. brainlayer/pipeline/__init__.py +47 -0
  20. brainlayer/pipeline/analyze_communication.py +508 -0
  21. brainlayer/pipeline/brain_graph.py +567 -0
  22. brainlayer/pipeline/chat_tags.py +63 -0
  23. brainlayer/pipeline/chunk.py +422 -0
  24. brainlayer/pipeline/classify.py +472 -0
  25. brainlayer/pipeline/cluster_sampling.py +73 -0
  26. brainlayer/pipeline/enrichment.py +810 -0
  27. brainlayer/pipeline/extract.py +66 -0
  28. brainlayer/pipeline/extract_claude_desktop.py +149 -0
  29. brainlayer/pipeline/extract_corrections.py +231 -0
  30. brainlayer/pipeline/extract_markdown.py +195 -0
  31. brainlayer/pipeline/extract_whatsapp.py +227 -0
  32. brainlayer/pipeline/git_overlay.py +301 -0
  33. brainlayer/pipeline/longitudinal_analyzer.py +568 -0
  34. brainlayer/pipeline/obsidian_export.py +455 -0
  35. brainlayer/pipeline/operation_grouping.py +486 -0
  36. brainlayer/pipeline/plan_linking.py +313 -0
  37. brainlayer/pipeline/sanitize.py +549 -0
  38. brainlayer/pipeline/semantic_style.py +574 -0
  39. brainlayer/pipeline/session_enrichment.py +472 -0
  40. brainlayer/pipeline/style_embed.py +67 -0
  41. brainlayer/pipeline/style_index.py +139 -0
  42. brainlayer/pipeline/temporal_chains.py +203 -0
  43. brainlayer/pipeline/time_batcher.py +248 -0
  44. brainlayer/pipeline/unified_timeline.py +569 -0
  45. brainlayer/storage.py +66 -0
  46. brainlayer/store.py +155 -0
  47. brainlayer/taxonomy.json +80 -0
  48. brainlayer/vector_store.py +1891 -0
  49. brainlayer-1.0.0.dist-info/METADATA +313 -0
  50. brainlayer-1.0.0.dist-info/RECORD +53 -0
  51. brainlayer-1.0.0.dist-info/WHEEL +4 -0
  52. brainlayer-1.0.0.dist-info/entry_points.txt +4 -0
  53. brainlayer-1.0.0.dist-info/licenses/LICENSE +190 -0
brainlayer/daemon.py ADDED
@@ -0,0 +1,1105 @@
1
+ """FastAPI daemon service for fast brainlayer queries + dashboard API."""
2
+
3
+ import asyncio
4
+ import json
5
+ import logging
6
+ import os
7
+ import signal
8
+ import subprocess
9
+ import sys
10
+ import time
11
+ from contextlib import asynccontextmanager
12
+ from pathlib import Path
13
+ from typing import Any, Dict, List, Optional
14
+
15
+ import apsw
16
+ import uvicorn
17
+ from fastapi import FastAPI, HTTPException, Request
18
+ from fastapi.middleware.cors import CORSMiddleware
19
+ from fastapi.responses import FileResponse, JSONResponse
20
+ from pydantic import BaseModel
21
+
22
+ from .embeddings import get_embedding_model
23
+ from .vector_store import VectorStore
24
+
25
# Module-wide logger, named after this module.
logger = logging.getLogger(__name__)

# Default paths
from .paths import DEFAULT_DB_PATH

# Fixed filesystem locations.
SOCKET_PATH = Path("/tmp/brainlayer.sock")
BRAIN_DIR = Path.home().joinpath(".brainlayer-brain")
API_COSTS_PATH = Path.home().joinpath(".local", "share", "brainlayer", "api_costs.jsonl")

# Global state — vector_store and embedding_model are populated by lifespan().
vector_store: Optional[VectorStore] = None
embedding_model = None
http_port: Optional[int] = None
38
+
39
+
40
class SearchRequest(BaseModel):
    """Request body accepted by the ``POST /search`` endpoint."""

    # Text to search for; embedded first when semantic search is enabled.
    query: str
    # Number of results requested from the vector store.
    n_results: int = 10

    # Optional filters, forwarded to the vector store unchanged.
    project_filter: Optional[str] = None
    content_type_filter: Optional[str] = None
    source_filter: Optional[str] = None

    # use_semantic: embed the query and search by vector.
    # hybrid: when use_semantic is also True, call VectorStore.hybrid_search.
    use_semantic: bool = True
    hybrid: bool = True
50
+
51
+
52
class SearchResponse(BaseModel):
    """Response body returned by the ``POST /search`` endpoint."""

    # Parallel lists: index i of each list is filled from the same slot of the
    # vector-store result lists.
    ids: List[Optional[str]] = []
    documents: List[str]
    metadatas: List[Dict[str, Any]]
    distances: List[Optional[float]]
    # Wall-clock time spent inside the handler, in milliseconds.
    total_time_ms: float
60
+
61
+
62
class StatsResponse(BaseModel):
    """Response body returned by the ``GET /stats`` endpoint."""

    # Populated from the dict returned by VectorStore.get_stats().
    total_chunks: int
    projects: List[str]
    content_types: List[str]
68
+
69
+
70
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application lifespan. Guards against double-init in dual server mode.

    On first entry this opens the shared ``VectorStore``, loads the embedding
    model and warms it up with a throwaway query. On exit it closes the store
    and resets the global so a later startup can initialise again.

    Cleanup runs in a ``finally`` block so the store is also closed (and the
    global cleared) when startup fails after the store was opened, or when the
    context is left through an exception or cancellation.
    """
    global vector_store, embedding_model

    # Guard: only initialize once (dual servers share the same app).
    if vector_store is not None:
        yield
        return

    # Startup
    logger.info("Starting brainlayer daemon...")
    vector_store = VectorStore(DEFAULT_DB_PATH)

    try:
        logger.info(f"Loaded vector store: {vector_store.count()} chunks")

        embedding_model = get_embedding_model()
        logger.info(f"Loaded embedding model: {embedding_model.model_name}")

        # Warmup is best-effort: a failure is logged but does not block startup.
        try:
            embedding_model.embed_query("test query")
            logger.info("Model warmed up successfully")
        except Exception as e:
            logger.warning(f"Model warmup failed: {e}")

        yield
    finally:
        # Shutdown (only close once)
        logger.info("Shutting down brainlayer daemon...")
        if vector_store:
            vector_store.close()
            vector_store = None
102
+
103
+
104
# The single FastAPI application; `lifespan` handles startup and shutdown.
app = FastAPI(
    lifespan=lifespan,
    title="BrainLayer Daemon",
    description="Fast search daemon + dashboard API for brainlayer knowledge base",
    version="0.2.0",
)

# CORS — allow dashboard origins
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_headers=["*"],
    allow_methods=["GET", "POST", "PATCH", "DELETE"],
    allow_origins=[
        "http://localhost:3000",
        "http://localhost:3001",
        "http://localhost:5173",
        "http://localhost:8080",
    ],
)
124
+
125
+
126
+ # ──────────────────────────────────────────────
127
+ # Existing endpoints (search, stats, context)
128
+ # ──────────────────────────────────────────────
129
+
130
+
131
@app.get("/health")
async def health_check():
    """Report liveness plus the chunk count (0 while the store is not loaded)."""
    chunk_total = vector_store.count() if vector_store else 0
    return {"status": "healthy", "chunks": chunk_total}
135
+
136
+
137
@app.get("/stats", response_model=StatsResponse)
async def get_stats():
    """Return collection statistics from the vector store.

    Raises:
        HTTPException: 503 when the vector store has not been initialised.
    """
    if not vector_store:
        raise HTTPException(status_code=503, detail="Vector store not initialized")

    return StatsResponse(**vector_store.get_stats())
145
+
146
+
147
@app.post("/search", response_model=SearchResponse)
async def search(request: SearchRequest):
    """Search the knowledge base.

    Dispatches on the request flags:
      * ``use_semantic`` and ``hybrid``  -> ``VectorStore.hybrid_search`` (vector + text)
      * ``use_semantic`` only            -> ``VectorStore.search`` by embedding
      * otherwise                        -> ``VectorStore.search`` by text

    Raises:
        HTTPException: 503 when the vector store is not initialised, 500 when
            anything inside the search itself fails (details are only logged).
    """
    if not vector_store:
        raise HTTPException(status_code=503, detail="Vector store not initialized")

    started_at = time.time()

    try:
        # Arguments common to every search flavour.
        shared_kwargs = {
            "n_results": request.n_results,
            "project_filter": request.project_filter,
            "content_type_filter": request.content_type_filter,
            "source_filter": request.source_filter,
        }

        if not request.use_semantic:
            hits = vector_store.search(query_text=request.query, **shared_kwargs)
        else:
            query_vector = embedding_model.embed_query(request.query)
            if request.hybrid:
                hits = vector_store.hybrid_search(
                    query_embedding=query_vector,
                    query_text=request.query,
                    **shared_kwargs,
                )
            else:
                hits = vector_store.search(query_embedding=query_vector, **shared_kwargs)

        elapsed_ms = (time.time() - started_at) * 1000

        # Results are nested one level deep; only the first inner list is returned.
        return SearchResponse(
            ids=hits.get("ids", [[]])[0],
            documents=hits["documents"][0],
            metadatas=hits["metadatas"][0],
            distances=hits["distances"][0],
            total_time_ms=elapsed_ms,
        )

    except Exception as e:
        logger.error(f"Search failed: {e}")
        raise HTTPException(status_code=500, detail="Search failed")
197
+
198
+
199
@app.get("/context/{chunk_id}")
async def get_context(chunk_id: str, before: int = 3, after: int = 3):
    """Return the conversation context surrounding a chunk.

    Args:
        chunk_id: Identifier of the chunk to centre on.
        before: Passed to ``VectorStore.get_context`` as ``before``.
        after: Passed to ``VectorStore.get_context`` as ``after``.

    Raises:
        HTTPException: 503 if the store is not initialised, 404 when the store
            reports an ``error`` entry, 500 on any other failure.
    """
    if not vector_store:
        raise HTTPException(status_code=503, detail="Vector store not initialized")

    try:
        context = vector_store.get_context(chunk_id, before=before, after=after)
        lookup_error = context.get("error")
        if lookup_error:
            raise HTTPException(status_code=404, detail=context["error"])
        return context
    except HTTPException:
        # Let the 404 above through instead of turning it into a 500.
        raise
    except Exception as e:
        logger.error(f"Context lookup failed: {e}")
        raise HTTPException(status_code=500, detail="Context lookup failed")
215
+
216
+
217
+ # ──────────────────────────────────────────────
218
+ # Brain View endpoints
219
+ # ──────────────────────────────────────────────
220
+
221
+
222
@app.get("/brain/graph")
async def brain_graph():
    """Serve the pre-generated ``graph.json`` from ``BRAIN_DIR``.

    Raises:
        HTTPException: 404 when the file does not exist.
    """
    graph_file = BRAIN_DIR / "graph.json"
    if graph_file.exists():
        return FileResponse(graph_file, media_type="application/json")
    raise HTTPException(status_code=404, detail="graph.json not found. Run: brainlayer brain-export")
229
+
230
+
231
@app.get("/brain/metadata")
async def brain_metadata():
    """Return the parsed contents of ``metadata.json`` from ``BRAIN_DIR``.

    Raises:
        HTTPException: 404 when the file does not exist.
    """
    metadata_file = BRAIN_DIR / "metadata.json"
    if not metadata_file.exists():
        raise HTTPException(status_code=404, detail="metadata.json not found. Run: brainlayer brain-export")

    with open(metadata_file) as handle:
        metadata = json.load(handle)
    return metadata
239
+
240
+
241
@app.get("/brain/node/{node_id}")
async def brain_node_detail(node_id: str):
    """Return the entry of ``graph.json`` whose ``id`` equals ``node_id``.

    The whole graph file is loaded on every call and its ``nodes`` list is
    scanned for the first match.

    Raises:
        HTTPException: 404 when the graph file or the node is missing.
    """
    graph_file = BRAIN_DIR / "graph.json"
    if not graph_file.exists():
        raise HTTPException(status_code=404, detail="graph.json not found")

    with open(graph_file) as handle:
        graph = json.load(handle)

    match = next(
        (candidate for candidate in graph.get("nodes", []) if candidate.get("id") == node_id),
        None,
    )
    if match is None:
        raise HTTPException(status_code=404, detail=f"Node {node_id} not found")
    return match
256
+
257
+
258
+ # ──────────────────────────────────────────────
259
+ # Health / Service status endpoints
260
+ # ──────────────────────────────────────────────
261
+
262
+
263
@app.get("/health/services")
async def health_services():
    """Check status of ecosystem services: Ollama, MLX, Telegram bot, Railway, launchd.

    Every probe is a subprocess (``curl`` or ``launchctl``) run in a worker
    thread; all probes are awaited together. A probe that fails or times out
    yields an empty string, which the status mapping below treats as "down".
    """

    def _probe(cmd: List[str], timeout: int = 3) -> str:
        """Run one check command and return its stripped stdout ('' on any error)."""
        try:
            completed = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
            return completed.stdout.strip()
        except Exception:
            return ""

    def _base_url(env_var: str, default: str, marker: str) -> str:
        """Read a URL from the environment and cut it at the last `marker`."""
        configured = os.environ.get(env_var, default)
        if marker in configured:
            return configured.rsplit(marker, 1)[0]
        return configured.rstrip("/")

    def _curl_status(url: str) -> List[str]:
        """Build a curl command that prints only the HTTP status code."""
        return ["curl", "-s", "-o", "/dev/null", "-w", "%{http_code}", url]

    # Health check URLs — use env vars matching enrichment.py, fallback to defaults
    ollama_base = _base_url("BRAINLAYER_OLLAMA_URL", "http://127.0.0.1:11434/api/generate", "/api/")
    mlx_base = _base_url("BRAINLAYER_MLX_URL", "http://127.0.0.1:8080/v1/chat/completions", "/v1/")

    # Run all checks concurrently via thread pool (non-blocking). The last
    # command lists every launchd job mentioning "brainlayer" in a single call.
    ollama_code, mlx_code, telegram_out, railway_code, launchd_out = await asyncio.gather(
        asyncio.to_thread(_probe, _curl_status(f"{ollama_base}/api/tags"), 3),
        asyncio.to_thread(_probe, _curl_status(f"{mlx_base}/v1/models"), 3),
        asyncio.to_thread(_probe, ["launchctl", "list", "com.brainlayer.telegram"], 3),
        asyncio.to_thread(_probe, _curl_status("http://localhost:8080/health"), 5),
        asyncio.to_thread(
            _probe,
            ["bash", "-c", "launchctl list 2>/dev/null | grep -E 'brainlayer' || true"],
            3,
        ),
    )

    launchd_services = {
        "nightshift": "com.brainlayer.nightshift",
        "briefing": "com.brainlayer.briefing",
        "healthcheck": "com.brainlayer.healthcheck",
        "compactor": "com.brainlayer.compactor",
        "bedtime_guardian": "com.brainlayer.bedtime-guardian",
        "session_archiver": "com.brainlayer.session-archiver",
        "auto_index": "com.brainlayer.auto-index",
    }
    launchd_lines = launchd_out.splitlines()

    def _launchd_status(label: str) -> str:
        """Classify one label from the launchd listing.

        Listing format: "PID\\tExitStatus\\tLabel". PID is "-" for scheduled
        services not currently running (normal for cron-like jobs).
        """
        row = next((entry for entry in launchd_lines if label in entry), None)
        if row is None:
            return "not_loaded"
        fields = row.split("\t")
        pid = fields[0].strip() if fields else "-"
        exit_status = fields[1].strip() if len(fields) > 1 else "0"
        if pid != "-":
            return "up"
        # Loaded but not running: distinguish a clean last run from a failed one.
        return "idle" if exit_status == "0" else "error"

    def _up_if_200(code: str) -> Dict[str, str]:
        return {"status": "up" if code == "200" else "down"}

    telegram_running = bool(telegram_out) and not telegram_out.startswith("-\t")

    services = {
        "ollama": _up_if_200(ollama_code),
        "mlx": _up_if_200(mlx_code),
        "telegram_bot": {"status": "up" if telegram_running else "down"},
        "railway": _up_if_200(railway_code),
        "brainlayer_daemon": {
            "status": "up",
            "chunks": vector_store.count() if vector_store else 0,
        },
    }
    for name, label in launchd_services.items():
        services[name] = {"status": _launchd_status(label)}

    return {"services": services}
354
+
355
+
356
+ # ──────────────────────────────────────────────
357
+ # Stats / Token usage endpoints
358
+ # ──────────────────────────────────────────────
359
+
360
+
361
@app.get("/stats/tokens")
async def stats_tokens(days: int = 7):
    """Token usage summary from Supabase llm_usage table.

    Fetches up to 1000 rows created within the last ``days`` days and
    aggregates them overall, per model and per calendar day. When Supabase
    returns nothing (not configured, request failed, or no rows) the local
    ``api_costs.jsonl`` fallback is used instead; note the fallback response
    has a different shape (``entries`` rather than ``by_model``/``by_day``/``recent``).

    Null columns in a row are counted as 0 (tokens, cost), as "unknown"
    (model), or skipped for the per-day grouping (created_at), instead of
    raising while aggregating.
    """
    from datetime import datetime, timedelta, timezone

    cutoff = (datetime.now(timezone.utc) - timedelta(days=days)).strftime("%Y-%m-%dT%H:%M:%SZ")

    entries = await asyncio.to_thread(
        _supabase_get,
        "llm_usage",
        f"select=model,source,input_tokens,output_tokens,cost_usd,tier,created_at&created_at=gte.{cutoff}&order=created_at.desc&limit=1000",
    )

    if not entries:
        # The fallback reads a file line by line; keep that off the event loop.
        return await asyncio.to_thread(_stats_tokens_local, days)

    def _cost(entry: Dict[str, Any]) -> float:
        # `or 0` covers both a missing key and an explicit JSON null.
        return float(entry.get("cost_usd") or 0)

    def _tokens(entry: Dict[str, Any], key: str) -> int:
        return entry.get(key) or 0

    def _accumulate(buckets: Dict[str, Dict[str, Any]], key: str, entry: Dict[str, Any]) -> None:
        """Add one row to the bucket for `key`, creating the bucket on first use."""
        bucket = buckets.setdefault(
            key, {"calls": 0, "input_tokens": 0, "output_tokens": 0, "cost_usd": 0.0}
        )
        bucket["calls"] += 1
        bucket["input_tokens"] += _tokens(entry, "input_tokens")
        bucket["output_tokens"] += _tokens(entry, "output_tokens")
        bucket["cost_usd"] += _cost(entry)

    total_cost = sum(_cost(e) for e in entries)
    total_input = sum(_tokens(e, "input_tokens") for e in entries)
    total_output = sum(_tokens(e, "output_tokens") for e in entries)

    by_model: Dict[str, Dict[str, Any]] = {}
    by_day: Dict[str, Dict[str, Any]] = {}
    for e in entries:
        _accumulate(by_model, e.get("model") or "unknown", e)

        # Group by day for charts; rows without a timestamp are left out.
        day = (e.get("created_at") or "")[:10]  # YYYY-MM-DD
        if day:
            _accumulate(by_day, day, e)

    return {
        "days": days,
        "total_cost_usd": round(total_cost, 4),
        "total_input_tokens": total_input,
        "total_output_tokens": total_output,
        "entry_count": len(entries),
        "by_model": {k: {**v, "cost_usd": round(v["cost_usd"], 4)} for k, v in by_model.items()},
        "by_day": {k: {**v, "cost_usd": round(v["cost_usd"], 4)} for k, v in sorted(by_day.items())},
        "recent": entries[:20],
    }
415
+
416
+
417
def _stats_tokens_local(days: int) -> Dict[str, Any]:
    """Fallback: read from local api_costs.jsonl with date filtering.

    Each non-empty line of ``API_COSTS_PATH`` is parsed as one JSON object.
    Entries whose ``timestamp`` string sorts before the cutoff (now minus
    ``days`` days, formatted ``%Y-%m-%dT%H:%M:%SZ``) are dropped; entries
    without a timestamp are kept. The comparison is a plain string comparison.

    Lines that are not valid JSON, are not JSON objects, or carry non-numeric
    cost/token values are skipped rather than aborting the whole summary.
    Null cost/token values count as 0.

    Args:
        days: Size of the look-back window in days.

    Returns:
        Dict with ``days``, the last 50 kept ``entries``, the totals and
        ``entry_count``. The same keys are returned when the file is missing.
    """
    entries: List[Dict[str, Any]] = []
    total_cost = 0.0
    total_input = 0
    total_output = 0

    if API_COSTS_PATH.exists():
        from datetime import datetime, timedelta, timezone

        cutoff = (datetime.now(timezone.utc) - timedelta(days=days)).strftime("%Y-%m-%dT%H:%M:%SZ")

        with open(API_COSTS_PATH, encoding="utf-8", errors="replace") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if not isinstance(entry, dict):
                    continue

                # Filter by timestamp if present
                ts = entry.get("timestamp", "")
                if isinstance(ts, str) and ts and ts < cutoff:
                    continue

                try:
                    cost = float(entry.get("cost_usd") or 0)
                    tokens_in = int(entry.get("input_tokens") or 0)
                    tokens_out = int(entry.get("output_tokens") or 0)
                except (TypeError, ValueError):
                    # Malformed numbers: drop the line instead of failing the request.
                    continue

                entries.append(entry)
                total_cost += cost
                total_input += tokens_in
                total_output += tokens_out

    return {
        "days": days,
        "entries": entries[-50:],
        "total_cost_usd": round(total_cost, 4),
        "total_input_tokens": total_input,
        "total_output_tokens": total_output,
        "entry_count": len(entries),
    }
462
+
463
+
464
@app.get("/stats/enrichment")
async def stats_enrichment():
    """BrainLayer enrichment progress — how many chunks have tags, summaries, importance.

    Opens a separate read-only connection to the database and counts, for each
    enrichment field, the rows where that field is set. The queries run in a
    worker thread so the event loop is not blocked while the tables are scanned.

    Raises:
        HTTPException: 503 when the vector store is not initialised.
    """
    if not vector_store:
        raise HTTPException(status_code=503, detail="Vector store not initialized")

    def _collect() -> Dict[str, Any]:
        """Run all counting queries on one connection and build the response."""
        conn = apsw.Connection(str(DEFAULT_DB_PATH), flags=apsw.SQLITE_OPEN_READONLY)
        try:
            cursor = conn.cursor()

            def _count(sql: str) -> int:
                return list(cursor.execute(sql))[0][0]

            total = _count("SELECT COUNT(*) FROM chunks")
            has_tags = _count("SELECT COUNT(*) FROM chunks WHERE tags IS NOT NULL AND tags != ''")
            has_summary = _count("SELECT COUNT(*) FROM chunks WHERE summary IS NOT NULL AND summary != ''")
            has_importance = _count("SELECT COUNT(*) FROM chunks WHERE importance IS NOT NULL")
            has_intent = _count("SELECT COUNT(*) FROM chunks WHERE intent IS NOT NULL AND intent != ''")

            # Embeddings count — any failure of this query is reported as 0.
            try:
                has_embeddings = _count("SELECT COUNT(*) FROM chunk_vectors_rowids")
            except Exception:
                has_embeddings = 0

            # Projects breakdown (top 20 by chunk count)
            projects = list(
                cursor.execute("""
                    SELECT project, COUNT(*) as cnt
                    FROM chunks
                    WHERE project IS NOT NULL
                    GROUP BY project
                    ORDER BY cnt DESC
                    LIMIT 20
                """)
            )

            def _progress(count: int) -> Dict[str, Any]:
                # Percentage is 0 for an empty table to avoid dividing by zero.
                return {"count": count, "pct": round(count * 100 / total, 1) if total else 0}

            return {
                "total_chunks": total,
                "embeddings": _progress(has_embeddings),
                "tags": _progress(has_tags),
                "summaries": _progress(has_summary),
                "importance": _progress(has_importance),
                "intent": _progress(has_intent),
                "projects": [{"project": p, "chunks": c} for p, c in projects],
            }
        finally:
            conn.close()

    return await asyncio.to_thread(_collect)
524
+
525
+
526
+ # ──────────────────────────────────────────────
527
+ # Events + Service Runs
528
+ # ──────────────────────────────────────────────
529
+
530
_cached_ssl_ctx = None


def _supabase_ssl_ctx():
    """Return the shared SSL context, building it on first use.

    The context uses certifi's CA bundle when certifi can be imported and the
    default trust store otherwise. The result is cached in ``_cached_ssl_ctx``.
    """
    global _cached_ssl_ctx
    if _cached_ssl_ctx is None:
        import ssl

        try:
            import certifi
        except ImportError:
            _cached_ssl_ctx = ssl.create_default_context()
        else:
            _cached_ssl_ctx = ssl.create_default_context(cafile=certifi.where())
    return _cached_ssl_ctx
547
+
548
+
549
def _supabase_get(path: str, params: str = "") -> list:
    """GET ``{SUPABASE_URL}/rest/v1/{path}`` and return the decoded JSON body.

    Args:
        path: Resource path appended after ``/rest/v1/``.
        params: Raw query string (without the leading ``?``); may be empty.

    Returns:
        The parsed response, or ``[]`` when the URL or key environment
        variables are missing or the request fails for any reason (the
        failure is logged as a warning).
    """
    base_url = os.environ.get("SUPABASE_URL")
    # The service key takes precedence over the anon key.
    api_key = os.environ.get("SUPABASE_SERVICE_KEY") or os.environ.get("SUPABASE_ANON_KEY")
    if not (base_url and api_key):
        return []
    import urllib.request

    try:
        url = f"{base_url}/rest/v1/{path}{'?' + params if params else ''}"
        auth_headers = {
            "apikey": api_key,
            "Authorization": f"Bearer {api_key}",
        }
        request = urllib.request.Request(url, headers=auth_headers)
        with urllib.request.urlopen(request, timeout=5, context=_supabase_ssl_ctx()) as response:
            return json.loads(response.read())
    except Exception as e:
        logger.warning(f"Supabase query failed ({path}): {e}")
        return []
571
+
572
+
573
@app.get("/events/recent")
async def events_recent(limit: int = 50):
    """Return the newest rows of the Supabase ``golem_events`` table.

    ``limit`` is clamped to the range 1..100 before it is sent to Supabase.
    """
    events = await asyncio.to_thread(
        _supabase_get,
        "golem_events",
        f"select=actor,type,data,created_at&order=created_at.desc&limit={max(1, min(limit, 100))}",
    )
    return {"events": events, "count": len(events)}
582
+
583
+
584
@app.get("/stats/service-runs")
async def stats_service_runs(limit: int = 20):
    """Return the newest rows of the Supabase ``service_runs`` table.

    ``limit`` is clamped to the range 1..50 before it is sent to Supabase.
    """
    service_runs = await asyncio.to_thread(
        _supabase_get,
        "service_runs",
        f"select=service,started_at,ended_at,duration_ms,status,error&order=started_at.desc&limit={max(1, min(limit, 50))}",
    )
    return {"runs": service_runs, "count": len(service_runs)}
593
+
594
+
595
+ # ──────────────────────────────────────────────
596
+ # Content Pipeline
597
+ # ──────────────────────────────────────────────
598
+
599
+
600
@app.get("/content/pipeline-runs")
async def content_pipeline_runs(limit: int = 50):
    """Return the newest rows of the Supabase ``pipeline_runs`` table.

    ``limit`` is clamped to the range 1..100 before it is sent to Supabase.
    """
    pipeline_runs = await asyncio.to_thread(
        _supabase_get,
        "pipeline_runs",
        f"select=id,pipeline_id,idea,idea_type,success,duration_ms,quality_score,user_feedback,output_format,error,created_at&order=created_at.desc&limit={max(1, min(limit, 100))}",
    )
    return {"runs": pipeline_runs, "count": len(pipeline_runs)}
609
+
610
+
611
@app.get("/content/pipeline-stats")
async def content_pipeline_stats():
    """Aggregate pipeline performance stats.

    Reads the 500 newest ``pipeline_runs`` rows and reports, per pipeline:
    run count, successful runs, success rate, average quality score (only
    over rows that have one), average duration and the three most common
    idea types.
    """
    rows = await asyncio.to_thread(
        _supabase_get,
        "pipeline_runs",
        "select=pipeline_id,success,duration_ms,quality_score,idea_type&order=created_at.desc&limit=500",
    )

    # Running totals keyed by pipeline id.
    per_pipeline: dict[str, dict] = {}
    for run in rows:
        pipeline_id = run.get("pipeline_id", "unknown")
        if pipeline_id not in per_pipeline:
            per_pipeline[pipeline_id] = {
                "total": 0,
                "success": 0,
                "quality_sum": 0.0,
                "quality_count": 0,
                "duration_sum": 0,
                "idea_types": {},
            }
        totals = per_pipeline[pipeline_id]

        totals["total"] += 1
        if run.get("success"):
            totals["success"] += 1

        quality = run.get("quality_score")
        if quality is not None:
            totals["quality_sum"] += float(quality)
            totals["quality_count"] += 1

        # A missing or null duration counts as 0 ms.
        totals["duration_sum"] += int(run.get("duration_ms") or 0)

        idea_type = run.get("idea_type", "general")
        totals["idea_types"][idea_type] = totals["idea_types"].get(idea_type, 0) + 1

    def _summarize(pipeline_id, totals) -> dict:
        """Turn one pipeline's running totals into the response record."""
        run_count = totals["total"]
        scored = totals["quality_count"]
        most_common = sorted(totals["idea_types"].items(), key=lambda pair: -pair[1])[:3]
        return {
            "pipeline_id": pipeline_id,
            "total_runs": run_count,
            "successful_runs": totals["success"],
            "success_rate": round(totals["success"] / run_count, 2) if run_count > 0 else 0,
            "avg_quality": round(totals["quality_sum"] / scored, 2) if scored > 0 else None,
            "avg_duration_ms": round(totals["duration_sum"] / run_count) if run_count > 0 else 0,
            "top_idea_types": [idea for idea, _ in most_common],
        }

    summary = [_summarize(pipeline_id, totals) for pipeline_id, totals in per_pipeline.items()]
    return {"stats": summary, "total_runs": len(rows)}
659
+
660
+
661
+ # ──────────────────────────────────────────────
662
+ # Backlog CRUD
663
+ # ──────────────────────────────────────────────
664
+
665
+
666
+ def _supabase_mutate(method: str, path: str, body: dict | None = None, params: str = "") -> dict | list | None:
667
+ """POST/PATCH/DELETE to Supabase REST API. Returns parsed JSON or None."""
668
+ supabase_url = os.environ.get("SUPABASE_URL")
669
+ supabase_key = os.environ.get("SUPABASE_SERVICE_KEY") or os.environ.get("SUPABASE_ANON_KEY")
670
+ if not supabase_url or not supabase_key:
671
+ return None
672
+ import urllib.request
673
+
674
+ try:
675
+ url = f"{supabase_url}/rest/v1/{path}{'?' + params if params else ''}"
676
+ data = json.dumps(body).encode() if body else None
677
+ req = urllib.request.Request(
678
+ url,
679
+ data=data,
680
+ method=method,
681
+ headers={
682
+ "apikey": supabase_key,
683
+ "Authorization": f"Bearer {supabase_key}",
684
+ "Content-Type": "application/json",
685
+ "Prefer": "return=representation",
686
+ },
687
+ )
688
+ with urllib.request.urlopen(req, timeout=5, context=_supabase_ssl_ctx()) as resp:
689
+ return json.loads(resp.read())
690
+ except Exception as e:
691
+ logger.warning(f"Supabase {method} failed ({path}): {e}")
692
+ return None
693
+
694
+
695
@app.get("/backlog/items")
async def backlog_list(project: str = "", status: str = ""):
    """List up to 200 backlog items, newest update first.

    Non-empty ``project`` and ``status`` values are added as equality filters;
    both are percent-encoded before being placed in the query string.
    """
    from urllib.parse import quote

    query_parts = ["select=*&order=updated_at.desc&limit=200"]
    if project:
        query_parts.append(f"&project=eq.{quote(project, safe='')}")
    if status:
        query_parts.append(f"&status=eq.{quote(status, safe='')}")

    items = await asyncio.to_thread(_supabase_get, "backlog_items", "".join(query_parts))
    return {"items": items, "count": len(items)}
707
+
708
+
709
@app.post("/backlog/items")
async def backlog_create(request: Request):
    """Create a new backlog item.

    The request body must be a JSON object with a non-empty ``title``. Other
    recognised fields fall back to defaults; unknown fields are ignored.

    Returns:
        The created row on success; otherwise a JSON error with status 400
        (body is not valid JSON, is not an object, or has no title) or 500
        (Supabase did not return the created row).
    """
    try:
        body = await request.json()
    except ValueError:
        # Covers malformed JSON and undecodable bytes; report as a client error.
        return JSONResponse({"error": "invalid JSON body"}, status_code=400)
    if not isinstance(body, dict):
        return JSONResponse({"error": "request body must be a JSON object"}, status_code=400)

    # Validate required fields
    if not body.get("title"):
        return JSONResponse({"error": "title is required"}, status_code=400)

    item = {
        "title": body["title"],
        "project": body.get("project", ""),
        "description": body.get("description"),
        "status": body.get("status", "backlog"),
        "priority": body.get("priority", "medium"),
        "tags": body.get("tags", []),
        "created_by": body.get("created_by", "dashboard"),
    }
    result = await asyncio.to_thread(_supabase_mutate, "POST", "backlog_items", item)

    # Only a non-empty list of rows counts as success; indexing a dict
    # response with [0] would raise.
    if isinstance(result, list) and result:
        return result[0]
    return JSONResponse({"error": "failed to create item"}, status_code=500)
729
+
730
+
731
@app.patch("/backlog/items/{item_id}")
async def backlog_update(item_id: str, request: Request):
    """Update a backlog item.

    Only ``title``, ``description``, ``status``, ``priority``, ``tags`` and
    ``project`` may be changed; any other key in the body is dropped.

    Returns:
        The updated row on success; otherwise a JSON error with status 400
        (body is not valid JSON, is not an object, or has no allowed field)
        or 404 (no row came back from Supabase).
    """
    from urllib.parse import quote

    try:
        body = await request.json()
    except ValueError:
        # Covers malformed JSON and undecodable bytes; report as a client error.
        return JSONResponse({"error": "invalid JSON body"}, status_code=400)
    if not isinstance(body, dict):
        return JSONResponse({"error": "request body must be a JSON object"}, status_code=400)

    # Only allow safe fields
    allowed = {"title", "description", "status", "priority", "tags", "project"}
    update = {k: v for k, v in body.items() if k in allowed}
    if not update:
        return JSONResponse({"error": "no valid fields to update"}, status_code=400)

    result = await asyncio.to_thread(
        _supabase_mutate, "PATCH", "backlog_items", update, f"id=eq.{quote(item_id, safe='')}"
    )

    # Only a non-empty list of rows counts as success; indexing a dict
    # response with [0] would raise.
    if isinstance(result, list) and result:
        return result[0]
    return JSONResponse({"error": "item not found or update failed"}, status_code=404)
748
+
749
+
750
@app.delete("/backlog/items/{item_id}")
async def backlog_delete(item_id: str):
    """Delete the backlog item whose id equals ``item_id``.

    Returns:
        ``{"deleted": True, "count": N}`` where N is the length of the list
        Supabase sent back (0 for a non-list response), or a 500 JSON error
        when the Supabase call returned ``None``.
    """
    from urllib.parse import quote

    id_filter = f"id=eq.{quote(item_id, safe='')}"
    outcome = await asyncio.to_thread(_supabase_mutate, "DELETE", "backlog_items", None, id_filter)
    if outcome is None:
        return JSONResponse({"error": "delete failed"}, status_code=500)

    removed = len(outcome) if isinstance(outcome, list) else 0
    return {"deleted": True, "count": removed}
761
+
762
+
763
+ # ──────────────────────────────────────────────
764
+ # Dashboard Search & Session Detail
765
+ # ──────────────────────────────────────────────
766
+
767
+
768
@app.get("/dashboard/search")
async def dashboard_search(q: str = "", project: str = "", content_type: str = "", limit: int = 20):
    """Fast FTS5 text search across all chunks. Returns ranked results with snippets.

    Args:
        q: Search text. Only runs of ASCII letters, digits and underscores are
            used; every such word must match (they are joined with AND).
        project: Optional substring filter on the chunk's project.
        content_type: Optional exact filter on the chunk's content type.
        limit: Maximum number of results, clamped to 1..100.

    Returns:
        Dict with ``results``, the original ``query``, ``total`` and the
        elapsed ``time_ms``. A blank query, or any failure inside the query,
        yields an empty result list.
    """
    if not q.strip():
        return {"results": [], "query": q, "total": 0, "time_ms": 0}

    limit = max(1, min(limit, 100))
    started_at = time.time()

    def _query_fts():
        """Run the FTS query on a short-lived read-only connection."""
        import re

        db = apsw.Connection(str(DEFAULT_DB_PATH), flags=apsw.SQLITE_OPEN_READONLY)
        cur = db.cursor()
        try:
            # Build FTS5 match expression: keep plain words only, quote each
            # one and join with AND, so FTS5 operators in the input are dropped.
            terms = re.findall(r"[a-zA-Z0-9_]+", q)
            if not terms:
                return []
            match_expr = " AND ".join(f'"{w}"' for w in terms)

            # Optional filters; values are always bound, never interpolated.
            conditions = []
            bindings: list = [match_expr]
            if project:
                conditions.append("c.project LIKE ?")
                bindings.append(f"%{project}%")
            if content_type:
                conditions.append("c.content_type = ?")
                bindings.append(content_type)
            where_clause = "".join(" AND " + condition for condition in conditions)

            sql = f"""
                SELECT c.id, c.content_type, c.project, c.conversation_id,
                       c.importance, c.tags, c.summary, c.intent,
                       snippet(chunks_fts, 0, '<mark>', '</mark>', '...', 40) as snippet,
                       fts.rank
                FROM chunks_fts fts
                JOIN chunks c ON c.id = fts.chunk_id
                WHERE chunks_fts MATCH ?{where_clause}
                ORDER BY fts.rank
                LIMIT ?
            """
            bindings.append(limit)
            return list(cur.execute(sql, bindings))
        except Exception as e:
            logger.warning(f"Dashboard search failed: {e}")
            return []
        finally:
            db.close()

    rows = await asyncio.to_thread(_query_fts)
    elapsed = (time.time() - started_at) * 1000

    def _sanitize_snippet(raw: str) -> str:
        """Escape HTML in FTS5 snippet except <mark> tags (defense-in-depth)."""
        import html

        safe = html.escape(raw)
        for encoded, tag in (("&lt;mark&gt;", "<mark>"), ("&lt;/mark&gt;", "</mark>")):
            safe = safe.replace(encoded, tag)
        return safe

    # Column order of the SELECT above, used as the keys of each result.
    columns = (
        "id",
        "content_type",
        "project",
        "conversation_id",
        "importance",
        "tags",
        "summary",
        "intent",
        "snippet",
        "rank",
    )
    results = []
    for row in rows:
        record = dict(zip(columns, row))
        raw_snippet = record["snippet"]
        record["snippet"] = _sanitize_snippet(raw_snippet) if raw_snippet else ""
        results.append(record)

    return {"results": results, "query": q, "total": len(results), "time_ms": round(elapsed, 1)}
861
+
862
+
863
def _session_like_prefix(prefix: str) -> str:
    r"""Return a LIKE pattern that matches values starting with *prefix* literally.

    ``%``, ``_`` and the escape character itself are backslash-escaped, so the
    pattern must be used together with ``ESCAPE '\'``.
    """
    escaped = prefix.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    return escaped + "%"


@app.get("/session/{session_id:path}")
async def session_detail(session_id: str, page: int = 1, per_page: int = 50, content_type: str = ""):
    """Get session detail: chunks (paginated), files touched, metadata.

    Sessions are matched by conversation_id OR by chunk ID prefix (for newer chunks
    that don't have conversation_id set). The chunk ID format is '{jsonl_path}:{N}'.
    Optionally filter by content_type.

    Args:
        session_id: Conversation id, or the path prefix of the chunk ids.
        page: 1-based page number; values below 1 are treated as 1.
        per_page: Page size, clamped to the range 1..200.
        content_type: When non-empty, only chunks of this type are listed and
            counted in ``total_chunks``. ``files`` and ``type_distribution``
            always cover the whole session.

    Returns:
        A dict with the session id, pagination info, the page of chunks
        (content truncated to 2000 characters), the session context row (or
        None), the distinct source files and the per-type chunk counts.

    Raises:
        HTTPException: 404 when no chunk matches the session id.
    """
    per_page = max(1, min(per_page, 200))
    # Clamp once so the offset and the page echoed in the response agree.
    page = max(1, page)
    offset = (page - 1) * per_page

    def _get_session():
        conn = apsw.Connection(str(DEFAULT_DB_PATH), flags=apsw.SQLITE_OPEN_READONLY)
        cursor = conn.cursor()
        try:

            def _count(condition: str, condition_params: list) -> int:
                query = f"SELECT COUNT(*) FROM chunks WHERE {condition}"
                return list(cursor.execute(query, condition_params))[0][0]

            # Lookup strategies in priority order: conversation_id, then the
            # '<session_id>:' id prefix, then the bare session_id as id prefix.
            # The prefix is escaped so '%' and '_' in the id (common in file
            # paths) are matched literally instead of acting as wildcards.
            like_clause = "id LIKE ? ESCAPE '\\'"
            strategies = (
                ("conversation_id = ?", [session_id]),
                (like_clause, [_session_like_prefix(session_id + ":")]),
                (like_clause, [_session_like_prefix(session_id)]),
            )
            for where, wparams in strategies:
                total = _count(where, wparams)
                if total:
                    break
            else:
                return None

            # Add type filter if specified
            type_where = ""
            type_params: list = []
            if content_type:
                type_where = " AND content_type = ?"
                type_params = [content_type]
                # Recalculate total with type filter
                total = _count(f"{where}{type_where}", wparams + type_params)

            # Paginated chunks
            chunks = list(
                cursor.execute(
                    f"""
                    SELECT id, content_type, project, position, importance,
                           tags, summary, intent, content, source_file
                    FROM chunks
                    WHERE {where}{type_where}
                    ORDER BY position ASC, rowid ASC
                    LIMIT ? OFFSET ?
                    """,
                    wparams + type_params + [per_page, offset],
                )
            )

            # Session context (if available)
            ctx = list(
                cursor.execute(
                    """
                    SELECT session_id, project, branch, pr_number, commit_shas,
                           files_changed, started_at, ended_at, created_at,
                           plan_name, plan_phase, story_id
                    FROM session_context WHERE session_id = ?
                    """,
                    [session_id],
                )
            )

            # Unique files touched in this session
            files = list(
                cursor.execute(
                    f"""
                    SELECT DISTINCT source_file
                    FROM chunks
                    WHERE {where} AND source_file IS NOT NULL AND source_file != ''
                    ORDER BY source_file
                    """,
                    wparams,
                )
            )

            # Content type distribution
            type_dist = list(
                cursor.execute(
                    f"""
                    SELECT content_type, COUNT(*) as cnt
                    FROM chunks
                    WHERE {where}
                    GROUP BY content_type
                    ORDER BY cnt DESC
                    """,
                    wparams,
                )
            )

            return {
                "total": total,
                "chunks": chunks,
                "context": ctx,
                "files": files,
                "type_distribution": type_dist,
            }
        finally:
            conn.close()

    data = await asyncio.to_thread(_get_session)
    if data is None:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found")

    # Format chunks
    formatted_chunks = []
    for cid, ctype, proj, pos, imp, tags, summary, intent, content, src in data["chunks"]:
        formatted_chunks.append(
            {
                "id": cid,
                "content_type": ctype,
                "project": proj,
                "position": pos,
                "importance": imp,
                "tags": tags,
                "summary": summary,
                "intent": intent,
                "content": content[:2000] if content else None,  # Truncate large content
                "source_file": src,
            }
        )

    # Format session context; the keys follow the column order of the SELECT above.
    ctx_data = None
    if data["context"]:
        context_keys = (
            "session_id",
            "project",
            "branch",
            "pr_number",
            "commit_shas",
            "files_changed",
            "started_at",
            "ended_at",
            "created_at",
            "plan_name",
            "plan_phase",
            "story_id",
        )
        ctx_data = dict(zip(context_keys, data["context"][0]))

    return {
        "session_id": session_id,
        "total_chunks": data["total"],
        "page": page,
        "per_page": per_page,
        "chunks": formatted_chunks,
        "context": ctx_data,
        "files": [f[0] for f in data["files"]],
        "type_distribution": dict(data["type_distribution"]),
    }
1031
+
1032
+
1033
+ # ──────────────────────────────────────────────
1034
+ # Server startup
1035
+ # ──────────────────────────────────────────────
1036
+
1037
+
1038
def setup_signal_handlers():
    """Install SIGTERM and SIGINT handlers that log the signal and exit with status 0."""

    def _exit_on_signal(signum, frame):
        logger.info(f"Received signal {signum}, shutting down...")
        sys.exit(0)

    # Both signals share the same handler; SIGTERM is registered first.
    for sig in (signal.SIGTERM, signal.SIGINT):
        signal.signal(sig, _exit_on_signal)
1047
+
1048
+
1049
def main():
    """Main daemon entry point.

    Parses ``--http`` / ``--host``, stores the requested HTTP port in the
    module-level ``http_port``, configures logging and signal handlers and
    removes any existing socket file. The app is then served on the unix
    socket, and additionally over HTTP when ``--http`` is given.

    In socket-only mode an unexpected server error is logged and the process
    exits with status 1.
    """
    import argparse

    global http_port

    parser = argparse.ArgumentParser(description="BrainLayer daemon")
    parser.add_argument("--http", type=int, default=None, help="Also serve on HTTP port (e.g. --http 8787)")
    parser.add_argument("--host", type=str, default="127.0.0.1", help="HTTP bind address (default: 127.0.0.1)")
    args = parser.parse_args()

    http_port = args.http

    # Setup logging
    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

    setup_signal_handlers()

    # Remove a leftover socket file. missing_ok avoids the check-then-delete race
    # when the file disappears between an exists() test and the unlink().
    SOCKET_PATH.unlink(missing_ok=True)

    if args.http:
        # Dual mode: unix socket + HTTP port
        asyncio.run(_run_dual(args.http, host=args.host))
    else:
        # Socket-only mode (backward compatible)
        config = uvicorn.Config(app, uds=str(SOCKET_PATH), log_level="info", access_log=False)
        server = uvicorn.Server(config)
        try:
            server.run()
        except KeyboardInterrupt:
            logger.info("Daemon stopped by user")
        except Exception as e:
            logger.error(f"Daemon failed: {e}")
            sys.exit(1)
1083
+
1084
+
1085
async def _run_dual(port: int, host: str = "127.0.0.1"):
    """Serve the app on the unix socket and on ``host:port`` at the same time."""
    # Socket server first, HTTP server second; both share the same app and log settings.
    servers = [
        uvicorn.Server(uvicorn.Config(app, uds=str(SOCKET_PATH), log_level="info", access_log=False)),
        uvicorn.Server(uvicorn.Config(app, host=host, port=port, log_level="info", access_log=False)),
    ]

    logger.info(f"Starting dual mode: socket={SOCKET_PATH}, http={host}:{port}")

    try:
        await asyncio.gather(*(server.serve() for server in servers))
    except KeyboardInterrupt:
        logger.info("Daemon stopped by user")


if __name__ == "__main__":
    main()