researchloop-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. researchloop/__init__.py +1 -0
  2. researchloop/__main__.py +3 -0
  3. researchloop/cli.py +1138 -0
  4. researchloop/clusters/__init__.py +4 -0
  5. researchloop/clusters/monitor.py +199 -0
  6. researchloop/clusters/ssh.py +183 -0
  7. researchloop/comms/__init__.py +0 -0
  8. researchloop/comms/base.py +34 -0
  9. researchloop/comms/conversation.py +465 -0
  10. researchloop/comms/ntfy.py +95 -0
  11. researchloop/comms/router.py +71 -0
  12. researchloop/comms/slack.py +188 -0
  13. researchloop/core/__init__.py +0 -0
  14. researchloop/core/auth.py +78 -0
  15. researchloop/core/config.py +328 -0
  16. researchloop/core/credentials.py +38 -0
  17. researchloop/core/models.py +119 -0
  18. researchloop/core/orchestrator.py +910 -0
  19. researchloop/dashboard/__init__.py +0 -0
  20. researchloop/dashboard/app.py +15 -0
  21. researchloop/dashboard/auth.py +60 -0
  22. researchloop/dashboard/routes.py +912 -0
  23. researchloop/dashboard/templates/base.html +84 -0
  24. researchloop/dashboard/templates/login.html +12 -0
  25. researchloop/dashboard/templates/loop_detail.html +58 -0
  26. researchloop/dashboard/templates/loops.html +61 -0
  27. researchloop/dashboard/templates/setup.html +14 -0
  28. researchloop/dashboard/templates/sprint_detail.html +109 -0
  29. researchloop/dashboard/templates/sprints.html +48 -0
  30. researchloop/dashboard/templates/studies.html +18 -0
  31. researchloop/dashboard/templates/study_detail.html +64 -0
  32. researchloop/db/__init__.py +5 -0
  33. researchloop/db/database.py +86 -0
  34. researchloop/db/migrations.py +172 -0
  35. researchloop/db/queries.py +351 -0
  36. researchloop/runner/__init__.py +1 -0
  37. researchloop/runner/claude.py +169 -0
  38. researchloop/runner/job_templates/sge.sh.j2 +319 -0
  39. researchloop/runner/job_templates/slurm.sh.j2 +336 -0
  40. researchloop/runner/main.py +156 -0
  41. researchloop/runner/pipeline.py +272 -0
  42. researchloop/runner/templates/fix_issues.md.j2 +11 -0
  43. researchloop/runner/templates/idea_generator.md.j2 +16 -0
  44. researchloop/runner/templates/red_team.md.j2 +15 -0
  45. researchloop/runner/templates/report.md.j2 +31 -0
  46. researchloop/runner/templates/research_sprint.md.j2 +51 -0
  47. researchloop/runner/templates/summarizer.md.j2 +7 -0
  48. researchloop/runner/upload.py +153 -0
  49. researchloop/schedulers/__init__.py +11 -0
  50. researchloop/schedulers/base.py +43 -0
  51. researchloop/schedulers/local.py +188 -0
  52. researchloop/schedulers/sge.py +163 -0
  53. researchloop/schedulers/slurm.py +179 -0
  54. researchloop/sprints/__init__.py +0 -0
  55. researchloop/sprints/auto_loop.py +458 -0
  56. researchloop/sprints/manager.py +750 -0
  57. researchloop/studies/__init__.py +0 -0
  58. researchloop/studies/manager.py +102 -0
  59. researchloop-0.1.0.dist-info/METADATA +596 -0
  60. researchloop-0.1.0.dist-info/RECORD +63 -0
  61. researchloop-0.1.0.dist-info/WHEEL +4 -0
  62. researchloop-0.1.0.dist-info/entry_points.txt +3 -0
  63. researchloop-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,910 @@
1
+ """Main orchestrator -- ties every subsystem together and exposes a FastAPI app."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ from contextlib import asynccontextmanager
8
+ from datetime import datetime, timezone
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from fastapi import FastAPI, Header, HTTPException, Request, UploadFile
13
+ from fastapi.middleware.cors import CORSMiddleware
14
+ from fastapi.responses import JSONResponse
15
+
16
+ from researchloop.clusters.monitor import JobMonitor
17
+ from researchloop.clusters.ssh import SSHManager
18
+ from researchloop.comms.conversation import ConversationManager
19
+ from researchloop.comms.ntfy import NtfyNotifier
20
+ from researchloop.comms.router import NotificationRouter
21
+ from researchloop.comms.slack import (
22
+ SlackNotifier,
23
+ verify_slack_signature,
24
+ )
25
+ from researchloop.core.config import Config
26
+ from researchloop.db.database import Database
27
+ from researchloop.schedulers.base import BaseScheduler
28
+ from researchloop.sprints.auto_loop import AutoLoopController
29
+ from researchloop.sprints.manager import SprintManager
30
+ from researchloop.studies.manager import StudyManager
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+
35
class Orchestrator:
    """Central coordinator that initialises and owns every subsystem.

    Call :meth:`start` to bring everything up and :meth:`stop` for a
    clean shutdown.  Every subsystem attribute is ``None`` (or an empty
    dict for ``schedulers``) until :meth:`start` has populated it.
    """

    def __init__(self, config: Config) -> None:
        """Remember *config*; no subsystem is created until :meth:`start`."""
        self.config = config

        # Subsystem references (populated by start()).
        self.db: Database | None = None
        self.ssh_manager: SSHManager | None = None
        self.schedulers: dict[str, BaseScheduler] = {}
        self.study_manager: StudyManager | None = None
        self.sprint_manager: SprintManager | None = None
        self.auto_loop: AutoLoopController | None = None
        self.notification_router: NotificationRouter | None = None
        self.job_monitor: JobMonitor | None = None
        self.conversation_manager: ConversationManager | None = None

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    async def start(self) -> None:
        """Initialise the database, managers, and background tasks.

        Subsystems are created in dependency order: database, SSH manager,
        schedulers, study manager, notification router, sprint manager,
        conversation manager, auto-loop controller, and finally the job
        monitor, whose polling is started last.
        """
        logger.info("Orchestrator starting...")

        # 1. Database
        self.db = Database(self.config.db_path)
        await self.db.connect()
        logger.info("Database connected: %s", self.config.db_path)

        # 2. SSH manager
        self.ssh_manager = SSHManager()

        # 3. Schedulers -- concrete implementations are imported lazily by
        #    the factory so the base package has no hard dependency on them.
        self.schedulers = _build_schedulers(self.config)

        # 4. Study manager
        self.study_manager = StudyManager(self.db, self.config)
        await self.study_manager.sync_from_config()

        # 5. Notification router -- a notifier is only registered when its
        #    config section is present and has the required credential.
        self.notification_router = NotificationRouter()
        if self.config.ntfy and self.config.ntfy.topic:
            ntfy = NtfyNotifier(
                url=self.config.ntfy.url,
                topic=self.config.ntfy.topic,
            )
            self.notification_router.add_notifier(ntfy)
            logger.info(
                "ntfy notifier configured for topic %r",
                self.config.ntfy.topic,
            )
        if self.config.slack and self.config.slack.bot_token:
            slack_notifier = SlackNotifier(
                bot_token=self.config.slack.bot_token,
                channel_id=self.config.slack.channel_id,
                dashboard_url=self.config.orchestrator_url,
            )
            self.notification_router.add_notifier(slack_notifier)
            logger.info("Slack notifier configured")

        # 6. Sprint manager
        self.sprint_manager = SprintManager(
            db=self.db,
            config=self.config,
            ssh_manager=self.ssh_manager,
            schedulers=self.schedulers,
            study_manager=self.study_manager,
            notification_router=self.notification_router,
        )

        # 6b. Conversation manager
        self.conversation_manager = ConversationManager(
            self.db, sprint_manager=self.sprint_manager
        )

        # Wire conversation manager to Slack notifier
        # so notifications store thread context.
        if self.config.slack and self.config.slack.bot_token:
            for n in self.notification_router._notifiers:
                if isinstance(n, SlackNotifier):
                    n._cm = self.conversation_manager

        # 7. Auto-loop controller
        self.auto_loop = AutoLoopController(
            db=self.db,
            sprint_manager=self.sprint_manager,
            config=self.config,
        )

        # 8. Job monitor
        self.job_monitor = JobMonitor(
            ssh_manager=self.ssh_manager,
            db=self.db,
            schedulers=self.schedulers,
            config=self.config,
        )
        await self.job_monitor.start_polling()

        logger.info("Orchestrator started.")

    async def stop(self) -> None:
        """Shut down all subsystems, attempting every step.

        The job monitor is stopped first, then SSH connections are closed,
        then the database.  Subsystems that were never created are
        skipped.  Each step runs even when an earlier one raised, so a
        failure while stopping the monitor cannot leak SSH connections or
        the database handle; the exception still propagates afterwards.
        """
        logger.info("Orchestrator shutting down...")

        try:
            if self.job_monitor is not None:
                await self.job_monitor.stop_polling()
        finally:
            try:
                if self.ssh_manager is not None:
                    await self.ssh_manager.close_all()
            finally:
                if self.db is not None:
                    await self.db.close()

        logger.info("Orchestrator stopped.")
155
+
156
+
157
+ # ----------------------------------------------------------------------
158
+ # Scheduler factory
159
+ # ----------------------------------------------------------------------
160
+
161
+
162
def _build_schedulers(config: Config) -> dict[str, BaseScheduler]:
    """Create the scheduler registry for the clusters listed in *config*.

    Each scheduler that could be built is registered under two keys: the
    cluster's name and its scheduler type, so either can be used for
    lookup.  A cluster whose scheduler type is already a key in the
    registry reuses that existing instance.  Unknown scheduler types and
    failed imports are logged as warnings and the cluster is left out.
    """

    def _instantiate(kind: str, cluster_name: str) -> BaseScheduler | None:
        # Concrete schedulers are imported here, on demand, so the base
        # package has no hard dependency on any of them.
        if kind == "slurm":
            from researchloop.schedulers.slurm import (
                SlurmScheduler,  # type: ignore[import-not-found]
            )

            return SlurmScheduler()
        if kind == "sge":
            from researchloop.schedulers.sge import (
                SGEScheduler,  # type: ignore[import-not-found]
            )

            return SGEScheduler()
        if kind == "local":
            from researchloop.schedulers.local import (
                LocalScheduler,  # type: ignore[import-not-found]
            )

            return LocalScheduler()

        logger.warning(
            "Unknown scheduler type %r for cluster %r", kind, cluster_name
        )
        return None

    registry: dict[str, BaseScheduler] = {}

    for cluster in config.clusters:
        kind = cluster.scheduler_type

        if kind in registry:
            # Reuse an existing scheduler of the same type.
            registry[cluster.name] = registry[kind]
            continue

        try:
            built = _instantiate(kind, cluster.name)
        except ImportError:
            logger.warning(
                "Scheduler %r not available (import failed) for cluster %r",
                kind,
                cluster.name,
            )
            built = None

        if built is None:
            continue

        registry[cluster.name] = built
        registry[kind] = built

    return registry
213
+
214
+
215
+ # ======================================================================
216
+ # FastAPI application factory
217
+ # ======================================================================
218
+
219
+
220
def create_app(orchestrator: Orchestrator) -> FastAPI:
    """Build and return the FastAPI application bound to *orchestrator*.

    The orchestrator is started and stopped by the application lifespan.

    Routes:
        - ``GET  /`` (redirects to ``/dashboard/``)
        - ``POST /api/auth``
        - ``POST /api/webhook/sprint-complete``
        - ``POST /api/webhook/heartbeat``
        - ``POST /api/artifacts/{sprint_id}``
        - ``GET  /api/sprints``
        - ``POST /api/sprints``
        - ``GET  /api/sprints/{sprint_id}``
        - ``POST /api/sprints/{sprint_id}/cancel``
        - ``GET  /api/studies``
        - ``POST /api/loops``
        - ``POST /api/loops/{loop_id}/stop``
        - ``POST /api/slack/events``
        - dashboard HTML routes added by ``add_dashboard_routes``
    """
    import asyncio
    import hmac
    import re

    @asynccontextmanager
    async def lifespan(app: FastAPI):  # type: ignore[no-untyped-def]
        # try/finally so subsystems are released even when startup fails
        # part-way or the server task is cancelled while running.
        # Orchestrator.stop() skips subsystems that were never created.
        try:
            await orchestrator.start()
            yield
        finally:
            await orchestrator.stop()

    app = FastAPI(
        title="ResearchLoop API",
        version="0.1.0",
        lifespan=lifespan,
    )

    # -- Root redirect ---------------------------------------------------
    @app.get("/")
    async def root():  # type: ignore[no-untyped-def]
        """Redirect the bare root URL to the dashboard."""
        from fastapi.responses import RedirectResponse

        return RedirectResponse("/dashboard/", status_code=303)

    # -- CORS middleware ------------------------------------------------
    # NOTE(review): wildcard origins combined with allow_credentials=True
    # lets any website make credentialed cross-origin requests. Left
    # unchanged because callers may depend on it -- consider restricting
    # allow_origins to known dashboard/CLI origins.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # -- Shared helpers -------------------------------------------------

    def _secrets_match(provided: str | None, expected: str) -> bool:
        """Compare two secrets in constant time; ``None`` never matches."""
        if provided is None:
            return False
        # Compare as bytes: compare_digest rejects non-ASCII str inputs.
        return hmac.compare_digest(
            provided.encode("utf-8"), expected.encode("utf-8")
        )

    async def _read_json_object(request: Request) -> dict[str, Any]:
        """Return the request body parsed as a JSON object.

        Raises HTTP 400 when the body is not valid JSON or is not an
        object, instead of letting the handler fail with a 500.
        """
        try:
            payload = await request.json()
        except ValueError as exc:  # JSONDecodeError / UnicodeDecodeError
            raise HTTPException(
                status_code=400,
                detail="Request body must be valid JSON",
            ) from exc
        if not isinstance(payload, dict):
            raise HTTPException(
                status_code=400,
                detail="Request body must be a JSON object",
            )
        return payload

    # -- Auth helper ----------------------------------------------------

    from researchloop.dashboard.auth import (
        SessionManager,
        check_password,
    )

    # Signing key is auto-generated and persisted in the DB.
    # Loaded lazily on first use so the DB is ready.
    _api_session_mgr: SessionManager | None = None

    async def _get_session_mgr() -> SessionManager:
        """Return the cached session manager, creating it on first use.

        The signing key is read from the ``settings`` table; when no row
        exists a fresh random key is generated and stored.
        """
        nonlocal _api_session_mgr
        if _api_session_mgr is not None:
            return _api_session_mgr

        key: str | None = None
        if orchestrator.db is not None:
            row = await orchestrator.db.fetch_one(
                "SELECT value FROM settings WHERE key = ?",
                ("signing_key",),
            )
            if row:
                key = row["value"]
            else:
                import secrets as _secrets

                key = _secrets.token_hex(32)
                await orchestrator.db.execute(
                    "INSERT INTO settings (key, value) VALUES (?, ?)",
                    ("signing_key", key),
                )
        _api_session_mgr = SessionManager(secret_key=key)
        return _api_session_mgr

    async def _get_password_hash() -> str | None:
        """Resolve dashboard password hash from config or DB."""
        cfg_hash = orchestrator.config.dashboard.password_hash
        if cfg_hash:
            return cfg_hash
        if orchestrator.db is not None:
            row = await orchestrator.db.fetch_one(
                "SELECT value FROM settings WHERE key = ?",
                ("dashboard_password_hash",),
            )
            if row:
                return row["value"]
        return None

    async def _check_auth(
        x_shared_secret: str | None = None,
        authorization: str | None = None,
    ) -> None:
        """Raise 401 if neither shared secret nor bearer token is valid.

        When no shared secret is configured, access is allowed.
        """
        # Check bearer token (from `researchloop connect`).
        if authorization and authorization.startswith("Bearer "):
            token = authorization[7:]
            mgr = await _get_session_mgr()
            if mgr.verify_token(token):
                return

        # Check shared secret (from runner webhooks / config).
        expected = orchestrator.config.shared_secret
        if expected and _secrets_match(x_shared_secret, expected):
            return

        # If no auth mechanism is configured, allow access.
        if not expected:
            return

        raise HTTPException(
            status_code=401,
            detail="Invalid or missing credentials",
        )

    @app.post("/api/auth")
    async def api_auth(request: Request) -> JSONResponse:
        """Authenticate with dashboard password, get API token."""
        body = await _read_json_object(request)
        password = body.get("password", "")

        pw_hash = await _get_password_hash()
        if not pw_hash:
            raise HTTPException(
                status_code=400,
                detail="No password configured on this server",
            )

        # A non-string password can never be valid; reject it without
        # handing an unexpected type to the hash check.
        if not isinstance(password, str) or not check_password(password, pw_hash):
            raise HTTPException(
                status_code=401,
                detail="Invalid password",
            )

        mgr = await _get_session_mgr()
        token = mgr.create_token()
        return JSONResponse({"token": token})

    # -- Webhook routes -------------------------------------------------

    async def _check_webhook_token(
        sprint_id: str,
        x_webhook_token: str | None = None,
    ) -> Any:
        """Verify the per-sprint webhook token and return the sprint row.

        Raises 400 for a missing/non-string sprint id, 503 when the
        database is not available, 404 for an unknown sprint, and 401
        when the sprint has a webhook token that the request does not
        match.  Sprints without a stored token are accepted.
        """
        from researchloop.db import queries

        if not sprint_id or not isinstance(sprint_id, str):
            raise HTTPException(
                status_code=400,
                detail="sprint_id is required",
            )
        if orchestrator.db is None:
            raise HTTPException(
                status_code=503,
                detail="Database is not available",
            )
        sprint = await queries.get_sprint(orchestrator.db, sprint_id)
        if sprint is None:
            raise HTTPException(
                status_code=404,
                detail="Sprint not found",
            )
        expected = sprint.get("webhook_token")
        if expected and not _secrets_match(x_webhook_token, expected):
            raise HTTPException(
                status_code=401,
                detail="Invalid webhook token",
            )
        return sprint

    @app.post("/api/webhook/sprint-complete")
    async def webhook_sprint_complete(
        request: Request,
        x_webhook_token: str | None = Header(default=None),
    ) -> JSONResponse:
        """Handle sprint completion webhook from the runner."""
        body = await _read_json_object(request)
        sprint_id: str = body.get("sprint_id", "")
        await _check_webhook_token(sprint_id, x_webhook_token)
        status: str = body.get("status", "completed")
        summary: str | None = body.get("summary")
        error: str | None = body.get("error")
        idea: str | None = body.get("idea")

        assert orchestrator.sprint_manager is not None
        await orchestrator.sprint_manager.handle_completion(
            sprint_id=sprint_id,
            status=status,
            summary=summary,
            error=error,
            idea=idea,
        )

        # Trigger auto-loop advancement if applicable.
        if orchestrator.auto_loop is not None:
            await orchestrator.auto_loop.on_sprint_complete(sprint_id)

        logger.info(
            "Webhook: sprint %s completion processed (status=%s)",
            sprint_id,
            status,
        )
        return JSONResponse({"ok": True, "sprint_id": sprint_id})

    @app.post("/api/webhook/heartbeat")
    async def webhook_heartbeat(
        request: Request,
        x_webhook_token: str | None = Header(default=None),
    ) -> JSONResponse:
        """Handle heartbeat from the runner.

        Stores the heartbeat time and phase in the sprint metadata and,
        when log/progress text is supplied, writes a combined snapshot
        into the sprint's ``error`` column.
        """
        body = await _read_json_object(request)
        sprint_id = body.get("sprint_id", "")
        sprint = await _check_webhook_token(sprint_id, x_webhook_token)
        phase: str | None = body.get("phase")
        log_tail: str | None = body.get("log_tail")
        recent_files: str | None = body.get("recent_files")
        progress: str | None = body.get("progress")
        output_log: str | None = body.get("output_log")

        assert orchestrator.db is not None

        from researchloop.db import queries

        # Build metadata with heartbeat info.
        meta: dict[str, Any] = {
            "last_heartbeat": datetime.now(timezone.utc).isoformat(),
            "phase": phase,
        }
        # Preserve existing report/has_pdf from metadata.
        if sprint.get("metadata_json"):
            try:
                existing = json.loads(sprint["metadata_json"])
                for k in ("report", "has_pdf"):
                    if k in existing:
                        meta[k] = existing[k]
            except (json.JSONDecodeError, TypeError):
                # Unreadable metadata is replaced by the fresh heartbeat.
                pass

        update_fields: dict[str, Any] = {
            "metadata_json": json.dumps(meta),
        }
        if phase:
            update_fields["status"] = phase
        # Store log + progress so the dashboard can show live state.
        if log_tail or progress or output_log:
            parts: list[str] = []
            if progress:
                parts.append(progress.strip())
            if output_log:
                parts.append(
                    f"--- Script output (last 30 lines) ---\n{output_log.strip()}"
                )
            if log_tail:
                parts.append(f"--- Tool log ---\n{log_tail.strip()}")
            if recent_files:
                parts.append(f"--- Recent file activity ---\n{recent_files.strip()}")
            update_fields["error"] = "\n\n".join(parts)

        await queries.update_sprint(orchestrator.db, sprint_id, **update_fields)

        logger.debug("Heartbeat received for sprint %s (phase=%s)", sprint_id, phase)
        return JSONResponse({"ok": True})

    # -- Artifact upload ------------------------------------------------

    @app.post("/api/artifacts/{sprint_id}")
    async def upload_artifact(
        sprint_id: str,
        file: UploadFile,
        x_webhook_token: str | None = Header(default=None),
    ) -> JSONResponse:
        """Receive and store an artifact file for a sprint.

        The file is written to ``<artifact_dir>/<sprint_id>/<name>`` and
        recorded in the database.  Only the final path component of the
        client-supplied filename is used.
        """
        # Also guarantees the sprint exists (404 otherwise).
        await _check_webhook_token(sprint_id, x_webhook_token)

        assert orchestrator.db is not None

        from researchloop.db import queries

        # Determine storage path.
        artifact_dir = Path(orchestrator.config.artifact_dir) / sprint_id
        artifact_dir.mkdir(parents=True, exist_ok=True)

        # The filename is client-controlled. Keep only its last component
        # so "../x" or an absolute path cannot escape artifact_dir.
        raw_name = (file.filename or "").replace("\\", "/")
        if "\x00" in raw_name:
            raise HTTPException(status_code=400, detail="Invalid filename")
        filename = Path(raw_name).name
        if filename in ("", ".", ".."):
            filename = "upload"
        dest = artifact_dir / filename

        # Stream the upload to disk.
        size = 0
        with open(dest, "wb") as f:
            while chunk := await file.read(1024 * 256):  # 256 KB chunks
                f.write(chunk)
                size += len(chunk)

        # Record in database.
        await queries.create_artifact(
            orchestrator.db,
            sprint_id=sprint_id,
            filename=filename,
            path=str(dest),
            size=size,
            content_type=file.content_type,
        )

        logger.info(
            "Artifact %r uploaded for sprint %s (%d bytes)",
            filename,
            sprint_id,
            size,
        )
        return JSONResponse(
            {"ok": True, "filename": filename, "size": size},
            status_code=201,
        )

    # -- Read-only JSON endpoints ---------------------------------------

    @app.get("/api/sprints")
    async def list_sprints(
        study_name: str | None = None,
        limit: int = 50,
        x_shared_secret: str | None = Header(default=None),
        authorization: str | None = Header(default=None),
    ) -> JSONResponse:
        """List sprints, optionally filtered by study name."""
        await _check_auth(x_shared_secret, authorization)
        limit = min(max(1, limit), 1000)  # clamp to 1..1000
        assert orchestrator.sprint_manager is not None
        sprints = await orchestrator.sprint_manager.list_sprints(
            study_name=study_name, limit=limit
        )
        return JSONResponse({"sprints": sprints})

    @app.get("/api/sprints/{sprint_id}")
    async def get_sprint(
        sprint_id: str,
        x_shared_secret: str | None = Header(default=None),
        authorization: str | None = Header(default=None),
    ) -> JSONResponse:
        """Get a single sprint by ID."""
        await _check_auth(x_shared_secret, authorization)
        assert orchestrator.sprint_manager is not None
        sprint = await orchestrator.sprint_manager.get_sprint(sprint_id)
        if sprint is None:
            raise HTTPException(status_code=404, detail="Sprint not found")
        return JSONResponse({"sprint": sprint})

    @app.get("/api/studies")
    async def list_studies(
        x_shared_secret: str | None = Header(default=None),
        authorization: str | None = Header(default=None),
    ) -> JSONResponse:
        """List all studies."""
        await _check_auth(x_shared_secret, authorization)
        assert orchestrator.study_manager is not None
        studies = await orchestrator.study_manager.list_all()
        return JSONResponse({"studies": studies})

    # -- Sprint / loop management API -----------------------------------

    @app.post("/api/sprints")
    async def create_sprint(
        request: Request,
        x_shared_secret: str | None = Header(default=None),
        authorization: str | None = Header(default=None),
    ) -> JSONResponse:
        """Create and submit a sprint."""
        await _check_auth(x_shared_secret, authorization)
        assert orchestrator.sprint_manager is not None
        body = await _read_json_object(request)
        study_name = body.get("study_name", "")
        idea = body.get("idea", "")
        if not study_name or not idea:
            raise HTTPException(
                status_code=400,
                detail="study_name and idea are required",
            )
        job_options = body.get("job_options", {})
        sprint = await orchestrator.sprint_manager.run_sprint(
            study_name, idea, job_options=job_options
        )
        return JSONResponse(
            {
                "sprint_id": sprint.id,
                "study_name": sprint.study_name,
                "status": sprint.status.value,
                "job_id": sprint.job_id,
            },
            status_code=201,
        )

    @app.post("/api/sprints/{sprint_id}/cancel")
    async def cancel_sprint(
        sprint_id: str,
        x_shared_secret: str | None = Header(default=None),
        authorization: str | None = Header(default=None),
    ) -> JSONResponse:
        """Cancel a sprint."""
        await _check_auth(x_shared_secret, authorization)
        assert orchestrator.sprint_manager is not None
        success = await orchestrator.sprint_manager.cancel_sprint(sprint_id)
        return JSONResponse({"cancelled": success})

    @app.post("/api/loops")
    async def create_loop(
        request: Request,
        x_shared_secret: str | None = Header(default=None),
        authorization: str | None = Header(default=None),
    ) -> JSONResponse:
        """Start an auto-loop."""
        await _check_auth(x_shared_secret, authorization)
        assert orchestrator.auto_loop is not None
        body = await _read_json_object(request)
        study_name = body.get("study_name", "")
        count = body.get("count", 5)
        if not study_name:
            raise HTTPException(
                status_code=400,
                detail="study_name is required",
            )
        context = body.get("context", "")
        loop_id = await orchestrator.auto_loop.start(study_name, count, context=context)
        return JSONResponse({"loop_id": loop_id}, status_code=201)

    @app.post("/api/loops/{loop_id}/stop")
    async def stop_loop(
        loop_id: str,
        x_shared_secret: str | None = Header(default=None),
        authorization: str | None = Header(default=None),
    ) -> JSONResponse:
        """Stop an auto-loop."""
        await _check_auth(x_shared_secret, authorization)
        assert orchestrator.auto_loop is not None
        await orchestrator.auto_loop.stop(loop_id)
        return JSONResponse({"stopped": True})

    # -- Slack Events API -----------------------------------------------

    # Recently processed event IDs, used to drop Slack retries. A dict is
    # used as an insertion-ordered set so the oldest ID can be evicted.
    _processed_events: dict[str, None] = {}
    _max_processed_events = 1000

    # Strong references to in-flight handler tasks; the event loop only
    # keeps weak references, so an unreferenced task may be collected.
    _background_tasks: set[Any] = set()

    def _on_slack_task_done(task: Any) -> None:
        """Release a finished handler task and log any exception it raised."""
        _background_tasks.discard(task)
        if task.cancelled():
            return
        exc = task.exception()
        if exc is not None:
            logger.error("Slack event handler failed", exc_info=exc)

    @app.post("/api/slack/events")
    async def slack_events(request: Request) -> JSONResponse:
        """Handle Slack Events API callbacks.

        Answers the URL-verification challenge, verifies the request
        signature, drops duplicate deliveries, and hands user events to a
        background task so Slack gets its response immediately.
        """
        raw_body = await request.body()
        try:
            body = json.loads(raw_body)
        except ValueError as exc:
            raise HTTPException(
                status_code=400,
                detail="Request body must be valid JSON",
            ) from exc
        if not isinstance(body, dict):
            raise HTTPException(
                status_code=400,
                detail="Request body must be a JSON object",
            )

        # URL verification challenge
        if body.get("type") == "url_verification":
            return JSONResponse({"challenge": body.get("challenge", "")})

        # Signature verification. Must run before deduplication, otherwise
        # unsigned requests could mark event IDs as processed and cause
        # the genuine delivery to be dropped.
        slack_cfg = orchestrator.config.slack
        if slack_cfg and slack_cfg.bot_token and not slack_cfg.signing_secret:
            logger.warning(
                "Slack signing_secret is not configured — "
                "rejecting event. Set [slack] signing_secret in "
                "researchloop.toml or the "
                "RESEARCHLOOP_SLACK_SIGNING_SECRET env var."
            )
            raise HTTPException(
                status_code=500,
                detail="Slack signing_secret is required but not configured",
            )
        if slack_cfg and slack_cfg.signing_secret:
            ts = request.headers.get("X-Slack-Request-Timestamp", "")
            sig = request.headers.get("X-Slack-Signature", "")
            if not verify_slack_signature(
                slack_cfg.signing_secret,
                ts,
                raw_body,
                sig,
            ):
                raise HTTPException(
                    status_code=403,
                    detail="Invalid Slack signature",
                )

        # Deduplicate retries.
        event_id = body.get("event_id")
        if isinstance(event_id, str) and event_id:
            if event_id in _processed_events:
                return JSONResponse({"ok": True})
            _processed_events[event_id] = None
            # Bound memory by evicting the oldest IDs rather than clearing
            # the whole cache (which would forget recent events too).
            while len(_processed_events) > _max_processed_events:
                del _processed_events[next(iter(_processed_events))]

        if body.get("type") != "event_callback":
            return JSONResponse({"ok": True})

        event = body.get("event", {})
        if not isinstance(event, dict):
            return JSONResponse({"ok": True})

        # Ignore bot messages to avoid loops.
        if event.get("bot_id") or event.get("subtype") or event.get("bot_profile"):
            return JSONResponse({"ok": True})

        # Return 200 immediately so Slack doesn't retry.
        # Process the event in a background task.
        task = asyncio.create_task(_handle_slack_event(event, slack_cfg))
        _background_tasks.add(task)
        task.add_done_callback(_on_slack_task_done)
        return JSONResponse({"ok": True})

    async def _handle_slack_event(event: dict, slack_cfg: Any) -> None:
        """Process a Slack event in the background.

        Only ``app_mention`` and ``message`` events are handled.  After
        the user and channel checks the text is matched, in order,
        against the auth-status, help, sprint-list and sprint-run
        commands; anything else is passed to the conversation manager.
        Replies are only sent when a Slack bot token is configured.
        """
        event_type = event.get("type", "")

        if event_type not in ("app_mention", "message"):
            return

        bot_token = slack_cfg.bot_token if slack_cfg else None
        text: str = event.get("text") or ""
        thread_ts: str = event.get("thread_ts") or event.get("ts", "")
        channel: str = event.get("channel", "")
        channel_type: str = event.get("channel_type", "")

        async def _reply(message: str) -> None:
            # Post into the thread (or message) that triggered the event.
            notifier = SlackNotifier(
                bot_token=bot_token,
                channel_id=channel,
            )
            await notifier._post_message(message, thread_ts=thread_ts)

        # Check if user is allowed.
        user_id: str = event.get("user", "")
        allowed = slack_cfg.allowed_user_ids if slack_cfg else []
        if allowed and user_id not in allowed:
            if bot_token:
                await _reply("Sorry, you're not authorized to use this bot.")
            return

        # Restrict to configured channel if enabled.
        # DMs (channel_type "im") are always allowed.
        if (
            slack_cfg
            and slack_cfg.restrict_to_channel
            and slack_cfg.channel_id
            and channel != slack_cfg.channel_id
            and channel_type != "im"
        ):
            logger.debug(
                "Ignoring message in channel %s (not %s)",
                channel,
                slack_cfg.channel_id,
            )
            return
        text_lower = text.lower().strip()

        # Handle "auth status" / "login" commands
        if any(kw in text_lower for kw in ("auth status", "auth check", "login")):
            if bot_token:
                from researchloop.core.auth import (
                    check_claude_auth_async,
                )

                ok, detail = await check_claude_auth_async()
                if ok:
                    msg = (
                        ":white_check_mark: Claude is"
                        f" authenticated on this server ({detail})."
                    )
                else:
                    msg = (
                        ":information_source: Claude is not"
                        " authenticated on this server"
                        " (not required — AI runs on the"
                        " HPC cluster)."
                    )
                await _reply(msg)
            return

        # Handle "help" command.
        if text_lower == "help":
            if bot_token:
                await _reply(
                    "Available commands:\n"
                    "• `sprint run <study> <idea>`"
                    " — start a sprint\n"
                    "• `sprint list` — list recent sprints\n"
                    "• `loop start <study> <count>`"
                    " — start an auto-loop\n"
                    "• `auth status` — check Claude auth\n"
                    "• `help` — show this message"
                )
            return

        # Handle "sprint list" command.
        if "sprint list" in text_lower:
            if orchestrator.sprint_manager and bot_token:
                sprints = await orchestrator.sprint_manager.list_sprints(limit=10)
                if not sprints:
                    await _reply("No sprints found.")
                else:
                    lines = [
                        f"• *{s['id']}* [{s['status']}] {(s.get('idea') or '')[:50]}"
                        for s in sprints
                    ]
                    await _reply("Recent sprints:\n" + "\n".join(lines))
            return

        # Handle "sprint run <study> <idea>" commands.
        # The keyword is matched case-insensitively, but the arguments are
        # sliced from the original text so the study name and idea keep
        # the casing the user typed.
        run_match = re.search(r"sprint run", text, flags=re.IGNORECASE | re.ASCII)
        if run_match:
            tokens = text[run_match.end():].strip().split(None, 1)
            study_name = tokens[0] if tokens else ""
            idea = tokens[1] if len(tokens) > 1 else ""

            if (
                study_name
                and idea
                and orchestrator.sprint_manager is not None
                and bot_token
            ):
                try:
                    sprint = await orchestrator.sprint_manager.run_sprint(
                        study_name, idea
                    )
                    await _reply(
                        f"Sprint *{sprint.id}* submitted for study *{study_name}*."
                    )
                except Exception as exc:
                    # Report the failure to the user instead of dropping it.
                    await _reply(f"Failed to start sprint: {exc}")
            return

        # Free-form chat — pass to Claude via ConversationManager.
        cm = orchestrator.conversation_manager
        if cm is not None and bot_token:
            try:
                response_text = await cm.handle_message(
                    thread_ts=thread_ts,
                    user_text=text,
                    channel=channel,
                    bot_token=bot_token,
                )
                await _reply(response_text)
            except Exception as exc:
                logger.exception("Chat handler failed: %s", exc)
                await _reply(
                    "Sorry, something went wrong. Try `help` for available commands."
                )

    # -- Dashboard HTML routes -----------------------------------------
    from researchloop.dashboard.routes import (
        add_dashboard_routes,
    )

    add_dashboard_routes(app, orchestrator)

    return app