researchloop 0.2.0__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. {researchloop-0.2.0 → researchloop-0.3.1}/CLAUDE.md +8 -9
  2. {researchloop-0.2.0 → researchloop-0.3.1}/PKG-INFO +9 -5
  3. {researchloop-0.2.0 → researchloop-0.3.1}/README.md +8 -4
  4. researchloop-0.3.1/docs/assets/mmlu-combined.gif +0 -0
  5. researchloop-0.3.1/docs/assets/mmlu-combined.mp4 +0 -0
  6. {researchloop-0.2.0 → researchloop-0.3.1}/docs/index.md +4 -0
  7. {researchloop-0.2.0 → researchloop-0.3.1}/pyproject.toml +1 -1
  8. researchloop-0.3.1/researchloop/__init__.py +1 -0
  9. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/clusters/monitor.py +21 -7
  10. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/comms/slack.py +3 -15
  11. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/core/orchestrator.py +14 -65
  12. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/routes.py +349 -25
  13. researchloop-0.3.1/researchloop/dashboard/templates/loop_detail.html +115 -0
  14. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/templates/sprint_detail.html +21 -1
  15. researchloop-0.3.1/researchloop/dashboard/templates/sprints.html +127 -0
  16. researchloop-0.3.1/researchloop/dashboard/templates/studies.html +31 -0
  17. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/templates/study_detail.html +30 -3
  18. researchloop-0.3.1/researchloop/dashboard/templates/study_form.html +128 -0
  19. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/db/migrations.py +11 -12
  20. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/db/queries.py +30 -5
  21. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/job_templates/sge.sh.j2 +22 -0
  22. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/job_templates/slurm.sh.j2 +22 -0
  23. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/templates/tweak.md.j2 +6 -1
  24. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/sprints/auto_loop.py +3 -124
  25. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/sprints/manager.py +133 -32
  26. researchloop-0.3.1/researchloop/studies/manager.py +286 -0
  27. {researchloop-0.2.0 → researchloop-0.3.1}/tests/docker/sge/Dockerfile +10 -0
  28. {researchloop-0.2.0 → researchloop-0.3.1}/tests/docker/sge/entrypoint.sh +17 -0
  29. {researchloop-0.2.0 → researchloop-0.3.1}/tests/integration/conftest.py +2 -2
  30. {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_auto_loop.py +2 -109
  31. researchloop-0.3.1/tests/test_dashboard.py +1102 -0
  32. {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_database.py +2 -1
  33. {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_queries.py +81 -24
  34. {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_runner.py +55 -0
  35. {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_slack.py +1 -49
  36. {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_slack_simulator.py +6 -52
  37. {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_sprint_manager.py +359 -1
  38. researchloop-0.3.1/tests/test_study_manager.py +285 -0
  39. {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_tweaks.py +112 -3
  40. {researchloop-0.2.0 → researchloop-0.3.1}/uv.lock +1 -1
  41. researchloop-0.2.0/researchloop/__init__.py +0 -1
  42. researchloop-0.2.0/researchloop/comms/conversation.py +0 -472
  43. researchloop-0.2.0/researchloop/core/auth.py +0 -78
  44. researchloop-0.2.0/researchloop/dashboard/templates/loop_detail.html +0 -58
  45. researchloop-0.2.0/researchloop/dashboard/templates/sprints.html +0 -48
  46. researchloop-0.2.0/researchloop/dashboard/templates/studies.html +0 -18
  47. researchloop-0.2.0/researchloop/studies/manager.py +0 -102
  48. researchloop-0.2.0/tests/test_conversation.py +0 -366
  49. researchloop-0.2.0/tests/test_dashboard.py +0 -465
  50. researchloop-0.2.0/tests/test_study_manager.py +0 -77
  51. {researchloop-0.2.0 → researchloop-0.3.1}/.github/workflows/ci.yml +0 -0
  52. {researchloop-0.2.0 → researchloop-0.3.1}/.github/workflows/docs.yml +0 -0
  53. {researchloop-0.2.0 → researchloop-0.3.1}/.github/workflows/release.yml +0 -0
  54. {researchloop-0.2.0 → researchloop-0.3.1}/.gitignore +0 -0
  55. {researchloop-0.2.0 → researchloop-0.3.1}/Dockerfile +0 -0
  56. {researchloop-0.2.0 → researchloop-0.3.1}/LICENSE +0 -0
  57. {researchloop-0.2.0 → researchloop-0.3.1}/docs/cli.md +0 -0
  58. {researchloop-0.2.0 → researchloop-0.3.1}/docs/configuration.md +0 -0
  59. {researchloop-0.2.0 → researchloop-0.3.1}/docs/dashboard.md +0 -0
  60. {researchloop-0.2.0 → researchloop-0.3.1}/docs/deployment.md +0 -0
  61. {researchloop-0.2.0 → researchloop-0.3.1}/docs/development.md +0 -0
  62. {researchloop-0.2.0 → researchloop-0.3.1}/docs/getting-started.md +0 -0
  63. {researchloop-0.2.0 → researchloop-0.3.1}/docs/security.md +0 -0
  64. {researchloop-0.2.0 → researchloop-0.3.1}/docs/slack.md +0 -0
  65. {researchloop-0.2.0 → researchloop-0.3.1}/mkdocs.yml +0 -0
  66. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/__main__.py +0 -0
  67. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/cli.py +0 -0
  68. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/clusters/__init__.py +0 -0
  69. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/clusters/ssh.py +0 -0
  70. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/comms/__init__.py +0 -0
  71. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/comms/base.py +0 -0
  72. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/comms/ntfy.py +0 -0
  73. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/comms/router.py +0 -0
  74. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/core/__init__.py +0 -0
  75. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/core/config.py +0 -0
  76. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/core/credentials.py +0 -0
  77. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/core/models.py +0 -0
  78. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/__init__.py +0 -0
  79. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/app.py +0 -0
  80. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/auth.py +0 -0
  81. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/templates/base.html +0 -0
  82. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/templates/login.html +0 -0
  83. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/templates/loops.html +0 -0
  84. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/templates/search.html +0 -0
  85. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/templates/setup.html +0 -0
  86. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/templates/tweak_detail.html +0 -0
  87. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/db/__init__.py +0 -0
  88. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/db/database.py +0 -0
  89. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/__init__.py +0 -0
  90. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/claude.py +0 -0
  91. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/job_templates/sge_tweak.sh.j2 +0 -0
  92. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/job_templates/slurm_tweak.sh.j2 +0 -0
  93. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/main.py +0 -0
  94. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/pipeline.py +0 -0
  95. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/templates/fix_issues.md.j2 +0 -0
  96. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/templates/idea_generator.md.j2 +0 -0
  97. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/templates/red_team.md.j2 +0 -0
  98. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/templates/report.md.j2 +0 -0
  99. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/templates/research_sprint.md.j2 +0 -0
  100. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/templates/summarizer.md.j2 +0 -0
  101. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/upload.py +0 -0
  102. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/schedulers/__init__.py +0 -0
  103. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/schedulers/base.py +0 -0
  104. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/schedulers/local.py +0 -0
  105. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/schedulers/sge.py +0 -0
  106. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/schedulers/slurm.py +0 -0
  107. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/sprints/__init__.py +0 -0
  108. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/studies/__init__.py +0 -0
  109. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/testing/__init__.py +0 -0
  110. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/testing/slack_mock.py +0 -0
  111. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/testing/slack_simulator.py +0 -0
  112. {researchloop-0.2.0 → researchloop-0.3.1}/researchloop.toml.example +0 -0
  113. {researchloop-0.2.0 → researchloop-0.3.1}/slack-app-manifest.yml +0 -0
  114. {researchloop-0.2.0 → researchloop-0.3.1}/tests/__init__.py +0 -0
  115. {researchloop-0.2.0 → researchloop-0.3.1}/tests/conftest.py +0 -0
  116. {researchloop-0.2.0 → researchloop-0.3.1}/tests/docker/sge/mock_claude.sh +0 -0
  117. {researchloop-0.2.0 → researchloop-0.3.1}/tests/docker/slurm/Dockerfile +0 -0
  118. {researchloop-0.2.0 → researchloop-0.3.1}/tests/docker/slurm/entrypoint.sh +0 -0
  119. {researchloop-0.2.0 → researchloop-0.3.1}/tests/docker/slurm/mock_claude.sh +0 -0
  120. {researchloop-0.2.0 → researchloop-0.3.1}/tests/integration/__init__.py +0 -0
  121. {researchloop-0.2.0 → researchloop-0.3.1}/tests/integration/test_loop_advancement.py +0 -0
  122. {researchloop-0.2.0 → researchloop-0.3.1}/tests/integration/test_loop_and_monitor.py +0 -0
  123. {researchloop-0.2.0 → researchloop-0.3.1}/tests/integration/test_sge_scheduler.py +0 -0
  124. {researchloop-0.2.0 → researchloop-0.3.1}/tests/integration/test_slurm_scheduler.py +0 -0
  125. {researchloop-0.2.0 → researchloop-0.3.1}/tests/integration/test_sprint_slurm.py +0 -0
  126. {researchloop-0.2.0 → researchloop-0.3.1}/tests/integration/test_webhook_and_refresh.py +0 -0
  127. {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_api.py +0 -0
  128. {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_cli.py +0 -0
  129. {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_config.py +0 -0
  130. {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_models.py +0 -0
  131. {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_notification.py +0 -0
  132. {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_schedulers.py +0 -0
  133. {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_search.py +0 -0
  134. {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_sge.py +0 -0
  135. {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_slack_events.py +0 -0
  136. {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_slack_mock.py +0 -0
@@ -8,18 +8,19 @@ ResearchLoop is an automated research sprint platform for HPC clusters. It orche
8
8
 
9
9
  Two processes:
10
10
 
11
- 1. **Orchestrator** (`researchloop serve`) — FastAPI server that manages studies/sprints in SQLite, submits jobs via SSH, receives webhooks from runners, stores artifacts. Also serves the web dashboard and handles Slack events.
11
+ 1. **Orchestrator** (`researchloop serve`) — FastAPI server that manages studies/sprints in SQLite, submits jobs via SSH, receives webhooks from runners, stores artifacts. Also serves the web dashboard and handles Slack events. Has no Claude CLI dependency.
12
12
  2. **Sprint Runner** — runs inside each SLURM/SGE job on HPC. Self-contained bash scripts chain `claude -p` calls through a pipeline (research → red-team → fix → report → summarize), then upload artifacts and send a completion webhook.
13
13
 
14
14
  Key design decisions:
15
15
 
16
- - All AI work runs on HPC, never on the orchestrator (except Slack conversations and auto-loop idea generation, which use `claude -p` locally with restricted tools)
17
- - `claude -p --output-format stream-json` for sprint steps (enables live progress), `--output-format json` for conversations
16
+ - All AI work runs on HPC; the orchestrator never invokes `claude`
17
+ - `claude -p --output-format stream-json` for sprint steps (enables live progress)
18
18
  - SSH to HPC login nodes for sbatch/squeue/scancel/qsub/qdel
19
19
  - Job completion via per-sprint webhook tokens (runner → orchestrator), SSH polling as fallback
20
20
  - SQLite (aiosqlite, WAL mode) for metadata, with a `settings` table for persistent config (signing key, password hash)
21
21
  - Jinja2 templates for all prompts and job scripts — prompts are pre-rendered by the orchestrator and embedded as base64 in the job script
22
22
  - Auto-loop sprints generate their own ideas on the cluster (where Claude is authenticated) rather than on the orchestrator
23
+ - Slack integration is notification + structured slash-style commands only (`sprint run`, `sprint list`, `loop start`, `help`); free-form Q&A was removed
23
24
  - Context hierarchy: global → cluster → study (inline text + file paths at each level)
24
25
 
25
26
  ## Tech stack
@@ -51,11 +52,10 @@ researchloop/
51
52
  models.py — SprintStatus enum, Sprint/Study/AutoLoop dataclasses, generate_sprint_id(), format_sprint_dirname()
52
53
  orchestrator.py — Orchestrator class + create_app() FastAPI factory (API + Slack + dashboard)
53
54
  credentials.py — CLI credential storage (~/.config/researchloop/credentials.json) for remote orchestrator auth
54
- auth.py — check_claude_auth_async() helper for verifying Claude CLI auth status
55
55
  db/
56
56
  __init__.py
57
57
  database.py — async SQLite wrapper (WAL mode, auto-migrations, fetch_one/fetch_all/execute)
58
- migrations.py — CREATE TABLE statements (7 tables: studies, sprints, auto_loops, artifacts, slack_sessions, events, settings) + indexes + incremental column migrations
58
+ migrations.py — CREATE TABLE statements (7 tables: studies, sprints, tweaks, auto_loops, artifacts, events, settings) + indexes + incremental column migrations
59
59
  queries.py — async CRUD functions (all take Database as first arg, return dicts)
60
60
  clusters/
61
61
  __init__.py
@@ -95,7 +95,6 @@ researchloop/
95
95
  base.py — BaseNotifier ABC (notify_sprint_started/completed/failed)
96
96
  ntfy.py — NtfyNotifier (ntfy.sh push notifications)
97
97
  slack.py — SlackNotifier (chat:write + files:write) + verify_slack_signature()
98
- conversation.py — ConversationManager (Slack threads → Claude sessions via --resume, action execution, markdown→Slack conversion)
99
98
  router.py — NotificationRouter (fan-out to all configured notifiers)
100
99
  dashboard/
101
100
  __init__.py
@@ -116,7 +115,7 @@ researchloop/
116
115
 
117
116
  ## Database
118
117
 
119
- SQLite with 8 tables: `studies`, `sprints`, `tweaks`, `auto_loops`, `artifacts`, `slack_sessions`, `events`, `settings`. Schema in `db/migrations.py`. All queries in `db/queries.py` use parameterized SQL and return plain dicts.
118
+ SQLite with 7 tables: `studies`, `sprints`, `tweaks`, `auto_loops`, `artifacts`, `events`, `settings`. Schema in `db/migrations.py`. All queries in `db/queries.py` use parameterized SQL and return plain dicts. (An older `slack_sessions` table is dropped by the migration if present.)
120
119
 
121
120
  Key columns:
122
121
  - `sprints.webhook_token` — per-sprint token for webhook auth (generated at creation)
@@ -148,7 +147,7 @@ Key columns:
148
147
  - CSRF protection: HMAC-based tokens derived from session token + signing secret, checked on all mutating dashboard POST routes
149
148
  - Dashboard refresh: pulls live status from cluster via SSH (reads logs, progress.md, output.log, report.md, findings.md, summary.txt, idea.txt, checks for PDF)
150
149
  - Slack events: deduplication via event_id set, signature verification, background task processing (return 200 immediately), bot message filtering
151
- - Slack conversation: thread session mapping in DB, context building with study/sprint info, action execution via [ACTION: ...] tags
150
+ - Slack commands: `sprint run`, `sprint list`, `loop start`, `help` (no free-form chat — orchestrator does not run Claude locally)
152
151
  - Auto-loop: sprint idea=None → job script generates idea on cluster → idea.txt read back via SSH/webhook
153
152
  - CLI auth: `researchloop connect` gets a bearer token via /api/auth, stored in ~/.config/researchloop/credentials.json with 600 permissions
154
153
  - CLI auto-reauth: on 401, prompts for password, gets new token, saves it
@@ -156,7 +155,7 @@ Key columns:
156
155
 
157
156
  ## Testing
158
157
 
159
- 339 unit tests covering: models, config parsing, database operations, all query functions, SLURM scheduler (mock SSH), SGE scheduler (mock SSH), local scheduler (real subprocesses), study/sprint managers, auto-loop controller (with mock claude), notification router, Slack notifier + signature verification + conversation manager + Slack events API, FastAPI API endpoints (TestClient), dashboard routes + auth + setup + CSRF, CLI commands (CliRunner), runner output parsing, and template rendering.
158
+ Unit tests cover: models, config parsing, database operations, all query functions, SLURM scheduler (mock SSH), SGE scheduler (mock SSH), local scheduler (real subprocesses), study/sprint managers, auto-loop controller, notification router, Slack notifier + signature verification + Slack events API, FastAPI API endpoints (TestClient), dashboard routes + auth + setup + CSRF, CLI commands (CliRunner), runner output parsing, and template rendering.
160
159
 
161
160
  Integration tests (in tests/integration/) use a Docker SLURM container to test real job submission.
162
161
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: researchloop
3
- Version: 0.2.0
3
+ Version: 0.3.1
4
4
  Summary: Automated research sprint platform for HPC clusters
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -38,6 +38,8 @@ Description-Content-Type: text/markdown
38
38
  [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
39
39
  [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
40
40
 
41
+ <img width="720" height="456" alt="mmlu-combined" src="https://github.com/user-attachments/assets/6d1d495f-1078-4f81-9f8a-bb1792ea3905" />
42
+
41
43
  ---
42
44
 
43
45
  ResearchLoop submits AI-powered research experiments to your SLURM or SGE cluster, then reports back the results. You describe a research idea in natural language, it handles the rest: submitting the job, running a multi-step pipeline with [Claude Code](https://docs.anthropic.com/en/docs/claude-code), red-teaming the results, generating a report, and notifying you when it's done.
@@ -120,11 +122,13 @@ Browse to `/dashboard/` to see all your studies, sprints, and loops. Submit new
120
122
 
121
123
  ### Slack bot
122
124
 
123
- Chat with the bot to start sprints, check status, or discuss research ideas. The bot maintains conversation context across a thread, so you can have a back-and-forth about what to try next.
125
+ Get sprint notifications in your Slack channel and run commands from a thread:
124
126
 
125
127
  ```
126
- You: What should I investigate next based on the results from sp-a3f7b2?
127
- Bot: Based on the findings, I'd suggest... [ACTION: sprint_run {"study": "my-project", "idea": "..."}]
128
+ sprint run my-project "investigate feature X under condition Y"
129
+ sprint list
130
+ loop start my-project 5
131
+ help
128
132
  ```
129
133
 
130
134
  See the [Slack setup guide](https://researchloop.github.io/researchloop/slack/) for configuration.
@@ -187,7 +191,7 @@ Full docs at **[researchloop.github.io/researchloop](https://researchloop.github
187
191
  - [Configuration reference](https://researchloop.github.io/researchloop/configuration/) -- all TOML options and environment variables
188
192
  - [Deployment guide](https://researchloop.github.io/researchloop/deployment/) -- Docker, Fly.io, SSH key setup
189
193
  - [Dashboard guide](https://researchloop.github.io/researchloop/dashboard/) -- web UI features and authentication
190
- - [Slack integration](https://researchloop.github.io/researchloop/slack/) -- setup, commands, conversational mode
194
+ - [Slack integration](https://researchloop.github.io/researchloop/slack/) -- setup, commands, notifications
191
195
  - [CLI reference](https://researchloop.github.io/researchloop/cli/) -- all commands with examples
192
196
  - [Security](https://researchloop.github.io/researchloop/security/) -- authentication, CSRF, webhook tokens
193
197
  - [Development](https://researchloop.github.io/researchloop/development/) -- contributing, testing, architecture
@@ -7,6 +7,8 @@
7
7
  [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
8
8
  [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
9
9
 
10
+ <img width="720" height="456" alt="mmlu-combined" src="https://github.com/user-attachments/assets/6d1d495f-1078-4f81-9f8a-bb1792ea3905" />
11
+
10
12
  ---
11
13
 
12
14
  ResearchLoop submits AI-powered research experiments to your SLURM or SGE cluster, then reports back the results. You describe a research idea in natural language, it handles the rest: submitting the job, running a multi-step pipeline with [Claude Code](https://docs.anthropic.com/en/docs/claude-code), red-teaming the results, generating a report, and notifying you when it's done.
@@ -89,11 +91,13 @@ Browse to `/dashboard/` to see all your studies, sprints, and loops. Submit new
89
91
 
90
92
  ### Slack bot
91
93
 
92
- Chat with the bot to start sprints, check status, or discuss research ideas. The bot maintains conversation context across a thread, so you can have a back-and-forth about what to try next.
94
+ Get sprint notifications in your Slack channel and run commands from a thread:
93
95
 
94
96
  ```
95
- You: What should I investigate next based on the results from sp-a3f7b2?
96
- Bot: Based on the findings, I'd suggest... [ACTION: sprint_run {"study": "my-project", "idea": "..."}]
97
+ sprint run my-project "investigate feature X under condition Y"
98
+ sprint list
99
+ loop start my-project 5
100
+ help
97
101
  ```
98
102
 
99
103
  See the [Slack setup guide](https://researchloop.github.io/researchloop/slack/) for configuration.
@@ -156,7 +160,7 @@ Full docs at **[researchloop.github.io/researchloop](https://researchloop.github
156
160
  - [Configuration reference](https://researchloop.github.io/researchloop/configuration/) -- all TOML options and environment variables
157
161
  - [Deployment guide](https://researchloop.github.io/researchloop/deployment/) -- Docker, Fly.io, SSH key setup
158
162
  - [Dashboard guide](https://researchloop.github.io/researchloop/dashboard/) -- web UI features and authentication
159
- - [Slack integration](https://researchloop.github.io/researchloop/slack/) -- setup, commands, conversational mode
163
+ - [Slack integration](https://researchloop.github.io/researchloop/slack/) -- setup, commands, notifications
160
164
  - [CLI reference](https://researchloop.github.io/researchloop/cli/) -- all commands with examples
161
165
  - [Security](https://researchloop.github.io/researchloop/security/) -- authentication, CSRF, webhook tokens
162
166
  - [Development](https://researchloop.github.io/researchloop/development/) -- contributing, testing, architecture
@@ -2,6 +2,10 @@
2
2
 
3
3
  **Automated AI research sprints on HPC clusters.**
4
4
 
5
+ <video autoplay muted loop playsinline width="720" style="max-width:100%;height:auto;border-radius:6px">
6
+ <source src="assets/mmlu-combined.mp4" type="video/mp4">
7
+ </video>
8
+
5
9
  ---
6
10
 
7
11
  ResearchLoop automates multi-step AI research pipelines on SLURM and SGE clusters. You describe a research idea, and ResearchLoop submits it to your HPC cluster where [Claude Code](https://docs.anthropic.com/en/docs/claude-code) executes a full research pipeline -- coding, red-teaming, fixing, reporting -- inside a single job. Results are reported back via webhooks, Slack, or push notifications, and you can monitor everything from a web dashboard or the CLI.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "researchloop"
3
- version = "0.2.0"
3
+ version = "0.3.1"
4
4
  description = "Automated research sprint platform for HPC clusters"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -0,0 +1 @@
1
+ __version__ = "0.3.1"
@@ -6,12 +6,15 @@ import asyncio
6
6
  import json
7
7
  import logging
8
8
  from datetime import datetime, timezone
9
- from typing import Any
9
+ from typing import TYPE_CHECKING, Any
10
10
 
11
11
  from researchloop.clusters.ssh import SSHManager
12
12
  from researchloop.db import queries
13
13
  from researchloop.schedulers.base import BaseScheduler
14
14
 
15
+ if TYPE_CHECKING:
16
+ from researchloop.sprints.manager import SprintManager
17
+
15
18
  logger = logging.getLogger(__name__)
16
19
 
17
20
  # If a job's heartbeat is older than this many seconds AND the job is not
@@ -28,11 +31,17 @@ class JobMonitor:
28
31
  db: Any,
29
32
  schedulers: dict[str, BaseScheduler],
30
33
  config: Any = None,
34
+ sprint_manager: SprintManager | None = None,
31
35
  ) -> None:
32
36
  self.ssh_manager = ssh_manager
33
37
  self.db = db
34
38
  self.schedulers = schedulers
35
39
  self.config = config
40
+ # Optional: when set, terminal-state transitions go through
41
+ # sprint_manager.mark_sprint_terminal so the parent auto-loop
42
+ # advances. None falls back to a direct DB update (used by
43
+ # minimal test fixtures that don't construct a SprintManager).
44
+ self.sprint_manager = sprint_manager
36
45
  self._polling_task: asyncio.Task[None] | None = None
37
46
  self._stop_event = asyncio.Event()
38
47
 
@@ -143,12 +152,17 @@ class JobMonitor:
143
152
  # Persist the updated status if it changed.
144
153
  if status in ("completed", "failed"):
145
154
  try:
146
- await queries.update_sprint(
147
- self.db,
148
- sprint_id,
149
- status=status,
150
- completed_at=datetime.now(timezone.utc).isoformat(),
151
- )
155
+ if self.sprint_manager is not None:
156
+ await self.sprint_manager.mark_sprint_terminal(
157
+ sprint_id, status
158
+ )
159
+ else:
160
+ await queries.update_sprint(
161
+ self.db,
162
+ sprint_id,
163
+ status=status,
164
+ completed_at=datetime.now(timezone.utc).isoformat(),
165
+ )
152
166
  except Exception:
153
167
  logger.exception(
154
168
  "Failed to update DB status for sprint %s", sprint_id
@@ -26,12 +26,10 @@ class SlackNotifier(BaseNotifier):
26
26
  bot_token: str,
27
27
  channel_id: str | None = None,
28
28
  dashboard_url: str | None = None,
29
- conversation_manager: Any = None,
30
29
  ) -> None:
31
30
  self.bot_token = bot_token
32
31
  self.channel_id = channel_id
33
32
  self.dashboard_url = dashboard_url
34
- self._cm = conversation_manager
35
33
 
36
34
  async def _post_message(
37
35
  self,
@@ -121,10 +119,7 @@ class SlackNotifier(BaseNotifier):
121
119
  f"*Study:* {study_name}\n"
122
120
  f"*Idea:* {idea_trunc}"
123
121
  )
124
- resp = await self._post_message(msg)
125
- ts = resp.get("ts", "")
126
- if ts and self._cm:
127
- await self._cm.store_bot_message(ts, msg)
122
+ await self._post_message(msg)
128
123
 
129
124
  async def notify_sprint_completed(
130
125
  self,
@@ -140,11 +135,7 @@ class SlackNotifier(BaseNotifier):
140
135
  f"*Study:* {study_name}\n"
141
136
  f"*Summary:* {summary_trunc}"
142
137
  )
143
- resp = await self._post_message(msg)
144
- # Store the notification for thread context.
145
- ts = resp.get("ts", "")
146
- if ts and self._cm:
147
- await self._cm.store_bot_message(ts, msg)
138
+ await self._post_message(msg)
148
139
  if pdf_path:
149
140
  await self._upload_file(
150
141
  pdf_path,
@@ -162,10 +153,7 @@ class SlackNotifier(BaseNotifier):
162
153
  msg = (
163
154
  f":x: Sprint *{link}* failed\n*Study:* {study_name}\n*Error:* {error[:500]}"
164
155
  )
165
- resp = await self._post_message(msg)
166
- ts = resp.get("ts", "")
167
- if ts and self._cm:
168
- await self._cm.store_bot_message(ts, msg)
156
+ await self._post_message(msg)
169
157
 
170
158
 
171
159
  def verify_slack_signature(
@@ -15,7 +15,6 @@ from fastapi.responses import JSONResponse
15
15
 
16
16
  from researchloop.clusters.monitor import JobMonitor
17
17
  from researchloop.clusters.ssh import SSHManager
18
- from researchloop.comms.conversation import ConversationManager
19
18
  from researchloop.comms.ntfy import NtfyNotifier
20
19
  from researchloop.comms.router import NotificationRouter
21
20
  from researchloop.comms.slack import (
@@ -51,7 +50,6 @@ class Orchestrator:
51
50
  self.auto_loop: AutoLoopController | None = None
52
51
  self.notification_router: NotificationRouter | None = None
53
52
  self.job_monitor: JobMonitor | None = None
54
- self.conversation_manager: ConversationManager | None = None
55
53
 
56
54
  # ------------------------------------------------------------------
57
55
  # Lifecycle
@@ -108,24 +106,15 @@ class Orchestrator:
108
106
  notification_router=self.notification_router,
109
107
  )
110
108
 
111
- # 6b. Conversation manager
112
- self.conversation_manager = ConversationManager(
113
- self.db, sprint_manager=self.sprint_manager
114
- )
115
-
116
- # Wire conversation manager to Slack notifier
117
- # so notifications store thread context.
118
- if self.config.slack and self.config.slack.bot_token:
119
- for n in self.notification_router._notifiers:
120
- if isinstance(n, SlackNotifier):
121
- n._cm = self.conversation_manager
122
-
123
109
  # 7. Auto-loop controller
124
110
  self.auto_loop = AutoLoopController(
125
111
  db=self.db,
126
112
  sprint_manager=self.sprint_manager,
127
113
  config=self.config,
128
114
  )
115
+ # Late-bind the back-reference so SprintManager.mark_sprint_terminal
116
+ # can advance the parent loop on every terminal transition.
117
+ self.sprint_manager.auto_loop = self.auto_loop
129
118
 
130
119
  # 8. Job monitor
131
120
  self.job_monitor = JobMonitor(
@@ -133,6 +122,7 @@ class Orchestrator:
133
122
  db=self.db,
134
123
  schedulers=self.schedulers,
135
124
  config=self.config,
125
+ sprint_manager=self.sprint_manager,
136
126
  )
137
127
  await self.job_monitor.start_polling()
138
128
 
@@ -419,6 +409,10 @@ def create_app(orchestrator: Orchestrator) -> FastAPI:
419
409
  {"ok": True, "sprint_id": sprint_id, "tweak_id": tweak_id}
420
410
  )
421
411
 
412
+ # handle_completion fires auto_loop.on_sprint_complete internally
413
+ # via mark_sprint_terminal — single chokepoint for terminal-state
414
+ # transitions, so the loop also advances when the JobMonitor or a
415
+ # dashboard refresh is the one that detects the terminal status.
422
416
  await orchestrator.sprint_manager.handle_completion(
423
417
  sprint_id=sprint_id,
424
418
  status=status,
@@ -427,10 +421,6 @@ def create_app(orchestrator: Orchestrator) -> FastAPI:
427
421
  idea=idea,
428
422
  )
429
423
 
430
- # Trigger auto-loop advancement if applicable.
431
- if orchestrator.auto_loop is not None:
432
- await orchestrator.auto_loop.on_sprint_complete(sprint_id)
433
-
434
424
  logger.info(
435
425
  "Webhook: sprint %s completion processed (status=%s)",
436
426
  sprint_id,
@@ -790,33 +780,6 @@ def create_app(orchestrator: Orchestrator) -> FastAPI:
790
780
  return
791
781
  text_lower = text.lower().strip()
792
782
 
793
- # Handle "auth status" / "login" commands
794
- if any(kw in text_lower for kw in ("auth status", "auth check", "login")):
795
- if slack_cfg and slack_cfg.bot_token:
796
- from researchloop.core.auth import (
797
- check_claude_auth_async,
798
- )
799
-
800
- ok, detail = await check_claude_auth_async()
801
- notifier = SlackNotifier(
802
- bot_token=slack_cfg.bot_token,
803
- channel_id=channel,
804
- )
805
- if ok:
806
- msg = (
807
- ":white_check_mark: Claude is"
808
- f" authenticated on this server ({detail})."
809
- )
810
- else:
811
- msg = (
812
- ":information_source: Claude is not"
813
- " authenticated on this server"
814
- " (not required — AI runs on the"
815
- " HPC cluster)."
816
- )
817
- await notifier._post_message(msg, thread_ts=thread_ts)
818
- return
819
-
820
783
  # Handle "help" command.
821
784
  if text_lower == "help":
822
785
  if slack_cfg and slack_cfg.bot_token:
@@ -831,7 +794,6 @@ def create_app(orchestrator: Orchestrator) -> FastAPI:
831
794
  "• `sprint list` — list recent sprints\n"
832
795
  "• `loop start <study> <count>`"
833
796
  " — start an auto-loop\n"
834
- "• `auth status` — check Claude auth\n"
835
797
  "• `help` — show this message",
836
798
  thread_ts=thread_ts,
837
799
  )
@@ -894,29 +856,16 @@ def create_app(orchestrator: Orchestrator) -> FastAPI:
894
856
  )
895
857
  return
896
858
 
897
- # Free-form chat — pass to Claude via ConversationManager.
898
- cm = orchestrator.conversation_manager
899
- if cm is not None and slack_cfg and slack_cfg.bot_token:
859
+ # Unrecognized message — point user at the help command.
860
+ if slack_cfg and slack_cfg.bot_token:
900
861
  notifier = SlackNotifier(
901
862
  bot_token=slack_cfg.bot_token,
902
863
  channel_id=channel,
903
864
  )
904
-
905
- try:
906
- response_text = await cm.handle_message(
907
- thread_ts=thread_ts,
908
- user_text=text,
909
- channel=channel,
910
- bot_token=slack_cfg.bot_token if slack_cfg else None,
911
- )
912
- await notifier._post_message(response_text, thread_ts=thread_ts)
913
- except Exception as exc:
914
- logger.exception("Chat handler failed: %s", exc)
915
- await notifier._post_message(
916
- "Sorry, something went wrong. Try `help` for available commands.",
917
- thread_ts=thread_ts,
918
- )
919
-
865
+ await notifier._post_message(
866
+ "Sorry, I didn't understand that. Try `help` for available commands.",
867
+ thread_ts=thread_ts,
868
+ )
920
869
  return
921
870
 
922
871
  # -- Dashboard HTML routes -----------------------------------------