researchloop 0.2.0__tar.gz → 0.3.1__tar.gz
This diff compares package versions that have been publicly released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- {researchloop-0.2.0 → researchloop-0.3.1}/CLAUDE.md +8 -9
- {researchloop-0.2.0 → researchloop-0.3.1}/PKG-INFO +9 -5
- {researchloop-0.2.0 → researchloop-0.3.1}/README.md +8 -4
- researchloop-0.3.1/docs/assets/mmlu-combined.gif +0 -0
- researchloop-0.3.1/docs/assets/mmlu-combined.mp4 +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/docs/index.md +4 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/pyproject.toml +1 -1
- researchloop-0.3.1/researchloop/__init__.py +1 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/clusters/monitor.py +21 -7
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/comms/slack.py +3 -15
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/core/orchestrator.py +14 -65
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/routes.py +349 -25
- researchloop-0.3.1/researchloop/dashboard/templates/loop_detail.html +115 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/templates/sprint_detail.html +21 -1
- researchloop-0.3.1/researchloop/dashboard/templates/sprints.html +127 -0
- researchloop-0.3.1/researchloop/dashboard/templates/studies.html +31 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/templates/study_detail.html +30 -3
- researchloop-0.3.1/researchloop/dashboard/templates/study_form.html +128 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/db/migrations.py +11 -12
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/db/queries.py +30 -5
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/job_templates/sge.sh.j2 +22 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/job_templates/slurm.sh.j2 +22 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/templates/tweak.md.j2 +6 -1
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/sprints/auto_loop.py +3 -124
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/sprints/manager.py +133 -32
- researchloop-0.3.1/researchloop/studies/manager.py +286 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/docker/sge/Dockerfile +10 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/docker/sge/entrypoint.sh +17 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/integration/conftest.py +2 -2
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_auto_loop.py +2 -109
- researchloop-0.3.1/tests/test_dashboard.py +1102 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_database.py +2 -1
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_queries.py +81 -24
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_runner.py +55 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_slack.py +1 -49
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_slack_simulator.py +6 -52
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_sprint_manager.py +359 -1
- researchloop-0.3.1/tests/test_study_manager.py +285 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_tweaks.py +112 -3
- {researchloop-0.2.0 → researchloop-0.3.1}/uv.lock +1 -1
- researchloop-0.2.0/researchloop/__init__.py +0 -1
- researchloop-0.2.0/researchloop/comms/conversation.py +0 -472
- researchloop-0.2.0/researchloop/core/auth.py +0 -78
- researchloop-0.2.0/researchloop/dashboard/templates/loop_detail.html +0 -58
- researchloop-0.2.0/researchloop/dashboard/templates/sprints.html +0 -48
- researchloop-0.2.0/researchloop/dashboard/templates/studies.html +0 -18
- researchloop-0.2.0/researchloop/studies/manager.py +0 -102
- researchloop-0.2.0/tests/test_conversation.py +0 -366
- researchloop-0.2.0/tests/test_dashboard.py +0 -465
- researchloop-0.2.0/tests/test_study_manager.py +0 -77
- {researchloop-0.2.0 → researchloop-0.3.1}/.github/workflows/ci.yml +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/.github/workflows/docs.yml +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/.github/workflows/release.yml +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/.gitignore +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/Dockerfile +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/LICENSE +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/docs/cli.md +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/docs/configuration.md +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/docs/dashboard.md +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/docs/deployment.md +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/docs/development.md +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/docs/getting-started.md +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/docs/security.md +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/docs/slack.md +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/mkdocs.yml +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/__main__.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/cli.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/clusters/__init__.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/clusters/ssh.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/comms/__init__.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/comms/base.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/comms/ntfy.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/comms/router.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/core/__init__.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/core/config.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/core/credentials.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/core/models.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/__init__.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/app.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/auth.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/templates/base.html +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/templates/login.html +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/templates/loops.html +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/templates/search.html +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/templates/setup.html +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/dashboard/templates/tweak_detail.html +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/db/__init__.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/db/database.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/__init__.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/claude.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/job_templates/sge_tweak.sh.j2 +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/job_templates/slurm_tweak.sh.j2 +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/main.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/pipeline.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/templates/fix_issues.md.j2 +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/templates/idea_generator.md.j2 +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/templates/red_team.md.j2 +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/templates/report.md.j2 +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/templates/research_sprint.md.j2 +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/templates/summarizer.md.j2 +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/runner/upload.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/schedulers/__init__.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/schedulers/base.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/schedulers/local.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/schedulers/sge.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/schedulers/slurm.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/sprints/__init__.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/studies/__init__.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/testing/__init__.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/testing/slack_mock.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop/testing/slack_simulator.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/researchloop.toml.example +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/slack-app-manifest.yml +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/__init__.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/conftest.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/docker/sge/mock_claude.sh +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/docker/slurm/Dockerfile +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/docker/slurm/entrypoint.sh +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/docker/slurm/mock_claude.sh +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/integration/__init__.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/integration/test_loop_advancement.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/integration/test_loop_and_monitor.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/integration/test_sge_scheduler.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/integration/test_slurm_scheduler.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/integration/test_sprint_slurm.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/integration/test_webhook_and_refresh.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_api.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_cli.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_config.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_models.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_notification.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_schedulers.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_search.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_sge.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_slack_events.py +0 -0
- {researchloop-0.2.0 → researchloop-0.3.1}/tests/test_slack_mock.py +0 -0
|
@@ -8,18 +8,19 @@ ResearchLoop is an automated research sprint platform for HPC clusters. It orche
|
|
|
8
8
|
|
|
9
9
|
Two processes:
|
|
10
10
|
|
|
11
|
-
1. **Orchestrator** (`researchloop serve`) — FastAPI server that manages studies/sprints in SQLite, submits jobs via SSH, receives webhooks from runners, stores artifacts. Also serves the web dashboard and handles Slack events.
|
|
11
|
+
1. **Orchestrator** (`researchloop serve`) — FastAPI server that manages studies/sprints in SQLite, submits jobs via SSH, receives webhooks from runners, stores artifacts. Also serves the web dashboard and handles Slack events. Has no Claude CLI dependency.
|
|
12
12
|
2. **Sprint Runner** — runs inside each SLURM/SGE job on HPC. Self-contained bash scripts chain `claude -p` calls through a pipeline (research → red-team → fix → report → summarize), then upload artifacts and send a completion webhook.
|
|
13
13
|
|
|
14
14
|
Key design decisions:
|
|
15
15
|
|
|
16
|
-
- All AI work runs on HPC
|
|
17
|
-
- `claude -p --output-format stream-json` for sprint steps (enables live progress)
|
|
16
|
+
- All AI work runs on HPC; the orchestrator never invokes `claude`
|
|
17
|
+
- `claude -p --output-format stream-json` for sprint steps (enables live progress)
|
|
18
18
|
- SSH to HPC login nodes for sbatch/squeue/scancel/qsub/qdel
|
|
19
19
|
- Job completion via per-sprint webhook tokens (runner → orchestrator), SSH polling as fallback
|
|
20
20
|
- SQLite (aiosqlite, WAL mode) for metadata, with a `settings` table for persistent config (signing key, password hash)
|
|
21
21
|
- Jinja2 templates for all prompts and job scripts — prompts are pre-rendered by the orchestrator and embedded as base64 in the job script
|
|
22
22
|
- Auto-loop sprints generate their own ideas on the cluster (where Claude is authenticated) rather than on the orchestrator
|
|
23
|
+
- Slack integration is notification + structured slash-style commands only (`sprint run`, `sprint list`, `loop start`, `help`); free-form Q&A was removed
|
|
23
24
|
- Context hierarchy: global → cluster → study (inline text + file paths at each level)
|
|
24
25
|
|
|
25
26
|
## Tech stack
|
|
@@ -51,11 +52,10 @@ researchloop/
|
|
|
51
52
|
models.py — SprintStatus enum, Sprint/Study/AutoLoop dataclasses, generate_sprint_id(), format_sprint_dirname()
|
|
52
53
|
orchestrator.py — Orchestrator class + create_app() FastAPI factory (API + Slack + dashboard)
|
|
53
54
|
credentials.py — CLI credential storage (~/.config/researchloop/credentials.json) for remote orchestrator auth
|
|
54
|
-
auth.py — check_claude_auth_async() helper for verifying Claude CLI auth status
|
|
55
55
|
db/
|
|
56
56
|
__init__.py
|
|
57
57
|
database.py — async SQLite wrapper (WAL mode, auto-migrations, fetch_one/fetch_all/execute)
|
|
58
|
-
migrations.py — CREATE TABLE statements (7 tables: studies, sprints, auto_loops, artifacts,
|
|
58
|
+
migrations.py — CREATE TABLE statements (7 tables: studies, sprints, tweaks, auto_loops, artifacts, events, settings) + indexes + incremental column migrations
|
|
59
59
|
queries.py — async CRUD functions (all take Database as first arg, return dicts)
|
|
60
60
|
clusters/
|
|
61
61
|
__init__.py
|
|
@@ -95,7 +95,6 @@ researchloop/
|
|
|
95
95
|
base.py — BaseNotifier ABC (notify_sprint_started/completed/failed)
|
|
96
96
|
ntfy.py — NtfyNotifier (ntfy.sh push notifications)
|
|
97
97
|
slack.py — SlackNotifier (chat:write + files:write) + verify_slack_signature()
|
|
98
|
-
conversation.py — ConversationManager (Slack threads → Claude sessions via --resume, action execution, markdown→Slack conversion)
|
|
99
98
|
router.py — NotificationRouter (fan-out to all configured notifiers)
|
|
100
99
|
dashboard/
|
|
101
100
|
__init__.py
|
|
@@ -116,7 +115,7 @@ researchloop/
|
|
|
116
115
|
|
|
117
116
|
## Database
|
|
118
117
|
|
|
119
|
-
SQLite with
|
|
118
|
+
SQLite with 7 tables: `studies`, `sprints`, `tweaks`, `auto_loops`, `artifacts`, `events`, `settings`. Schema in `db/migrations.py`. All queries in `db/queries.py` use parameterized SQL and return plain dicts. (An older `slack_sessions` table is dropped by the migration if present.)
|
|
120
119
|
|
|
121
120
|
Key columns:
|
|
122
121
|
- `sprints.webhook_token` — per-sprint token for webhook auth (generated at creation)
|
|
@@ -148,7 +147,7 @@ Key columns:
|
|
|
148
147
|
- CSRF protection: HMAC-based tokens derived from session token + signing secret, checked on all mutating dashboard POST routes
|
|
149
148
|
- Dashboard refresh: pulls live status from cluster via SSH (reads logs, progress.md, output.log, report.md, findings.md, summary.txt, idea.txt, checks for PDF)
|
|
150
149
|
- Slack events: deduplication via event_id set, signature verification, background task processing (return 200 immediately), bot message filtering
|
|
151
|
-
- Slack
|
|
150
|
+
- Slack commands: `sprint run`, `sprint list`, `loop start`, `help` (no free-form chat — orchestrator does not run Claude locally)
|
|
152
151
|
- Auto-loop: sprint idea=None → job script generates idea on cluster → idea.txt read back via SSH/webhook
|
|
153
152
|
- CLI auth: `researchloop connect` gets a bearer token via /api/auth, stored in ~/.config/researchloop/credentials.json with 600 permissions
|
|
154
153
|
- CLI auto-reauth: on 401, prompts for password, gets new token, saves it
|
|
@@ -156,7 +155,7 @@ Key columns:
|
|
|
156
155
|
|
|
157
156
|
## Testing
|
|
158
157
|
|
|
159
|
-
|
|
158
|
+
Unit tests cover: models, config parsing, database operations, all query functions, SLURM scheduler (mock SSH), SGE scheduler (mock SSH), local scheduler (real subprocesses), study/sprint managers, auto-loop controller, notification router, Slack notifier + signature verification + Slack events API, FastAPI API endpoints (TestClient), dashboard routes + auth + setup + CSRF, CLI commands (CliRunner), runner output parsing, and template rendering.
|
|
160
159
|
|
|
161
160
|
Integration tests (in tests/integration/) use a Docker SLURM container to test real job submission.
|
|
162
161
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: researchloop
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Automated research sprint platform for HPC clusters
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -38,6 +38,8 @@ Description-Content-Type: text/markdown
|
|
|
38
38
|
[Python](https://www.python.org/downloads/)
|
|
39
39
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
40
40
|
|
|
41
|
+
<img width="720" height="456" alt="mmlu-combined" src="https://github.com/user-attachments/assets/6d1d495f-1078-4f81-9f8a-bb1792ea3905" />
|
|
42
|
+
|
|
41
43
|
---
|
|
42
44
|
|
|
43
45
|
ResearchLoop submits AI-powered research experiments to your SLURM or SGE cluster, then reports back the results. You describe a research idea in natural language, it handles the rest: submitting the job, running a multi-step pipeline with [Claude Code](https://docs.anthropic.com/en/docs/claude-code), red-teaming the results, generating a report, and notifying you when it's done.
|
|
@@ -120,11 +122,13 @@ Browse to `/dashboard/` to see all your studies, sprints, and loops. Submit new
|
|
|
120
122
|
|
|
121
123
|
### Slack bot
|
|
122
124
|
|
|
123
|
-
|
|
125
|
+
Get sprint notifications in your Slack channel and run commands from a thread:
|
|
124
126
|
|
|
125
127
|
```
|
|
126
|
-
|
|
127
|
-
|
|
128
|
+
sprint run my-project "investigate feature X under condition Y"
|
|
129
|
+
sprint list
|
|
130
|
+
loop start my-project 5
|
|
131
|
+
help
|
|
128
132
|
```
|
|
129
133
|
|
|
130
134
|
See the [Slack setup guide](https://researchloop.github.io/researchloop/slack/) for configuration.
|
|
@@ -187,7 +191,7 @@ Full docs at **[researchloop.github.io/researchloop](https://researchloop.github
|
|
|
187
191
|
- [Configuration reference](https://researchloop.github.io/researchloop/configuration/) -- all TOML options and environment variables
|
|
188
192
|
- [Deployment guide](https://researchloop.github.io/researchloop/deployment/) -- Docker, Fly.io, SSH key setup
|
|
189
193
|
- [Dashboard guide](https://researchloop.github.io/researchloop/dashboard/) -- web UI features and authentication
|
|
190
|
-
- [Slack integration](https://researchloop.github.io/researchloop/slack/) -- setup, commands,
|
|
194
|
+
- [Slack integration](https://researchloop.github.io/researchloop/slack/) -- setup, commands, notifications
|
|
191
195
|
- [CLI reference](https://researchloop.github.io/researchloop/cli/) -- all commands with examples
|
|
192
196
|
- [Security](https://researchloop.github.io/researchloop/security/) -- authentication, CSRF, webhook tokens
|
|
193
197
|
- [Development](https://researchloop.github.io/researchloop/development/) -- contributing, testing, architecture
|
|
@@ -7,6 +7,8 @@
|
|
|
7
7
|
[Python](https://www.python.org/downloads/)
|
|
8
8
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
9
9
|
|
|
10
|
+
<img width="720" height="456" alt="mmlu-combined" src="https://github.com/user-attachments/assets/6d1d495f-1078-4f81-9f8a-bb1792ea3905" />
|
|
11
|
+
|
|
10
12
|
---
|
|
11
13
|
|
|
12
14
|
ResearchLoop submits AI-powered research experiments to your SLURM or SGE cluster, then reports back the results. You describe a research idea in natural language, it handles the rest: submitting the job, running a multi-step pipeline with [Claude Code](https://docs.anthropic.com/en/docs/claude-code), red-teaming the results, generating a report, and notifying you when it's done.
|
|
@@ -89,11 +91,13 @@ Browse to `/dashboard/` to see all your studies, sprints, and loops. Submit new
|
|
|
89
91
|
|
|
90
92
|
### Slack bot
|
|
91
93
|
|
|
92
|
-
|
|
94
|
+
Get sprint notifications in your Slack channel and run commands from a thread:
|
|
93
95
|
|
|
94
96
|
```
|
|
95
|
-
|
|
96
|
-
|
|
97
|
+
sprint run my-project "investigate feature X under condition Y"
|
|
98
|
+
sprint list
|
|
99
|
+
loop start my-project 5
|
|
100
|
+
help
|
|
97
101
|
```
|
|
98
102
|
|
|
99
103
|
See the [Slack setup guide](https://researchloop.github.io/researchloop/slack/) for configuration.
|
|
@@ -156,7 +160,7 @@ Full docs at **[researchloop.github.io/researchloop](https://researchloop.github
|
|
|
156
160
|
- [Configuration reference](https://researchloop.github.io/researchloop/configuration/) -- all TOML options and environment variables
|
|
157
161
|
- [Deployment guide](https://researchloop.github.io/researchloop/deployment/) -- Docker, Fly.io, SSH key setup
|
|
158
162
|
- [Dashboard guide](https://researchloop.github.io/researchloop/dashboard/) -- web UI features and authentication
|
|
159
|
-
- [Slack integration](https://researchloop.github.io/researchloop/slack/) -- setup, commands,
|
|
163
|
+
- [Slack integration](https://researchloop.github.io/researchloop/slack/) -- setup, commands, notifications
|
|
160
164
|
- [CLI reference](https://researchloop.github.io/researchloop/cli/) -- all commands with examples
|
|
161
165
|
- [Security](https://researchloop.github.io/researchloop/security/) -- authentication, CSRF, webhook tokens
|
|
162
166
|
- [Development](https://researchloop.github.io/researchloop/development/) -- contributing, testing, architecture
|
|
Binary file
|
|
Binary file
|
|
@@ -2,6 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
**Automated AI research sprints on HPC clusters.**
|
|
4
4
|
|
|
5
|
+
<video autoplay muted loop playsinline width="720" style="max-width:100%;height:auto;border-radius:6px">
|
|
6
|
+
<source src="assets/mmlu-combined.mp4" type="video/mp4">
|
|
7
|
+
</video>
|
|
8
|
+
|
|
5
9
|
---
|
|
6
10
|
|
|
7
11
|
ResearchLoop automates multi-step AI research pipelines on SLURM and SGE clusters. You describe a research idea, and ResearchLoop submits it to your HPC cluster where [Claude Code](https://docs.anthropic.com/en/docs/claude-code) executes a full research pipeline -- coding, red-teaming, fixing, reporting -- inside a single job. Results are reported back via webhooks, Slack, or push notifications, and you can monitor everything from a web dashboard or the CLI.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.3.1"
|
|
@@ -6,12 +6,15 @@ import asyncio
|
|
|
6
6
|
import json
|
|
7
7
|
import logging
|
|
8
8
|
from datetime import datetime, timezone
|
|
9
|
-
from typing import Any
|
|
9
|
+
from typing import TYPE_CHECKING, Any
|
|
10
10
|
|
|
11
11
|
from researchloop.clusters.ssh import SSHManager
|
|
12
12
|
from researchloop.db import queries
|
|
13
13
|
from researchloop.schedulers.base import BaseScheduler
|
|
14
14
|
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from researchloop.sprints.manager import SprintManager
|
|
17
|
+
|
|
15
18
|
logger = logging.getLogger(__name__)
|
|
16
19
|
|
|
17
20
|
# If a job's heartbeat is older than this many seconds AND the job is not
|
|
@@ -28,11 +31,17 @@ class JobMonitor:
|
|
|
28
31
|
db: Any,
|
|
29
32
|
schedulers: dict[str, BaseScheduler],
|
|
30
33
|
config: Any = None,
|
|
34
|
+
sprint_manager: SprintManager | None = None,
|
|
31
35
|
) -> None:
|
|
32
36
|
self.ssh_manager = ssh_manager
|
|
33
37
|
self.db = db
|
|
34
38
|
self.schedulers = schedulers
|
|
35
39
|
self.config = config
|
|
40
|
+
# Optional: when set, terminal-state transitions go through
|
|
41
|
+
# sprint_manager.mark_sprint_terminal so the parent auto-loop
|
|
42
|
+
# advances. None falls back to a direct DB update (used by
|
|
43
|
+
# minimal test fixtures that don't construct a SprintManager).
|
|
44
|
+
self.sprint_manager = sprint_manager
|
|
36
45
|
self._polling_task: asyncio.Task[None] | None = None
|
|
37
46
|
self._stop_event = asyncio.Event()
|
|
38
47
|
|
|
@@ -143,12 +152,17 @@ class JobMonitor:
|
|
|
143
152
|
# Persist the updated status if it changed.
|
|
144
153
|
if status in ("completed", "failed"):
|
|
145
154
|
try:
|
|
146
|
-
|
|
147
|
-
self.
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
155
|
+
if self.sprint_manager is not None:
|
|
156
|
+
await self.sprint_manager.mark_sprint_terminal(
|
|
157
|
+
sprint_id, status
|
|
158
|
+
)
|
|
159
|
+
else:
|
|
160
|
+
await queries.update_sprint(
|
|
161
|
+
self.db,
|
|
162
|
+
sprint_id,
|
|
163
|
+
status=status,
|
|
164
|
+
completed_at=datetime.now(timezone.utc).isoformat(),
|
|
165
|
+
)
|
|
152
166
|
except Exception:
|
|
153
167
|
logger.exception(
|
|
154
168
|
"Failed to update DB status for sprint %s", sprint_id
|
|
@@ -26,12 +26,10 @@ class SlackNotifier(BaseNotifier):
|
|
|
26
26
|
bot_token: str,
|
|
27
27
|
channel_id: str | None = None,
|
|
28
28
|
dashboard_url: str | None = None,
|
|
29
|
-
conversation_manager: Any = None,
|
|
30
29
|
) -> None:
|
|
31
30
|
self.bot_token = bot_token
|
|
32
31
|
self.channel_id = channel_id
|
|
33
32
|
self.dashboard_url = dashboard_url
|
|
34
|
-
self._cm = conversation_manager
|
|
35
33
|
|
|
36
34
|
async def _post_message(
|
|
37
35
|
self,
|
|
@@ -121,10 +119,7 @@ class SlackNotifier(BaseNotifier):
|
|
|
121
119
|
f"*Study:* {study_name}\n"
|
|
122
120
|
f"*Idea:* {idea_trunc}"
|
|
123
121
|
)
|
|
124
|
-
|
|
125
|
-
ts = resp.get("ts", "")
|
|
126
|
-
if ts and self._cm:
|
|
127
|
-
await self._cm.store_bot_message(ts, msg)
|
|
122
|
+
await self._post_message(msg)
|
|
128
123
|
|
|
129
124
|
async def notify_sprint_completed(
|
|
130
125
|
self,
|
|
@@ -140,11 +135,7 @@ class SlackNotifier(BaseNotifier):
|
|
|
140
135
|
f"*Study:* {study_name}\n"
|
|
141
136
|
f"*Summary:* {summary_trunc}"
|
|
142
137
|
)
|
|
143
|
-
|
|
144
|
-
# Store the notification for thread context.
|
|
145
|
-
ts = resp.get("ts", "")
|
|
146
|
-
if ts and self._cm:
|
|
147
|
-
await self._cm.store_bot_message(ts, msg)
|
|
138
|
+
await self._post_message(msg)
|
|
148
139
|
if pdf_path:
|
|
149
140
|
await self._upload_file(
|
|
150
141
|
pdf_path,
|
|
@@ -162,10 +153,7 @@ class SlackNotifier(BaseNotifier):
|
|
|
162
153
|
msg = (
|
|
163
154
|
f":x: Sprint *{link}* failed\n*Study:* {study_name}\n*Error:* {error[:500]}"
|
|
164
155
|
)
|
|
165
|
-
|
|
166
|
-
ts = resp.get("ts", "")
|
|
167
|
-
if ts and self._cm:
|
|
168
|
-
await self._cm.store_bot_message(ts, msg)
|
|
156
|
+
await self._post_message(msg)
|
|
169
157
|
|
|
170
158
|
|
|
171
159
|
def verify_slack_signature(
|
|
@@ -15,7 +15,6 @@ from fastapi.responses import JSONResponse
|
|
|
15
15
|
|
|
16
16
|
from researchloop.clusters.monitor import JobMonitor
|
|
17
17
|
from researchloop.clusters.ssh import SSHManager
|
|
18
|
-
from researchloop.comms.conversation import ConversationManager
|
|
19
18
|
from researchloop.comms.ntfy import NtfyNotifier
|
|
20
19
|
from researchloop.comms.router import NotificationRouter
|
|
21
20
|
from researchloop.comms.slack import (
|
|
@@ -51,7 +50,6 @@ class Orchestrator:
|
|
|
51
50
|
self.auto_loop: AutoLoopController | None = None
|
|
52
51
|
self.notification_router: NotificationRouter | None = None
|
|
53
52
|
self.job_monitor: JobMonitor | None = None
|
|
54
|
-
self.conversation_manager: ConversationManager | None = None
|
|
55
53
|
|
|
56
54
|
# ------------------------------------------------------------------
|
|
57
55
|
# Lifecycle
|
|
@@ -108,24 +106,15 @@ class Orchestrator:
|
|
|
108
106
|
notification_router=self.notification_router,
|
|
109
107
|
)
|
|
110
108
|
|
|
111
|
-
# 6b. Conversation manager
|
|
112
|
-
self.conversation_manager = ConversationManager(
|
|
113
|
-
self.db, sprint_manager=self.sprint_manager
|
|
114
|
-
)
|
|
115
|
-
|
|
116
|
-
# Wire conversation manager to Slack notifier
|
|
117
|
-
# so notifications store thread context.
|
|
118
|
-
if self.config.slack and self.config.slack.bot_token:
|
|
119
|
-
for n in self.notification_router._notifiers:
|
|
120
|
-
if isinstance(n, SlackNotifier):
|
|
121
|
-
n._cm = self.conversation_manager
|
|
122
|
-
|
|
123
109
|
# 7. Auto-loop controller
|
|
124
110
|
self.auto_loop = AutoLoopController(
|
|
125
111
|
db=self.db,
|
|
126
112
|
sprint_manager=self.sprint_manager,
|
|
127
113
|
config=self.config,
|
|
128
114
|
)
|
|
115
|
+
# Late-bind the back-reference so SprintManager.mark_sprint_terminal
|
|
116
|
+
# can advance the parent loop on every terminal transition.
|
|
117
|
+
self.sprint_manager.auto_loop = self.auto_loop
|
|
129
118
|
|
|
130
119
|
# 8. Job monitor
|
|
131
120
|
self.job_monitor = JobMonitor(
|
|
@@ -133,6 +122,7 @@ class Orchestrator:
|
|
|
133
122
|
db=self.db,
|
|
134
123
|
schedulers=self.schedulers,
|
|
135
124
|
config=self.config,
|
|
125
|
+
sprint_manager=self.sprint_manager,
|
|
136
126
|
)
|
|
137
127
|
await self.job_monitor.start_polling()
|
|
138
128
|
|
|
@@ -419,6 +409,10 @@ def create_app(orchestrator: Orchestrator) -> FastAPI:
|
|
|
419
409
|
{"ok": True, "sprint_id": sprint_id, "tweak_id": tweak_id}
|
|
420
410
|
)
|
|
421
411
|
|
|
412
|
+
# handle_completion fires auto_loop.on_sprint_complete internally
|
|
413
|
+
# via mark_sprint_terminal — single chokepoint for terminal-state
|
|
414
|
+
# transitions, so the loop also advances when the JobMonitor or a
|
|
415
|
+
# dashboard refresh is the one that detects the terminal status.
|
|
422
416
|
await orchestrator.sprint_manager.handle_completion(
|
|
423
417
|
sprint_id=sprint_id,
|
|
424
418
|
status=status,
|
|
@@ -427,10 +421,6 @@ def create_app(orchestrator: Orchestrator) -> FastAPI:
|
|
|
427
421
|
idea=idea,
|
|
428
422
|
)
|
|
429
423
|
|
|
430
|
-
# Trigger auto-loop advancement if applicable.
|
|
431
|
-
if orchestrator.auto_loop is not None:
|
|
432
|
-
await orchestrator.auto_loop.on_sprint_complete(sprint_id)
|
|
433
|
-
|
|
434
424
|
logger.info(
|
|
435
425
|
"Webhook: sprint %s completion processed (status=%s)",
|
|
436
426
|
sprint_id,
|
|
@@ -790,33 +780,6 @@ def create_app(orchestrator: Orchestrator) -> FastAPI:
|
|
|
790
780
|
return
|
|
791
781
|
text_lower = text.lower().strip()
|
|
792
782
|
|
|
793
|
-
# Handle "auth status" / "login" commands
|
|
794
|
-
if any(kw in text_lower for kw in ("auth status", "auth check", "login")):
|
|
795
|
-
if slack_cfg and slack_cfg.bot_token:
|
|
796
|
-
from researchloop.core.auth import (
|
|
797
|
-
check_claude_auth_async,
|
|
798
|
-
)
|
|
799
|
-
|
|
800
|
-
ok, detail = await check_claude_auth_async()
|
|
801
|
-
notifier = SlackNotifier(
|
|
802
|
-
bot_token=slack_cfg.bot_token,
|
|
803
|
-
channel_id=channel,
|
|
804
|
-
)
|
|
805
|
-
if ok:
|
|
806
|
-
msg = (
|
|
807
|
-
":white_check_mark: Claude is"
|
|
808
|
-
f" authenticated on this server ({detail})."
|
|
809
|
-
)
|
|
810
|
-
else:
|
|
811
|
-
msg = (
|
|
812
|
-
":information_source: Claude is not"
|
|
813
|
-
" authenticated on this server"
|
|
814
|
-
" (not required — AI runs on the"
|
|
815
|
-
" HPC cluster)."
|
|
816
|
-
)
|
|
817
|
-
await notifier._post_message(msg, thread_ts=thread_ts)
|
|
818
|
-
return
|
|
819
|
-
|
|
820
783
|
# Handle "help" command.
|
|
821
784
|
if text_lower == "help":
|
|
822
785
|
if slack_cfg and slack_cfg.bot_token:
|
|
@@ -831,7 +794,6 @@ def create_app(orchestrator: Orchestrator) -> FastAPI:
|
|
|
831
794
|
"• `sprint list` — list recent sprints\n"
|
|
832
795
|
"• `loop start <study> <count>`"
|
|
833
796
|
" — start an auto-loop\n"
|
|
834
|
-
"• `auth status` — check Claude auth\n"
|
|
835
797
|
"• `help` — show this message",
|
|
836
798
|
thread_ts=thread_ts,
|
|
837
799
|
)
|
|
@@ -894,29 +856,16 @@ def create_app(orchestrator: Orchestrator) -> FastAPI:
|
|
|
894
856
|
)
|
|
895
857
|
return
|
|
896
858
|
|
|
897
|
-
#
|
|
898
|
-
|
|
899
|
-
if cm is not None and slack_cfg and slack_cfg.bot_token:
|
|
859
|
+
# Unrecognized message — point user at the help command.
|
|
860
|
+
if slack_cfg and slack_cfg.bot_token:
|
|
900
861
|
notifier = SlackNotifier(
|
|
901
862
|
bot_token=slack_cfg.bot_token,
|
|
902
863
|
channel_id=channel,
|
|
903
864
|
)
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
user_text=text,
|
|
909
|
-
channel=channel,
|
|
910
|
-
bot_token=slack_cfg.bot_token if slack_cfg else None,
|
|
911
|
-
)
|
|
912
|
-
await notifier._post_message(response_text, thread_ts=thread_ts)
|
|
913
|
-
except Exception as exc:
|
|
914
|
-
logger.exception("Chat handler failed: %s", exc)
|
|
915
|
-
await notifier._post_message(
|
|
916
|
-
"Sorry, something went wrong. Try `help` for available commands.",
|
|
917
|
-
thread_ts=thread_ts,
|
|
918
|
-
)
|
|
919
|
-
|
|
865
|
+
await notifier._post_message(
|
|
866
|
+
"Sorry, I didn't understand that. Try `help` for available commands.",
|
|
867
|
+
thread_ts=thread_ts,
|
|
868
|
+
)
|
|
920
869
|
return
|
|
921
870
|
|
|
922
871
|
# -- Dashboard HTML routes -----------------------------------------
|