researchloop 0.1.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. {researchloop-0.1.0 → researchloop-0.3.0}/.github/workflows/ci.yml +5 -1
  2. {researchloop-0.1.0 → researchloop-0.3.0}/CLAUDE.md +11 -10
  3. researchloop-0.3.0/PKG-INFO +213 -0
  4. researchloop-0.3.0/README.md +182 -0
  5. researchloop-0.3.0/docs/assets/mmlu-combined.gif +0 -0
  6. researchloop-0.3.0/docs/assets/mmlu-combined.mp4 +0 -0
  7. {researchloop-0.1.0 → researchloop-0.3.0}/docs/deployment.md +1 -2
  8. {researchloop-0.1.0 → researchloop-0.3.0}/docs/development.md +1 -1
  9. {researchloop-0.1.0 → researchloop-0.3.0}/docs/getting-started.md +1 -12
  10. {researchloop-0.1.0 → researchloop-0.3.0}/docs/index.md +4 -0
  11. {researchloop-0.1.0 → researchloop-0.3.0}/pyproject.toml +3 -2
  12. researchloop-0.3.0/researchloop/__init__.py +1 -0
  13. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/cli.py +182 -0
  14. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/clusters/ssh.py +4 -1
  15. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/comms/slack.py +5 -16
  16. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/core/models.py +5 -0
  17. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/core/orchestrator.py +25 -61
  18. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/dashboard/routes.py +634 -24
  19. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/dashboard/templates/base.html +6 -1
  20. researchloop-0.3.0/researchloop/dashboard/templates/loop_detail.html +115 -0
  21. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/dashboard/templates/loops.html +13 -2
  22. researchloop-0.3.0/researchloop/dashboard/templates/search.html +35 -0
  23. researchloop-0.3.0/researchloop/dashboard/templates/sprint_detail.html +224 -0
  24. researchloop-0.3.0/researchloop/dashboard/templates/sprints.html +127 -0
  25. researchloop-0.3.0/researchloop/dashboard/templates/studies.html +31 -0
  26. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/dashboard/templates/study_detail.html +34 -7
  27. researchloop-0.3.0/researchloop/dashboard/templates/study_form.html +128 -0
  28. researchloop-0.3.0/researchloop/dashboard/templates/tweak_detail.html +71 -0
  29. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/db/migrations.py +25 -12
  30. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/db/queries.py +127 -5
  31. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/runner/job_templates/sge.sh.j2 +22 -0
  32. researchloop-0.3.0/researchloop/runner/job_templates/sge_tweak.sh.j2 +186 -0
  33. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/runner/job_templates/slurm.sh.j2 +22 -0
  34. researchloop-0.3.0/researchloop/runner/job_templates/slurm_tweak.sh.j2 +184 -0
  35. researchloop-0.3.0/researchloop/runner/templates/report.md.j2 +61 -0
  36. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/runner/templates/research_sprint.md.j2 +1 -0
  37. researchloop-0.3.0/researchloop/runner/templates/tweak.md.j2 +12 -0
  38. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/schedulers/sge.py +6 -7
  39. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/sprints/auto_loop.py +3 -124
  40. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/sprints/manager.py +474 -41
  41. researchloop-0.3.0/researchloop/studies/manager.py +286 -0
  42. researchloop-0.3.0/researchloop/testing/__init__.py +7 -0
  43. researchloop-0.3.0/researchloop/testing/slack_mock.py +273 -0
  44. researchloop-0.3.0/researchloop/testing/slack_simulator.py +341 -0
  45. researchloop-0.3.0/tests/docker/sge/Dockerfile +35 -0
  46. researchloop-0.3.0/tests/docker/sge/entrypoint.sh +89 -0
  47. researchloop-0.3.0/tests/docker/slurm/mock_claude.sh +41 -0
  48. {researchloop-0.1.0 → researchloop-0.3.0}/tests/integration/conftest.py +99 -0
  49. researchloop-0.3.0/tests/integration/test_sge_scheduler.py +197 -0
  50. {researchloop-0.1.0 → researchloop-0.3.0}/tests/integration/test_webhook_and_refresh.py +126 -17
  51. {researchloop-0.1.0 → researchloop-0.3.0}/tests/test_auto_loop.py +2 -109
  52. researchloop-0.3.0/tests/test_dashboard.py +1102 -0
  53. {researchloop-0.1.0 → researchloop-0.3.0}/tests/test_database.py +2 -1
  54. {researchloop-0.1.0 → researchloop-0.3.0}/tests/test_queries.py +81 -24
  55. {researchloop-0.1.0 → researchloop-0.3.0}/tests/test_runner.py +55 -0
  56. researchloop-0.3.0/tests/test_search.py +177 -0
  57. {researchloop-0.1.0 → researchloop-0.3.0}/tests/test_sge.py +1 -1
  58. {researchloop-0.1.0 → researchloop-0.3.0}/tests/test_slack.py +1 -49
  59. researchloop-0.3.0/tests/test_slack_mock.py +421 -0
  60. researchloop-0.3.0/tests/test_slack_simulator.py +432 -0
  61. {researchloop-0.1.0 → researchloop-0.3.0}/tests/test_sprint_manager.py +262 -1
  62. researchloop-0.3.0/tests/test_study_manager.py +285 -0
  63. researchloop-0.3.0/tests/test_tweaks.py +728 -0
  64. {researchloop-0.1.0 → researchloop-0.3.0}/uv.lock +4 -2
  65. researchloop-0.1.0/PKG-INFO +0 -596
  66. researchloop-0.1.0/README.md +0 -566
  67. researchloop-0.1.0/researchloop/__init__.py +0 -1
  68. researchloop-0.1.0/researchloop/comms/conversation.py +0 -465
  69. researchloop-0.1.0/researchloop/core/auth.py +0 -78
  70. researchloop-0.1.0/researchloop/dashboard/templates/loop_detail.html +0 -58
  71. researchloop-0.1.0/researchloop/dashboard/templates/sprint_detail.html +0 -109
  72. researchloop-0.1.0/researchloop/dashboard/templates/sprints.html +0 -48
  73. researchloop-0.1.0/researchloop/dashboard/templates/studies.html +0 -18
  74. researchloop-0.1.0/researchloop/runner/templates/report.md.j2 +0 -31
  75. researchloop-0.1.0/researchloop/studies/manager.py +0 -102
  76. researchloop-0.1.0/tests/test_conversation.py +0 -366
  77. researchloop-0.1.0/tests/test_dashboard.py +0 -465
  78. researchloop-0.1.0/tests/test_study_manager.py +0 -77
  79. {researchloop-0.1.0 → researchloop-0.3.0}/.github/workflows/docs.yml +0 -0
  80. {researchloop-0.1.0 → researchloop-0.3.0}/.github/workflows/release.yml +0 -0
  81. {researchloop-0.1.0 → researchloop-0.3.0}/.gitignore +0 -0
  82. {researchloop-0.1.0 → researchloop-0.3.0}/Dockerfile +0 -0
  83. {researchloop-0.1.0 → researchloop-0.3.0}/LICENSE +0 -0
  84. {researchloop-0.1.0 → researchloop-0.3.0}/docs/cli.md +0 -0
  85. {researchloop-0.1.0 → researchloop-0.3.0}/docs/configuration.md +0 -0
  86. {researchloop-0.1.0 → researchloop-0.3.0}/docs/dashboard.md +0 -0
  87. {researchloop-0.1.0 → researchloop-0.3.0}/docs/security.md +0 -0
  88. {researchloop-0.1.0 → researchloop-0.3.0}/docs/slack.md +0 -0
  89. {researchloop-0.1.0 → researchloop-0.3.0}/mkdocs.yml +0 -0
  90. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/__main__.py +0 -0
  91. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/clusters/__init__.py +0 -0
  92. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/clusters/monitor.py +0 -0
  93. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/comms/__init__.py +0 -0
  94. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/comms/base.py +0 -0
  95. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/comms/ntfy.py +0 -0
  96. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/comms/router.py +0 -0
  97. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/core/__init__.py +0 -0
  98. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/core/config.py +0 -0
  99. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/core/credentials.py +0 -0
  100. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/dashboard/__init__.py +0 -0
  101. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/dashboard/app.py +0 -0
  102. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/dashboard/auth.py +0 -0
  103. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/dashboard/templates/login.html +0 -0
  104. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/dashboard/templates/setup.html +0 -0
  105. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/db/__init__.py +0 -0
  106. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/db/database.py +0 -0
  107. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/runner/__init__.py +0 -0
  108. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/runner/claude.py +0 -0
  109. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/runner/main.py +0 -0
  110. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/runner/pipeline.py +0 -0
  111. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/runner/templates/fix_issues.md.j2 +0 -0
  112. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/runner/templates/idea_generator.md.j2 +0 -0
  113. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/runner/templates/red_team.md.j2 +0 -0
  114. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/runner/templates/summarizer.md.j2 +0 -0
  115. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/runner/upload.py +0 -0
  116. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/schedulers/__init__.py +0 -0
  117. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/schedulers/base.py +0 -0
  118. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/schedulers/local.py +0 -0
  119. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/schedulers/slurm.py +0 -0
  120. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/sprints/__init__.py +0 -0
  121. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop/studies/__init__.py +0 -0
  122. {researchloop-0.1.0 → researchloop-0.3.0}/researchloop.toml.example +0 -0
  123. {researchloop-0.1.0 → researchloop-0.3.0}/slack-app-manifest.yml +0 -0
  124. {researchloop-0.1.0 → researchloop-0.3.0}/tests/__init__.py +0 -0
  125. {researchloop-0.1.0 → researchloop-0.3.0}/tests/conftest.py +0 -0
  126. {researchloop-0.1.0/tests/docker/slurm → researchloop-0.3.0/tests/docker/sge}/mock_claude.sh +0 -0
  127. {researchloop-0.1.0 → researchloop-0.3.0}/tests/docker/slurm/Dockerfile +0 -0
  128. {researchloop-0.1.0 → researchloop-0.3.0}/tests/docker/slurm/entrypoint.sh +0 -0
  129. {researchloop-0.1.0 → researchloop-0.3.0}/tests/integration/__init__.py +0 -0
  130. {researchloop-0.1.0 → researchloop-0.3.0}/tests/integration/test_loop_advancement.py +0 -0
  131. {researchloop-0.1.0 → researchloop-0.3.0}/tests/integration/test_loop_and_monitor.py +0 -0
  132. {researchloop-0.1.0 → researchloop-0.3.0}/tests/integration/test_slurm_scheduler.py +0 -0
  133. {researchloop-0.1.0 → researchloop-0.3.0}/tests/integration/test_sprint_slurm.py +0 -0
  134. {researchloop-0.1.0 → researchloop-0.3.0}/tests/test_api.py +0 -0
  135. {researchloop-0.1.0 → researchloop-0.3.0}/tests/test_cli.py +0 -0
  136. {researchloop-0.1.0 → researchloop-0.3.0}/tests/test_config.py +0 -0
  137. {researchloop-0.1.0 → researchloop-0.3.0}/tests/test_models.py +0 -0
  138. {researchloop-0.1.0 → researchloop-0.3.0}/tests/test_notification.py +0 -0
  139. {researchloop-0.1.0 → researchloop-0.3.0}/tests/test_schedulers.py +0 -0
  140. {researchloop-0.1.0 → researchloop-0.3.0}/tests/test_slack_events.py +0 -0
@@ -67,7 +67,11 @@ jobs:
67
67
  - name: Build SLURM test container
68
68
  run: docker build -t researchloop-slurm-test tests/docker/slurm/
69
69
 
70
+ - name: Build SGE test container
71
+ run: docker build -t researchloop-sge-test tests/docker/sge/
72
+
70
73
  - name: Run integration tests
71
- run: uv run pytest tests/integration/ -v --tb=short -x --timeout=120
74
+ run: uv run pytest tests/integration/ -v --tb=short -x --timeout=180
72
75
  env:
73
76
  SLURM_TEST_IMAGE: researchloop-slurm-test
77
+ SGE_TEST_IMAGE: researchloop-sge-test
@@ -8,18 +8,19 @@ ResearchLoop is an automated research sprint platform for HPC clusters. It orche
8
8
 
9
9
  Two processes:
10
10
 
11
- 1. **Orchestrator** (`researchloop serve`) — FastAPI server that manages studies/sprints in SQLite, submits jobs via SSH, receives webhooks from runners, stores artifacts. Also serves the web dashboard and handles Slack events.
11
+ 1. **Orchestrator** (`researchloop serve`) — FastAPI server that manages studies/sprints in SQLite, submits jobs via SSH, receives webhooks from runners, stores artifacts. Also serves the web dashboard and handles Slack events. Has no Claude CLI dependency.
12
12
  2. **Sprint Runner** — runs inside each SLURM/SGE job on HPC. Self-contained bash scripts chain `claude -p` calls through a pipeline (research → red-team → fix → report → summarize), then upload artifacts and send a completion webhook.
13
13
 
14
14
  Key design decisions:
15
15
 
16
- - All AI work runs on HPC, never on the orchestrator (except Slack conversations and auto-loop idea generation, which use `claude -p` locally with restricted tools)
17
- - `claude -p --output-format stream-json` for sprint steps (enables live progress), `--output-format json` for conversations
16
+ - All AI work runs on HPC; the orchestrator never invokes `claude`
17
+ - `claude -p --output-format stream-json` for sprint steps (enables live progress)
18
18
  - SSH to HPC login nodes for sbatch/squeue/scancel/qsub/qdel
19
19
  - Job completion via per-sprint webhook tokens (runner → orchestrator), SSH polling as fallback
20
20
  - SQLite (aiosqlite, WAL mode) for metadata, with a `settings` table for persistent config (signing key, password hash)
21
21
  - Jinja2 templates for all prompts and job scripts — prompts are pre-rendered by the orchestrator and embedded as base64 in the job script
22
22
  - Auto-loop sprints generate their own ideas on the cluster (where Claude is authenticated) rather than on the orchestrator
23
+ - Slack integration is notification + structured slash-style commands only (`sprint run`, `sprint list`, `loop start`, `help`); free-form Q&A was removed
23
24
  - Context hierarchy: global → cluster → study (inline text + file paths at each level)
24
25
 
25
26
  ## Tech stack
@@ -30,7 +31,7 @@ Python 3.10+, uv, asyncio throughout. Key deps: click (CLI), FastAPI (API + dash
30
31
 
31
32
  ```bash
32
33
  uv sync # install deps
33
- uv run pytest tests/ -v -m "not integration" # unit tests (339 tests, ~3s)
34
+ uv run pytest tests/ -v -m "not integration" # unit tests (374 tests, ~3s)
34
35
  uv run pytest tests/integration/ -v --timeout=120 # integration tests (needs Docker)
35
36
  uv run ruff check . # lint
36
37
  uv run ruff format . # format
@@ -51,11 +52,10 @@ researchloop/
51
52
  models.py — SprintStatus enum, Sprint/Study/AutoLoop dataclasses, generate_sprint_id(), format_sprint_dirname()
52
53
  orchestrator.py — Orchestrator class + create_app() FastAPI factory (API + Slack + dashboard)
53
54
  credentials.py — CLI credential storage (~/.config/researchloop/credentials.json) for remote orchestrator auth
54
- auth.py — check_claude_auth_async() helper for verifying Claude CLI auth status
55
55
  db/
56
56
  __init__.py
57
57
  database.py — async SQLite wrapper (WAL mode, auto-migrations, fetch_one/fetch_all/execute)
58
- migrations.py — CREATE TABLE statements (7 tables: studies, sprints, auto_loops, artifacts, slack_sessions, events, settings) + indexes + incremental column migrations
58
+ migrations.py — CREATE TABLE statements (7 tables: studies, sprints, tweaks, auto_loops, artifacts, events, settings) + indexes + incremental column migrations
59
59
  queries.py — async CRUD functions (all take Database as first arg, return dicts)
60
60
  clusters/
61
61
  __init__.py
@@ -95,7 +95,6 @@ researchloop/
95
95
  base.py — BaseNotifier ABC (notify_sprint_started/completed/failed)
96
96
  ntfy.py — NtfyNotifier (ntfy.sh push notifications)
97
97
  slack.py — SlackNotifier (chat:write + files:write) + verify_slack_signature()
98
- conversation.py — ConversationManager (Slack threads → Claude sessions via --resume, action execution, markdown→Slack conversion)
99
98
  router.py — NotificationRouter (fan-out to all configured notifiers)
100
99
  dashboard/
101
100
  __init__.py
@@ -116,7 +115,7 @@ researchloop/
116
115
 
117
116
  ## Database
118
117
 
119
- SQLite with 7 tables: `studies`, `sprints`, `auto_loops`, `artifacts`, `slack_sessions`, `events`, `settings`. Schema in `db/migrations.py`. All queries in `db/queries.py` use parameterized SQL and return plain dicts.
118
+ SQLite with 7 tables: `studies`, `sprints`, `tweaks`, `auto_loops`, `artifacts`, `events`, `settings`. Schema in `db/migrations.py`. All queries in `db/queries.py` use parameterized SQL and return plain dicts. (An older `slack_sessions` table is dropped by the migration if present.)
120
119
 
121
120
  Key columns:
122
121
  - `sprints.webhook_token` — per-sprint token for webhook auth (generated at creation)
@@ -124,6 +123,8 @@ Key columns:
124
123
  - `sprints.metadata_json` — stores report text, has_pdf flag, heartbeat info
125
124
  - `sprints.error` — stores live progress (progress.md + output.log + tool log) during running sprints
126
125
  - `auto_loops.metadata_json` — stores loop context and job_options
126
+ - `tweaks.sprint_id` — links tweak to parent sprint (one sprint can have many tweaks)
127
+ - `tweaks.instruction` — the user's tweak request text
127
128
  - `settings` — key/value store for signing_key and dashboard_password_hash
128
129
 
129
130
  ## Key patterns
@@ -146,7 +147,7 @@ Key columns:
146
147
  - CSRF protection: HMAC-based tokens derived from session token + signing secret, checked on all mutating dashboard POST routes
147
148
  - Dashboard refresh: pulls live status from cluster via SSH (reads logs, progress.md, output.log, report.md, findings.md, summary.txt, idea.txt, checks for PDF)
148
149
  - Slack events: deduplication via event_id set, signature verification, background task processing (return 200 immediately), bot message filtering
149
- - Slack conversation: thread session mapping in DB, context building with study/sprint info, action execution via [ACTION: ...] tags
150
+ - Slack commands: `sprint run`, `sprint list`, `loop start`, `help` (no free-form chat; the orchestrator does not run Claude locally)
150
151
  - Auto-loop: sprint idea=None → job script generates idea on cluster → idea.txt read back via SSH/webhook
151
152
  - CLI auth: `researchloop connect` gets a bearer token via /api/auth, stored in ~/.config/researchloop/credentials.json with 600 permissions
152
153
  - CLI auto-reauth: on 401, prompts for password, gets new token, saves it
@@ -154,7 +155,7 @@ Key columns:
154
155
 
155
156
  ## Testing
156
157
 
157
- 339 unit tests covering: models, config parsing, database operations, all query functions, SLURM scheduler (mock SSH), SGE scheduler (mock SSH), local scheduler (real subprocesses), study/sprint managers, auto-loop controller (with mock claude), notification router, Slack notifier + signature verification + conversation manager + Slack events API, FastAPI API endpoints (TestClient), dashboard routes + auth + setup + CSRF, CLI commands (CliRunner), runner output parsing, and template rendering.
158
+ Unit tests cover: models, config parsing, database operations, all query functions, SLURM scheduler (mock SSH), SGE scheduler (mock SSH), local scheduler (real subprocesses), study/sprint managers, auto-loop controller, notification router, Slack notifier + signature verification + Slack events API, FastAPI API endpoints (TestClient), dashboard routes + auth + setup + CSRF, CLI commands (CliRunner), runner output parsing, and template rendering.
158
159
 
159
160
  Integration tests (in tests/integration/) use a Docker SLURM container to test real job submission.
160
161
 
@@ -0,0 +1,213 @@
1
+ Metadata-Version: 2.4
2
+ Name: researchloop
3
+ Version: 0.3.0
4
+ Summary: Automated research sprint platform for HPC clusters
5
+ License: MIT
6
+ License-File: LICENSE
7
+ Classifier: Development Status :: 4 - Beta
8
+ Classifier: Intended Audience :: Science/Research
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Topic :: Scientific/Engineering
16
+ Requires-Python: >=3.10
17
+ Requires-Dist: aiosqlite<1,>=0.19
18
+ Requires-Dist: asyncssh<3,>=2.14
19
+ Requires-Dist: bcrypt<5,>=4.0
20
+ Requires-Dist: click<9,>=8.0
21
+ Requires-Dist: fastapi>=0.100
22
+ Requires-Dist: httpx<1,>=0.24
23
+ Requires-Dist: itsdangerous<3,>=2.1
24
+ Requires-Dist: jinja2<4,>=3.1
25
+ Requires-Dist: markdown<4,>=3.4
26
+ Requires-Dist: python-multipart<1,>=0.0.6
27
+ Requires-Dist: starlette<1
28
+ Requires-Dist: tomli<3,>=2.0; python_version < '3.11'
29
+ Requires-Dist: uvicorn[standard]<1,>=0.20
30
+ Description-Content-Type: text/markdown
31
+
32
+ # ResearchLoop
33
+
34
+ **Run AI-automated research experiments on your HPC cluster. Monitor from anywhere.**
35
+
36
+ [![CI](https://github.com/researchloop/researchloop/actions/workflows/ci.yml/badge.svg)](https://github.com/researchloop/researchloop/actions/workflows/ci.yml)
37
+ [![PyPI](https://img.shields.io/pypi/v/researchloop.svg)](https://pypi.org/project/researchloop/)
38
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
39
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
40
+
41
+ <img width="720" height="456" alt="mmlu-combined" src="https://github.com/user-attachments/assets/6d1d495f-1078-4f81-9f8a-bb1792ea3905" />
42
+
43
+ ---
44
+
45
+ ResearchLoop submits AI-powered research experiments to your SLURM or SGE cluster, then reports back the results. You describe a research idea in natural language, and it handles the rest: submitting the job, running a multi-step pipeline with [Claude Code](https://docs.anthropic.com/en/docs/claude-code), red-teaming the results, generating a report, and notifying you when it's done.
46
+
47
+ ```bash
48
+ pip install researchloop
49
+
50
+ # Submit an experiment to your cluster
51
+ researchloop sprint run "Investigate whether batch normalization improves convergence" --study my-project
52
+
53
+ # Start an auto-loop: 5 experiments, each building on the last
54
+ researchloop loop start --study my-project --count 5 --context "Focus on improving F1 score"
55
+ ```
56
+
57
+ Monitor everything from a web dashboard, Slack, or the CLI -- no need to SSH in and check on jobs.
58
+
59
+ ## Why ResearchLoop?
60
+
61
+ If you run experiments on shared HPC clusters, you know the pain: SSH in, write a script, submit with sbatch, wait, check logs, repeat. ResearchLoop automates this loop:
62
+
63
+ 1. **You describe what to investigate** (via CLI, dashboard, or Slack)
64
+ 2. **ResearchLoop submits a job** to your cluster via SSH
65
+ 3. **Claude runs the full experiment** -- writes code, runs it, analyzes results
66
+ 4. **A red-team step critiques the work** and Claude fixes any issues
67
+ 5. **You get a report** with a summary, PDF, and all artifacts
68
+
69
+ The **auto-loop** feature takes this further: after each experiment, Claude analyzes the results and proposes the next one. You set the number of iterations and walk away.
70
+
71
+ ## Get started in 5 minutes
72
+
73
+ **Prerequisites:** Python 3.10+, SSH access to an HPC cluster, [Claude Code](https://docs.anthropic.com/en/docs/claude-code) installed on the cluster.
74
+
75
+ ### 1. Install and initialize
76
+
77
+ ```bash
78
+ pip install researchloop
79
+ researchloop init
80
+ ```
81
+
82
+ ### 2. Edit `researchloop.toml`
83
+
84
+ ```toml
85
+ shared_secret = "pick-a-secret"
86
+ orchestrator_url = "http://localhost:8080"
87
+
88
+ [[cluster]]
89
+ name = "my-cluster"
90
+ host = "login.cluster.example.com"
91
+ user = "researcher"
92
+ key_path = "~/.ssh/id_ed25519"
93
+ scheduler_type = "slurm"
94
+ working_dir = "/scratch/researcher/researchloop"
95
+
96
+ [cluster.job_options]
97
+ gres = "gpu:1"
98
+ mem = "64G"
99
+ cpus-per-task = "8"
100
+
101
+ [[study]]
102
+ name = "my-project"
103
+ cluster = "my-cluster"
104
+ description = "Investigating X"
105
+ ```
106
+
107
+ ### 3. Start the server and run your first sprint
108
+
109
+ ```bash
110
+ researchloop serve &
111
+ researchloop connect http://localhost:8080
112
+ researchloop sprint run "Try approach X on dataset Y" --study my-project
113
+ ```
114
+
115
+ That's it. ResearchLoop SSHes to your cluster, submits the job, and you can monitor progress from the dashboard at `http://localhost:8080/dashboard/`.
116
+
117
+ ## Three ways to interact
118
+
119
+ ### Web dashboard
120
+
121
+ Browse to `/dashboard/` to see all your studies, sprints, and loops. Submit new sprints, start loops with custom GPU/memory settings, refresh live status from the cluster, and read reports -- all from the browser.
122
+
123
+ ### Slack bot
124
+
125
+ Get sprint notifications in your Slack channel and run commands from a thread:
126
+
127
+ ```
128
+ sprint run my-project "investigate feature X under condition Y"
129
+ sprint list
130
+ loop start my-project 5
131
+ help
132
+ ```
133
+
134
+ See the [Slack setup guide](https://researchloop.github.io/researchloop/slack/) for configuration.
135
+
136
+ ### CLI
137
+
138
+ ```bash
139
+ researchloop sprint run "idea" --study my-project # Submit a sprint
140
+ researchloop sprint list # List recent sprints
141
+ researchloop sprint show sp-a3f7b2 # View details
142
+ researchloop loop start --study my-project --count 5 # Auto-loop
143
+ researchloop loop stop loop-b4e1c9 # Stop a loop
144
+ ```
145
+
146
+ ## Customizing your studies
147
+
148
+ Each study can have its own context, cluster settings, and configuration:
149
+
150
+ ```toml
151
+ [[study]]
152
+ name = "sae-research"
153
+ cluster = "my-cluster"
154
+ max_sprint_duration_hours = 12
155
+ red_team_max_rounds = 2
156
+ allow_loop = true
157
+
158
+ # Tell Claude what this study is about and how to approach it
159
+ context = """
160
+ You are researching sparse autoencoder architectures.
161
+ Always train for 200M samples. Use batch size 1024.
162
+ Validate on the variation models listed in ~/reference/models.txt.
163
+ """
164
+
165
+ # Or point to a file with detailed instructions
166
+ claude_md_path = "./studies/sae-research/CLAUDE.md"
167
+
168
+ # Override GPU/memory for this study
169
+ [study.job_options]
170
+ gres = "gpu:a100:2"
171
+ mem = "128G"
172
+ ```
173
+
174
+ The context hierarchy is: **global** > **cluster** > **study**. All levels are merged and included in every sprint's prompt.
175
+
176
+ ## Deployment
177
+
178
+ For production, deploy the orchestrator as a Docker container on Fly.io, Railway, or any platform that supports persistent volumes:
179
+
180
+ ```bash
181
+ pip install researchloop
182
+ # See deployment guide for Docker/Fly.io setup
183
+ ```
184
+
185
+ Full deployment guide: [researchloop.github.io/researchloop/deployment](https://researchloop.github.io/researchloop/deployment/)
186
+
187
+ ## Documentation
188
+
189
+ Full docs at **[researchloop.github.io/researchloop](https://researchloop.github.io/researchloop/)**, including:
190
+
191
+ - [Configuration reference](https://researchloop.github.io/researchloop/configuration/) -- all TOML options and environment variables
192
+ - [Deployment guide](https://researchloop.github.io/researchloop/deployment/) -- Docker, Fly.io, SSH key setup
193
+ - [Dashboard guide](https://researchloop.github.io/researchloop/dashboard/) -- web UI features and authentication
194
+ - [Slack integration](https://researchloop.github.io/researchloop/slack/) -- setup, commands, notifications
195
+ - [CLI reference](https://researchloop.github.io/researchloop/cli/) -- all commands with examples
196
+ - [Security](https://researchloop.github.io/researchloop/security/) -- authentication, CSRF, webhook tokens
197
+ - [Development](https://researchloop.github.io/researchloop/development/) -- contributing, testing, architecture
198
+
199
+ ## Contributing
200
+
201
+ ```bash
202
+ git clone https://github.com/researchloop/researchloop.git
203
+ cd researchloop
204
+ uv sync
205
+ uv run pytest tests/ -m "not integration" # Unit tests
206
+ uv run ruff check . && uv run pyright researchloop/ # Lint + type check
207
+ ```
208
+
209
+ Integration tests run against a real SLURM scheduler in Docker -- see [development guide](https://researchloop.github.io/researchloop/development/).
210
+
211
+ ## License
212
+
213
+ MIT
@@ -0,0 +1,182 @@
1
+ # ResearchLoop
2
+
3
+ **Run AI-automated research experiments on your HPC cluster. Monitor from anywhere.**
4
+
5
+ [![CI](https://github.com/researchloop/researchloop/actions/workflows/ci.yml/badge.svg)](https://github.com/researchloop/researchloop/actions/workflows/ci.yml)
6
+ [![PyPI](https://img.shields.io/pypi/v/researchloop.svg)](https://pypi.org/project/researchloop/)
7
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
8
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
9
+
10
+ <img width="720" height="456" alt="mmlu-combined" src="https://github.com/user-attachments/assets/6d1d495f-1078-4f81-9f8a-bb1792ea3905" />
11
+
12
+ ---
13
+
14
+ ResearchLoop submits AI-powered research experiments to your SLURM or SGE cluster, then reports back the results. You describe a research idea in natural language, and it handles the rest: submitting the job, running a multi-step pipeline with [Claude Code](https://docs.anthropic.com/en/docs/claude-code), red-teaming the results, generating a report, and notifying you when it's done.
15
+
16
+ ```bash
17
+ pip install researchloop
18
+
19
+ # Submit an experiment to your cluster
20
+ researchloop sprint run "Investigate whether batch normalization improves convergence" --study my-project
21
+
22
+ # Start an auto-loop: 5 experiments, each building on the last
23
+ researchloop loop start --study my-project --count 5 --context "Focus on improving F1 score"
24
+ ```
25
+
26
+ Monitor everything from a web dashboard, Slack, or the CLI -- no need to SSH in and check on jobs.
27
+
28
+ ## Why ResearchLoop?
29
+
30
+ If you run experiments on shared HPC clusters, you know the pain: SSH in, write a script, submit with sbatch, wait, check logs, repeat. ResearchLoop automates this loop:
31
+
32
+ 1. **You describe what to investigate** (via CLI, dashboard, or Slack)
33
+ 2. **ResearchLoop submits a job** to your cluster via SSH
34
+ 3. **Claude runs the full experiment** -- writes code, runs it, analyzes results
35
+ 4. **A red-team step critiques the work** and Claude fixes any issues
36
+ 5. **You get a report** with a summary, PDF, and all artifacts
37
+
38
+ The **auto-loop** feature takes this further: after each experiment, Claude analyzes the results and proposes the next one. You set the number of iterations and walk away.
39
+
40
+ ## Get started in 5 minutes
41
+
42
+ **Prerequisites:** Python 3.10+, SSH access to an HPC cluster, [Claude Code](https://docs.anthropic.com/en/docs/claude-code) installed on the cluster.
43
+
44
+ ### 1. Install and initialize
45
+
46
+ ```bash
47
+ pip install researchloop
48
+ researchloop init
49
+ ```
50
+
51
+ ### 2. Edit `researchloop.toml`
52
+
53
+ ```toml
54
+ shared_secret = "pick-a-secret"
55
+ orchestrator_url = "http://localhost:8080"
56
+
57
+ [[cluster]]
58
+ name = "my-cluster"
59
+ host = "login.cluster.example.com"
60
+ user = "researcher"
61
+ key_path = "~/.ssh/id_ed25519"
62
+ scheduler_type = "slurm"
63
+ working_dir = "/scratch/researcher/researchloop"
64
+
65
+ [cluster.job_options]
66
+ gres = "gpu:1"
67
+ mem = "64G"
68
+ cpus-per-task = "8"
69
+
70
+ [[study]]
71
+ name = "my-project"
72
+ cluster = "my-cluster"
73
+ description = "Investigating X"
74
+ ```
75
+
76
+ ### 3. Start the server and run your first sprint
77
+
78
+ ```bash
79
+ researchloop serve &
80
+ researchloop connect http://localhost:8080
81
+ researchloop sprint run "Try approach X on dataset Y" --study my-project
82
+ ```
83
+
84
+ That's it. ResearchLoop SSHes to your cluster, submits the job, and you can monitor progress from the dashboard at `http://localhost:8080/dashboard/`.
85
+
86
+ ## Three ways to interact
87
+
88
+ ### Web dashboard
89
+
90
+ Browse to `/dashboard/` to see all your studies, sprints, and loops. Submit new sprints, start loops with custom GPU/memory settings, refresh live status from the cluster, and read reports -- all from the browser.
91
+
92
+ ### Slack bot
93
+
94
+ Get sprint notifications in your Slack channel and run commands from a thread:
95
+
96
+ ```
97
+ sprint run my-project "investigate feature X under condition Y"
98
+ sprint list
99
+ loop start my-project 5
100
+ help
101
+ ```
102
+
103
+ See the [Slack setup guide](https://researchloop.github.io/researchloop/slack/) for configuration.
104
+
105
+ ### CLI
106
+
107
+ ```bash
108
+ researchloop sprint run "idea" --study my-project # Submit a sprint
109
+ researchloop sprint list # List recent sprints
110
+ researchloop sprint show sp-a3f7b2 # View details
111
+ researchloop loop start --study my-project --count 5 # Auto-loop
112
+ researchloop loop stop loop-b4e1c9 # Stop a loop
113
+ ```
114
+
115
+ ## Customizing your studies
116
+
117
+ Each study can have its own context, cluster settings, and configuration:
118
+
119
+ ```toml
120
+ [[study]]
121
+ name = "sae-research"
122
+ cluster = "my-cluster"
123
+ max_sprint_duration_hours = 12
124
+ red_team_max_rounds = 2
125
+ allow_loop = true
126
+
127
+ # Tell Claude what this study is about and how to approach it
128
+ context = """
129
+ You are researching sparse autoencoder architectures.
130
+ Always train for 200M samples. Use batch size 1024.
131
+ Validate on the variation models listed in ~/reference/models.txt.
132
+ """
133
+
134
+ # Or point to a file with detailed instructions
135
+ claude_md_path = "./studies/sae-research/CLAUDE.md"
136
+
137
+ # Override GPU/memory for this study
138
+ [study.job_options]
139
+ gres = "gpu:a100:2"
140
+ mem = "128G"
141
+ ```
142
+
143
+ The context hierarchy is: **global** > **cluster** > **study**. All levels are merged and included in every sprint's prompt.
144
+
145
+ ## Deployment
146
+
147
+ For production, deploy the orchestrator as a Docker container on Fly.io, Railway, or any platform that supports persistent volumes:
148
+
149
+ ```bash
150
+ pip install researchloop
151
+ # See deployment guide for Docker/Fly.io setup
152
+ ```
153
+
154
+ Full deployment guide: [researchloop.github.io/researchloop/deployment](https://researchloop.github.io/researchloop/deployment/)
155
+
156
+ ## Documentation
157
+
158
+ Full docs at **[researchloop.github.io/researchloop](https://researchloop.github.io/researchloop/)**, including:
159
+
160
+ - [Configuration reference](https://researchloop.github.io/researchloop/configuration/) -- all TOML options and environment variables
161
+ - [Deployment guide](https://researchloop.github.io/researchloop/deployment/) -- Docker, Fly.io, SSH key setup
162
+ - [Dashboard guide](https://researchloop.github.io/researchloop/dashboard/) -- web UI features and authentication
163
+ - [Slack integration](https://researchloop.github.io/researchloop/slack/) -- setup, commands, notifications
164
+ - [CLI reference](https://researchloop.github.io/researchloop/cli/) -- all commands with examples
165
+ - [Security](https://researchloop.github.io/researchloop/security/) -- authentication, CSRF, webhook tokens
166
+ - [Development](https://researchloop.github.io/researchloop/development/) -- contributing, testing, architecture
167
+
168
+ ## Contributing
169
+
170
+ ```bash
171
+ git clone https://github.com/researchloop/researchloop.git
172
+ cd researchloop
173
+ uv sync
174
+ uv run pytest tests/ -m "not integration" # Unit tests
175
+ uv run ruff check . && uv run pyright researchloop/ # Lint + type check
176
+ ```
177
+
178
+ Integration tests run against a real SLURM scheduler in Docker -- see the [development guide](https://researchloop.github.io/researchloop/development/).
179
+
180
+ ## License
181
+
182
+ MIT
@@ -20,8 +20,7 @@ RUN curl -fsSL https://claude.ai/install.sh | bash
20
20
  # Install researchloop
21
21
  COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
22
22
  RUN uv venv /app/.venv && \
23
- uv pip install --python /app/.venv/bin/python --no-cache \
24
- "researchloop @ git+https://github.com/chanind/researchloop.git"
23
+ uv pip install --python /app/.venv/bin/python --no-cache researchloop
25
24
 
26
25
  WORKDIR /app
27
26
 
@@ -3,7 +3,7 @@
3
3
  ## Setup
4
4
 
5
5
  ```bash
6
- git clone https://github.com/chanind/researchloop.git
6
+ git clone https://github.com/researchloop/researchloop.git
7
7
  cd researchloop
8
8
  uv sync
9
9
  ```
@@ -3,24 +3,13 @@
3
3
  ## Prerequisites
4
4
 
5
5
  - **Python 3.10+**
6
- - **[uv](https://docs.astral.sh/uv/)** (recommended) or pip
7
6
  - **SSH access** to an HPC cluster with SLURM or SGE
8
7
  - **[Claude Code CLI](https://docs.anthropic.com/en/docs/claude-code)** installed and authenticated on the HPC cluster
9
8
 
10
9
  ## Installation
11
10
 
12
- ### From GitHub
13
-
14
- ```bash
15
- pip install git+https://github.com/chanind/researchloop.git
16
- ```
17
-
18
- ### For development
19
-
20
11
  ```bash
21
- git clone https://github.com/chanind/researchloop.git
22
- cd researchloop
23
- uv sync
12
+ pip install researchloop
24
13
  ```
25
14
 
26
15
  ## Initialize a project
@@ -2,6 +2,10 @@
2
2
 
3
3
  **Automated AI research sprints on HPC clusters.**
4
4
 
5
+ <video autoplay muted loop playsinline width="720" style="max-width:100%;height:auto;border-radius:6px">
6
+ <source src="assets/mmlu-combined.mp4" type="video/mp4">
7
+ </video>
8
+
5
9
  ---
6
10
 
7
11
  ResearchLoop automates multi-step AI research pipelines on SLURM and SGE clusters. You describe a research idea, and ResearchLoop submits it to your HPC cluster where [Claude Code](https://docs.anthropic.com/en/docs/claude-code) executes a full research pipeline -- coding, red-teaming, fixing, reporting -- inside a single job. Results are reported back via webhooks, Slack, or push notifications, and you can monitor everything from a web dashboard or the CLI.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "researchloop"
3
- version = "0.1.0"
3
+ version = "0.3.0"
4
4
  description = "Automated research sprint platform for HPC clusters"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -18,7 +18,8 @@ classifiers = [
18
18
  ]
19
19
  dependencies = [
20
20
  "click>=8.0,<9",
21
- "fastapi>=0.100,<1",
21
+ "fastapi>=0.100",
22
+ "starlette<1",
22
23
  "uvicorn[standard]>=0.20,<1",
23
24
  "jinja2>=3.1,<4",
24
25
  "aiosqlite>=0.19,<1",
@@ -0,0 +1 @@
1
+ __version__ = "0.3.0"