researchloop 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. {researchloop-0.1.0 → researchloop-0.2.0}/.github/workflows/ci.yml +5 -1
  2. {researchloop-0.1.0 → researchloop-0.2.0}/CLAUDE.md +4 -2
  3. researchloop-0.2.0/PKG-INFO +209 -0
  4. researchloop-0.2.0/README.md +178 -0
  5. {researchloop-0.1.0 → researchloop-0.2.0}/docs/deployment.md +1 -2
  6. {researchloop-0.1.0 → researchloop-0.2.0}/docs/development.md +1 -1
  7. {researchloop-0.1.0 → researchloop-0.2.0}/docs/getting-started.md +1 -12
  8. {researchloop-0.1.0 → researchloop-0.2.0}/pyproject.toml +3 -2
  9. researchloop-0.2.0/researchloop/__init__.py +1 -0
  10. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/cli.py +182 -0
  11. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/clusters/ssh.py +4 -1
  12. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/comms/conversation.py +8 -1
  13. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/comms/slack.py +2 -1
  14. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/core/models.py +5 -0
  15. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/core/orchestrator.py +19 -0
  16. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/dashboard/routes.py +292 -11
  17. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/dashboard/templates/base.html +6 -1
  18. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/dashboard/templates/loops.html +13 -2
  19. researchloop-0.2.0/researchloop/dashboard/templates/search.html +35 -0
  20. researchloop-0.2.0/researchloop/dashboard/templates/sprint_detail.html +208 -0
  21. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/dashboard/templates/study_detail.html +4 -4
  22. researchloop-0.2.0/researchloop/dashboard/templates/tweak_detail.html +71 -0
  23. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/db/migrations.py +14 -0
  24. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/db/queries.py +97 -0
  25. researchloop-0.2.0/researchloop/runner/job_templates/sge_tweak.sh.j2 +186 -0
  26. researchloop-0.2.0/researchloop/runner/job_templates/slurm_tweak.sh.j2 +184 -0
  27. researchloop-0.2.0/researchloop/runner/templates/report.md.j2 +61 -0
  28. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/runner/templates/research_sprint.md.j2 +1 -0
  29. researchloop-0.2.0/researchloop/runner/templates/tweak.md.j2 +12 -0
  30. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/schedulers/sge.py +6 -7
  31. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/sprints/manager.py +431 -29
  32. researchloop-0.2.0/researchloop/testing/__init__.py +7 -0
  33. researchloop-0.2.0/researchloop/testing/slack_mock.py +273 -0
  34. researchloop-0.2.0/researchloop/testing/slack_simulator.py +341 -0
  35. researchloop-0.2.0/tests/docker/sge/Dockerfile +25 -0
  36. researchloop-0.2.0/tests/docker/sge/entrypoint.sh +72 -0
  37. researchloop-0.2.0/tests/docker/slurm/mock_claude.sh +41 -0
  38. {researchloop-0.1.0 → researchloop-0.2.0}/tests/integration/conftest.py +99 -0
  39. researchloop-0.2.0/tests/integration/test_sge_scheduler.py +197 -0
  40. {researchloop-0.1.0 → researchloop-0.2.0}/tests/integration/test_webhook_and_refresh.py +126 -17
  41. researchloop-0.2.0/tests/test_search.py +177 -0
  42. {researchloop-0.1.0 → researchloop-0.2.0}/tests/test_sge.py +1 -1
  43. researchloop-0.2.0/tests/test_slack_mock.py +421 -0
  44. researchloop-0.2.0/tests/test_slack_simulator.py +478 -0
  45. {researchloop-0.1.0 → researchloop-0.2.0}/tests/test_sprint_manager.py +72 -0
  46. researchloop-0.2.0/tests/test_tweaks.py +728 -0
  47. {researchloop-0.1.0 → researchloop-0.2.0}/uv.lock +4 -2
  48. researchloop-0.1.0/PKG-INFO +0 -596
  49. researchloop-0.1.0/README.md +0 -566
  50. researchloop-0.1.0/researchloop/__init__.py +0 -1
  51. researchloop-0.1.0/researchloop/dashboard/templates/sprint_detail.html +0 -109
  52. researchloop-0.1.0/researchloop/runner/templates/report.md.j2 +0 -31
  53. {researchloop-0.1.0 → researchloop-0.2.0}/.github/workflows/docs.yml +0 -0
  54. {researchloop-0.1.0 → researchloop-0.2.0}/.github/workflows/release.yml +0 -0
  55. {researchloop-0.1.0 → researchloop-0.2.0}/.gitignore +0 -0
  56. {researchloop-0.1.0 → researchloop-0.2.0}/Dockerfile +0 -0
  57. {researchloop-0.1.0 → researchloop-0.2.0}/LICENSE +0 -0
  58. {researchloop-0.1.0 → researchloop-0.2.0}/docs/cli.md +0 -0
  59. {researchloop-0.1.0 → researchloop-0.2.0}/docs/configuration.md +0 -0
  60. {researchloop-0.1.0 → researchloop-0.2.0}/docs/dashboard.md +0 -0
  61. {researchloop-0.1.0 → researchloop-0.2.0}/docs/index.md +0 -0
  62. {researchloop-0.1.0 → researchloop-0.2.0}/docs/security.md +0 -0
  63. {researchloop-0.1.0 → researchloop-0.2.0}/docs/slack.md +0 -0
  64. {researchloop-0.1.0 → researchloop-0.2.0}/mkdocs.yml +0 -0
  65. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/__main__.py +0 -0
  66. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/clusters/__init__.py +0 -0
  67. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/clusters/monitor.py +0 -0
  68. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/comms/__init__.py +0 -0
  69. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/comms/base.py +0 -0
  70. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/comms/ntfy.py +0 -0
  71. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/comms/router.py +0 -0
  72. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/core/__init__.py +0 -0
  73. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/core/auth.py +0 -0
  74. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/core/config.py +0 -0
  75. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/core/credentials.py +0 -0
  76. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/dashboard/__init__.py +0 -0
  77. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/dashboard/app.py +0 -0
  78. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/dashboard/auth.py +0 -0
  79. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/dashboard/templates/login.html +0 -0
  80. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/dashboard/templates/loop_detail.html +0 -0
  81. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/dashboard/templates/setup.html +0 -0
  82. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/dashboard/templates/sprints.html +0 -0
  83. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/dashboard/templates/studies.html +0 -0
  84. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/db/__init__.py +0 -0
  85. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/db/database.py +0 -0
  86. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/runner/__init__.py +0 -0
  87. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/runner/claude.py +0 -0
  88. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/runner/job_templates/sge.sh.j2 +0 -0
  89. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/runner/job_templates/slurm.sh.j2 +0 -0
  90. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/runner/main.py +0 -0
  91. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/runner/pipeline.py +0 -0
  92. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/runner/templates/fix_issues.md.j2 +0 -0
  93. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/runner/templates/idea_generator.md.j2 +0 -0
  94. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/runner/templates/red_team.md.j2 +0 -0
  95. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/runner/templates/summarizer.md.j2 +0 -0
  96. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/runner/upload.py +0 -0
  97. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/schedulers/__init__.py +0 -0
  98. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/schedulers/base.py +0 -0
  99. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/schedulers/local.py +0 -0
  100. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/schedulers/slurm.py +0 -0
  101. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/sprints/__init__.py +0 -0
  102. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/sprints/auto_loop.py +0 -0
  103. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/studies/__init__.py +0 -0
  104. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop/studies/manager.py +0 -0
  105. {researchloop-0.1.0 → researchloop-0.2.0}/researchloop.toml.example +0 -0
  106. {researchloop-0.1.0 → researchloop-0.2.0}/slack-app-manifest.yml +0 -0
  107. {researchloop-0.1.0 → researchloop-0.2.0}/tests/__init__.py +0 -0
  108. {researchloop-0.1.0 → researchloop-0.2.0}/tests/conftest.py +0 -0
  109. {researchloop-0.1.0/tests/docker/slurm → researchloop-0.2.0/tests/docker/sge}/mock_claude.sh +0 -0
  110. {researchloop-0.1.0 → researchloop-0.2.0}/tests/docker/slurm/Dockerfile +0 -0
  111. {researchloop-0.1.0 → researchloop-0.2.0}/tests/docker/slurm/entrypoint.sh +0 -0
  112. {researchloop-0.1.0 → researchloop-0.2.0}/tests/integration/__init__.py +0 -0
  113. {researchloop-0.1.0 → researchloop-0.2.0}/tests/integration/test_loop_advancement.py +0 -0
  114. {researchloop-0.1.0 → researchloop-0.2.0}/tests/integration/test_loop_and_monitor.py +0 -0
  115. {researchloop-0.1.0 → researchloop-0.2.0}/tests/integration/test_slurm_scheduler.py +0 -0
  116. {researchloop-0.1.0 → researchloop-0.2.0}/tests/integration/test_sprint_slurm.py +0 -0
  117. {researchloop-0.1.0 → researchloop-0.2.0}/tests/test_api.py +0 -0
  118. {researchloop-0.1.0 → researchloop-0.2.0}/tests/test_auto_loop.py +0 -0
  119. {researchloop-0.1.0 → researchloop-0.2.0}/tests/test_cli.py +0 -0
  120. {researchloop-0.1.0 → researchloop-0.2.0}/tests/test_config.py +0 -0
  121. {researchloop-0.1.0 → researchloop-0.2.0}/tests/test_conversation.py +0 -0
  122. {researchloop-0.1.0 → researchloop-0.2.0}/tests/test_dashboard.py +0 -0
  123. {researchloop-0.1.0 → researchloop-0.2.0}/tests/test_database.py +0 -0
  124. {researchloop-0.1.0 → researchloop-0.2.0}/tests/test_models.py +0 -0
  125. {researchloop-0.1.0 → researchloop-0.2.0}/tests/test_notification.py +0 -0
  126. {researchloop-0.1.0 → researchloop-0.2.0}/tests/test_queries.py +0 -0
  127. {researchloop-0.1.0 → researchloop-0.2.0}/tests/test_runner.py +0 -0
  128. {researchloop-0.1.0 → researchloop-0.2.0}/tests/test_schedulers.py +0 -0
  129. {researchloop-0.1.0 → researchloop-0.2.0}/tests/test_slack.py +0 -0
  130. {researchloop-0.1.0 → researchloop-0.2.0}/tests/test_slack_events.py +0 -0
  131. {researchloop-0.1.0 → researchloop-0.2.0}/tests/test_study_manager.py +0 -0
@@ -67,7 +67,11 @@ jobs:
67
67
  - name: Build SLURM test container
68
68
  run: docker build -t researchloop-slurm-test tests/docker/slurm/
69
69
 
70
+ - name: Build SGE test container
71
+ run: docker build -t researchloop-sge-test tests/docker/sge/
72
+
70
73
  - name: Run integration tests
71
- run: uv run pytest tests/integration/ -v --tb=short -x --timeout=120
74
+ run: uv run pytest tests/integration/ -v --tb=short -x --timeout=180
72
75
  env:
73
76
  SLURM_TEST_IMAGE: researchloop-slurm-test
77
+ SGE_TEST_IMAGE: researchloop-sge-test
@@ -30,7 +30,7 @@ Python 3.10+, uv, asyncio throughout. Key deps: click (CLI), FastAPI (API + dash
30
30
 
31
31
  ```bash
32
32
  uv sync # install deps
33
- uv run pytest tests/ -v -m "not integration" # unit tests (339 tests, ~3s)
33
+ uv run pytest tests/ -v -m "not integration" # unit tests (374 tests, ~3s)
34
34
  uv run pytest tests/integration/ -v --timeout=120 # integration tests (needs Docker)
35
35
  uv run ruff check . # lint
36
36
  uv run ruff format . # format
@@ -116,7 +116,7 @@ researchloop/
116
116
 
117
117
  ## Database
118
118
 
119
- SQLite with 7 tables: `studies`, `sprints`, `auto_loops`, `artifacts`, `slack_sessions`, `events`, `settings`. Schema in `db/migrations.py`. All queries in `db/queries.py` use parameterized SQL and return plain dicts.
119
+ SQLite with 8 tables: `studies`, `sprints`, `tweaks`, `auto_loops`, `artifacts`, `slack_sessions`, `events`, `settings`. Schema in `db/migrations.py`. All queries in `db/queries.py` use parameterized SQL and return plain dicts.
120
120
 
121
121
  Key columns:
122
122
  - `sprints.webhook_token` — per-sprint token for webhook auth (generated at creation)
@@ -124,6 +124,8 @@ Key columns:
124
124
  - `sprints.metadata_json` — stores report text, has_pdf flag, heartbeat info
125
125
  - `sprints.error` — stores live progress (progress.md + output.log + tool log) during running sprints
126
126
  - `auto_loops.metadata_json` — stores loop context and job_options
127
+ - `tweaks.sprint_id` — links tweak to parent sprint (one sprint can have many tweaks)
128
+ - `tweaks.instruction` — the user's tweak request text
127
129
  - `settings` — key/value store for signing_key and dashboard_password_hash
128
130
 
129
131
  ## Key patterns
@@ -0,0 +1,209 @@
1
+ Metadata-Version: 2.4
2
+ Name: researchloop
3
+ Version: 0.2.0
4
+ Summary: Automated research sprint platform for HPC clusters
5
+ License: MIT
6
+ License-File: LICENSE
7
+ Classifier: Development Status :: 4 - Beta
8
+ Classifier: Intended Audience :: Science/Research
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Topic :: Scientific/Engineering
16
+ Requires-Python: >=3.10
17
+ Requires-Dist: aiosqlite<1,>=0.19
18
+ Requires-Dist: asyncssh<3,>=2.14
19
+ Requires-Dist: bcrypt<5,>=4.0
20
+ Requires-Dist: click<9,>=8.0
21
+ Requires-Dist: fastapi>=0.100
22
+ Requires-Dist: httpx<1,>=0.24
23
+ Requires-Dist: itsdangerous<3,>=2.1
24
+ Requires-Dist: jinja2<4,>=3.1
25
+ Requires-Dist: markdown<4,>=3.4
26
+ Requires-Dist: python-multipart<1,>=0.0.6
27
+ Requires-Dist: starlette<1
28
+ Requires-Dist: tomli<3,>=2.0; python_version < '3.11'
29
+ Requires-Dist: uvicorn[standard]<1,>=0.20
30
+ Description-Content-Type: text/markdown
31
+
32
+ # ResearchLoop
33
+
34
+ **Run AI-automated research experiments on your HPC cluster. Monitor from anywhere.**
35
+
36
+ [![CI](https://github.com/researchloop/researchloop/actions/workflows/ci.yml/badge.svg)](https://github.com/researchloop/researchloop/actions/workflows/ci.yml)
37
+ [![PyPI](https://img.shields.io/pypi/v/researchloop.svg)](https://pypi.org/project/researchloop/)
38
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
39
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
40
+
41
+ ---
42
+
43
+ ResearchLoop submits AI-powered research experiments to your SLURM or SGE cluster, then reports back the results. You describe a research idea in natural language, and it handles the rest: submitting the job, running a multi-step pipeline with [Claude Code](https://docs.anthropic.com/en/docs/claude-code), red-teaming the results, generating a report, and notifying you when it's done.
44
+
45
+ ```bash
46
+ pip install researchloop
47
+
48
+ # Submit an experiment to your cluster
49
+ researchloop sprint run "Investigate whether batch normalization improves convergence" --study my-project
50
+
51
+ # Start an auto-loop: 5 experiments, each building on the last
52
+ researchloop loop start --study my-project --count 5 --context "Focus on improving F1 score"
53
+ ```
54
+
55
+ Monitor everything from a web dashboard, Slack, or the CLI -- no need to SSH in and check on jobs.
56
+
57
+ ## Why ResearchLoop?
58
+
59
+ If you run experiments on shared HPC clusters, you know the pain: SSH in, write a script, submit with sbatch, wait, check logs, repeat. ResearchLoop automates this loop:
60
+
61
+ 1. **You describe what to investigate** (via CLI, dashboard, or Slack)
62
+ 2. **ResearchLoop submits a job** to your cluster via SSH
63
+ 3. **Claude runs the full experiment** -- writes code, runs it, analyzes results
64
+ 4. **A red-team step critiques the work** and Claude fixes any issues
65
+ 5. **You get a report** with a summary, PDF, and all artifacts
66
+
67
+ The **auto-loop** feature takes this further: after each experiment, Claude analyzes the results and proposes the next one. You set how many iterations, and walk away.
68
+
69
+ ## Get started in 5 minutes
70
+
71
+ **Prerequisites:** Python 3.10+, SSH access to an HPC cluster, [Claude Code](https://docs.anthropic.com/en/docs/claude-code) installed on the cluster.
72
+
73
+ ### 1. Install and initialize
74
+
75
+ ```bash
76
+ pip install researchloop
77
+ researchloop init
78
+ ```
79
+
80
+ ### 2. Edit `researchloop.toml`
81
+
82
+ ```toml
83
+ shared_secret = "pick-a-secret"
84
+ orchestrator_url = "http://localhost:8080"
85
+
86
+ [[cluster]]
87
+ name = "my-cluster"
88
+ host = "login.cluster.example.com"
89
+ user = "researcher"
90
+ key_path = "~/.ssh/id_ed25519"
91
+ scheduler_type = "slurm"
92
+ working_dir = "/scratch/researcher/researchloop"
93
+
94
+ [cluster.job_options]
95
+ gres = "gpu:1"
96
+ mem = "64G"
97
+ cpus-per-task = "8"
98
+
99
+ [[study]]
100
+ name = "my-project"
101
+ cluster = "my-cluster"
102
+ description = "Investigating X"
103
+ ```
104
+
105
+ ### 3. Start the server and run your first sprint
106
+
107
+ ```bash
108
+ researchloop serve &
109
+ researchloop connect http://localhost:8080
110
+ researchloop sprint run "Try approach X on dataset Y" --study my-project
111
+ ```
112
+
113
+ That's it. ResearchLoop SSHes to your cluster, submits the job, and you can monitor progress from the dashboard at `http://localhost:8080/dashboard/`.
114
+
115
+ ## Three ways to interact
116
+
117
+ ### Web dashboard
118
+
119
+ Browse to `/dashboard/` to see all your studies, sprints, and loops. Submit new sprints, start loops with custom GPU/memory settings, refresh live status from the cluster, and read reports -- all from the browser.
120
+
121
+ ### Slack bot
122
+
123
+ Chat with the bot to start sprints, check status, or discuss research ideas. The bot maintains conversation context across a thread, so you can have a back-and-forth about what to try next.
124
+
125
+ ```
126
+ You: What should I investigate next based on the results from sp-a3f7b2?
127
+ Bot: Based on the findings, I'd suggest... [ACTION: sprint_run {"study": "my-project", "idea": "..."}]
128
+ ```
129
+
130
+ See the [Slack setup guide](https://researchloop.github.io/researchloop/slack/) for configuration.
131
+
132
+ ### CLI
133
+
134
+ ```bash
135
+ researchloop sprint run "idea" --study my-project # Submit a sprint
136
+ researchloop sprint list # List recent sprints
137
+ researchloop sprint show sp-a3f7b2 # View details
138
+ researchloop loop start --study my-project --count 5 # Auto-loop
139
+ researchloop loop stop loop-b4e1c9 # Stop a loop
140
+ ```
141
+
142
+ ## Customizing your studies
143
+
144
+ Each study can have its own context, cluster settings, and configuration:
145
+
146
+ ```toml
147
+ [[study]]
148
+ name = "sae-research"
149
+ cluster = "my-cluster"
150
+ max_sprint_duration_hours = 12
151
+ red_team_max_rounds = 2
152
+ allow_loop = true
153
+
154
+ # Tell Claude what this study is about and how to approach it
155
+ context = """
156
+ You are researching sparse autoencoder architectures.
157
+ Always train for 200M samples. Use batch size 1024.
158
+ Validate on the variation models listed in ~/reference/models.txt.
159
+ """
160
+
161
+ # Or point to a file with detailed instructions
162
+ claude_md_path = "./studies/sae-research/CLAUDE.md"
163
+
164
+ # Override GPU/memory for this study
165
+ [study.job_options]
166
+ gres = "gpu:a100:2"
167
+ mem = "128G"
168
+ ```
169
+
170
+ The context hierarchy is: **global** > **cluster** > **study**. All levels are merged and included in every sprint's prompt.
171
+
172
+ ## Deployment
173
+
174
+ For production, deploy the orchestrator as a Docker container on Fly.io, Railway, or any platform that supports persistent volumes:
175
+
176
+ ```bash
177
+ pip install researchloop
178
+ # See deployment guide for Docker/Fly.io setup
179
+ ```
180
+
181
+ Full deployment guide: [researchloop.github.io/researchloop/deployment](https://researchloop.github.io/researchloop/deployment/)
182
+
183
+ ## Documentation
184
+
185
+ Full docs at **[researchloop.github.io/researchloop](https://researchloop.github.io/researchloop/)**, including:
186
+
187
+ - [Configuration reference](https://researchloop.github.io/researchloop/configuration/) -- all TOML options and environment variables
188
+ - [Deployment guide](https://researchloop.github.io/researchloop/deployment/) -- Docker, Fly.io, SSH key setup
189
+ - [Dashboard guide](https://researchloop.github.io/researchloop/dashboard/) -- web UI features and authentication
190
+ - [Slack integration](https://researchloop.github.io/researchloop/slack/) -- setup, commands, conversational mode
191
+ - [CLI reference](https://researchloop.github.io/researchloop/cli/) -- all commands with examples
192
+ - [Security](https://researchloop.github.io/researchloop/security/) -- authentication, CSRF, webhook tokens
193
+ - [Development](https://researchloop.github.io/researchloop/development/) -- contributing, testing, architecture
194
+
195
+ ## Contributing
196
+
197
+ ```bash
198
+ git clone https://github.com/researchloop/researchloop.git
199
+ cd researchloop
200
+ uv sync
201
+ uv run pytest tests/ -m "not integration" # Unit tests
202
+ uv run ruff check . && uv run pyright researchloop/ # Lint + type check
203
+ ```
204
+
205
+ Integration tests run against real SLURM and SGE schedulers in Docker -- see [development guide](https://researchloop.github.io/researchloop/development/).
206
+
207
+ ## License
208
+
209
+ MIT
@@ -0,0 +1,178 @@
1
+ # ResearchLoop
2
+
3
+ **Run AI-automated research experiments on your HPC cluster. Monitor from anywhere.**
4
+
5
+ [![CI](https://github.com/researchloop/researchloop/actions/workflows/ci.yml/badge.svg)](https://github.com/researchloop/researchloop/actions/workflows/ci.yml)
6
+ [![PyPI](https://img.shields.io/pypi/v/researchloop.svg)](https://pypi.org/project/researchloop/)
7
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
8
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
9
+
10
+ ---
11
+
12
+ ResearchLoop submits AI-powered research experiments to your SLURM or SGE cluster, then reports back the results. You describe a research idea in natural language, and it handles the rest: submitting the job, running a multi-step pipeline with [Claude Code](https://docs.anthropic.com/en/docs/claude-code), red-teaming the results, generating a report, and notifying you when it's done.
13
+
14
+ ```bash
15
+ pip install researchloop
16
+
17
+ # Submit an experiment to your cluster
18
+ researchloop sprint run "Investigate whether batch normalization improves convergence" --study my-project
19
+
20
+ # Start an auto-loop: 5 experiments, each building on the last
21
+ researchloop loop start --study my-project --count 5 --context "Focus on improving F1 score"
22
+ ```
23
+
24
+ Monitor everything from a web dashboard, Slack, or the CLI -- no need to SSH in and check on jobs.
25
+
26
+ ## Why ResearchLoop?
27
+
28
+ If you run experiments on shared HPC clusters, you know the pain: SSH in, write a script, submit with sbatch, wait, check logs, repeat. ResearchLoop automates this loop:
29
+
30
+ 1. **You describe what to investigate** (via CLI, dashboard, or Slack)
31
+ 2. **ResearchLoop submits a job** to your cluster via SSH
32
+ 3. **Claude runs the full experiment** -- writes code, runs it, analyzes results
33
+ 4. **A red-team step critiques the work** and Claude fixes any issues
34
+ 5. **You get a report** with a summary, PDF, and all artifacts
35
+
36
+ The **auto-loop** feature takes this further: after each experiment, Claude analyzes the results and proposes the next one. You set how many iterations, and walk away.
37
+
38
+ ## Get started in 5 minutes
39
+
40
+ **Prerequisites:** Python 3.10+, SSH access to an HPC cluster, [Claude Code](https://docs.anthropic.com/en/docs/claude-code) installed on the cluster.
41
+
42
+ ### 1. Install and initialize
43
+
44
+ ```bash
45
+ pip install researchloop
46
+ researchloop init
47
+ ```
48
+
49
+ ### 2. Edit `researchloop.toml`
50
+
51
+ ```toml
52
+ shared_secret = "pick-a-secret"
53
+ orchestrator_url = "http://localhost:8080"
54
+
55
+ [[cluster]]
56
+ name = "my-cluster"
57
+ host = "login.cluster.example.com"
58
+ user = "researcher"
59
+ key_path = "~/.ssh/id_ed25519"
60
+ scheduler_type = "slurm"
61
+ working_dir = "/scratch/researcher/researchloop"
62
+
63
+ [cluster.job_options]
64
+ gres = "gpu:1"
65
+ mem = "64G"
66
+ cpus-per-task = "8"
67
+
68
+ [[study]]
69
+ name = "my-project"
70
+ cluster = "my-cluster"
71
+ description = "Investigating X"
72
+ ```
73
+
74
+ ### 3. Start the server and run your first sprint
75
+
76
+ ```bash
77
+ researchloop serve &
78
+ researchloop connect http://localhost:8080
79
+ researchloop sprint run "Try approach X on dataset Y" --study my-project
80
+ ```
81
+
82
+ That's it. ResearchLoop SSHes to your cluster, submits the job, and you can monitor progress from the dashboard at `http://localhost:8080/dashboard/`.
83
+
84
+ ## Three ways to interact
85
+
86
+ ### Web dashboard
87
+
88
+ Browse to `/dashboard/` to see all your studies, sprints, and loops. Submit new sprints, start loops with custom GPU/memory settings, refresh live status from the cluster, and read reports -- all from the browser.
89
+
90
+ ### Slack bot
91
+
92
+ Chat with the bot to start sprints, check status, or discuss research ideas. The bot maintains conversation context across a thread, so you can have a back-and-forth about what to try next.
93
+
94
+ ```
95
+ You: What should I investigate next based on the results from sp-a3f7b2?
96
+ Bot: Based on the findings, I'd suggest... [ACTION: sprint_run {"study": "my-project", "idea": "..."}]
97
+ ```
98
+
99
+ See the [Slack setup guide](https://researchloop.github.io/researchloop/slack/) for configuration.
100
+
101
+ ### CLI
102
+
103
+ ```bash
104
+ researchloop sprint run "idea" --study my-project # Submit a sprint
105
+ researchloop sprint list # List recent sprints
106
+ researchloop sprint show sp-a3f7b2 # View details
107
+ researchloop loop start --study my-project --count 5 # Auto-loop
108
+ researchloop loop stop loop-b4e1c9 # Stop a loop
109
+ ```
110
+
111
+ ## Customizing your studies
112
+
113
+ Each study can have its own context, cluster settings, and configuration:
114
+
115
+ ```toml
116
+ [[study]]
117
+ name = "sae-research"
118
+ cluster = "my-cluster"
119
+ max_sprint_duration_hours = 12
120
+ red_team_max_rounds = 2
121
+ allow_loop = true
122
+
123
+ # Tell Claude what this study is about and how to approach it
124
+ context = """
125
+ You are researching sparse autoencoder architectures.
126
+ Always train for 200M samples. Use batch size 1024.
127
+ Validate on the variation models listed in ~/reference/models.txt.
128
+ """
129
+
130
+ # Or point to a file with detailed instructions
131
+ claude_md_path = "./studies/sae-research/CLAUDE.md"
132
+
133
+ # Override GPU/memory for this study
134
+ [study.job_options]
135
+ gres = "gpu:a100:2"
136
+ mem = "128G"
137
+ ```
138
+
139
+ The context hierarchy is: **global** > **cluster** > **study**. All levels are merged and included in every sprint's prompt.
140
+
141
+ ## Deployment
142
+
143
+ For production, deploy the orchestrator as a Docker container on Fly.io, Railway, or any platform that supports persistent volumes:
144
+
145
+ ```bash
146
+ pip install researchloop
147
+ # See deployment guide for Docker/Fly.io setup
148
+ ```
149
+
150
+ Full deployment guide: [researchloop.github.io/researchloop/deployment](https://researchloop.github.io/researchloop/deployment/)
151
+
152
+ ## Documentation
153
+
154
+ Full docs at **[researchloop.github.io/researchloop](https://researchloop.github.io/researchloop/)**, including:
155
+
156
+ - [Configuration reference](https://researchloop.github.io/researchloop/configuration/) -- all TOML options and environment variables
157
+ - [Deployment guide](https://researchloop.github.io/researchloop/deployment/) -- Docker, Fly.io, SSH key setup
158
+ - [Dashboard guide](https://researchloop.github.io/researchloop/dashboard/) -- web UI features and authentication
159
+ - [Slack integration](https://researchloop.github.io/researchloop/slack/) -- setup, commands, conversational mode
160
+ - [CLI reference](https://researchloop.github.io/researchloop/cli/) -- all commands with examples
161
+ - [Security](https://researchloop.github.io/researchloop/security/) -- authentication, CSRF, webhook tokens
162
+ - [Development](https://researchloop.github.io/researchloop/development/) -- contributing, testing, architecture
163
+
164
+ ## Contributing
165
+
166
+ ```bash
167
+ git clone https://github.com/researchloop/researchloop.git
168
+ cd researchloop
169
+ uv sync
170
+ uv run pytest tests/ -m "not integration" # Unit tests
171
+ uv run ruff check . && uv run pyright researchloop/ # Lint + type check
172
+ ```
173
+
174
+ Integration tests run against real SLURM and SGE schedulers in Docker -- see [development guide](https://researchloop.github.io/researchloop/development/).
175
+
176
+ ## License
177
+
178
+ MIT
@@ -20,8 +20,7 @@ RUN curl -fsSL https://claude.ai/install.sh | bash
20
20
  # Install researchloop
21
21
  COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
22
22
  RUN uv venv /app/.venv && \
23
- uv pip install --python /app/.venv/bin/python --no-cache \
24
- "researchloop @ git+https://github.com/chanind/researchloop.git"
23
+ uv pip install --python /app/.venv/bin/python --no-cache researchloop
25
24
 
26
25
  WORKDIR /app
27
26
 
@@ -3,7 +3,7 @@
3
3
  ## Setup
4
4
 
5
5
  ```bash
6
- git clone https://github.com/chanind/researchloop.git
6
+ git clone https://github.com/researchloop/researchloop.git
7
7
  cd researchloop
8
8
  uv sync
9
9
  ```
@@ -3,24 +3,13 @@
3
3
  ## Prerequisites
4
4
 
5
5
  - **Python 3.10+**
6
- - **[uv](https://docs.astral.sh/uv/)** (recommended) or pip
7
6
  - **SSH access** to an HPC cluster with SLURM or SGE
8
7
  - **[Claude Code CLI](https://docs.anthropic.com/en/docs/claude-code)** installed and authenticated on the HPC cluster
9
8
 
10
9
  ## Installation
11
10
 
12
- ### From GitHub
13
-
14
- ```bash
15
- pip install git+https://github.com/chanind/researchloop.git
16
- ```
17
-
18
- ### For development
19
-
20
11
  ```bash
21
- git clone https://github.com/chanind/researchloop.git
22
- cd researchloop
23
- uv sync
12
+ pip install researchloop
24
13
  ```
25
14
 
26
15
  ## Initialize a project
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "researchloop"
3
- version = "0.1.0"
3
+ version = "0.2.0"
4
4
  description = "Automated research sprint platform for HPC clusters"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -18,7 +18,8 @@ classifiers = [
18
18
  ]
19
19
  dependencies = [
20
20
  "click>=8.0,<9",
21
- "fastapi>=0.100,<1",
21
+ "fastapi>=0.100",
22
+ "starlette<1",
22
23
  "uvicorn[standard]>=0.20,<1",
23
24
  "jinja2>=3.1,<4",
24
25
  "aiosqlite>=0.19,<1",
@@ -0,0 +1 @@
1
+ __version__ = "0.2.0"
@@ -1136,3 +1136,185 @@ async def _cluster_check(config_path: str | None, cluster_name: str | None) -> N
1136
1136
  def cluster_check(ctx: click.Context, name: str | None) -> None:
1137
1137
  """Check cluster connectivity."""
1138
1138
  run_async(_cluster_check(ctx.obj.get("config_path"), name))
1139
+
1140
+
1141
+ # ===================================================================
1142
+ # Slack testing commands
1143
+ # ===================================================================
1144
+
1145
+
1146
@cli.command("test-slack")
@click.argument("message")
@click.option(
    "--url",
    default="http://localhost:8080",
    show_default=True,
    help="Orchestrator URL to send the event to",
)
@click.option(
    "--signing-secret",
    default="test_signing_secret",
    show_default=True,
    help="Slack signing secret for signature generation",
)
@click.option("--user", default="U_TEST", show_default=True, help="Slack user ID")
@click.option("--channel", default="C_TEST", show_default=True, help="Slack channel ID")
@click.option(
    "--thread-ts",
    default=None,
    help="Thread timestamp (for threaded replies)",
)
def test_slack(
    message: str,
    url: str,
    signing_secret: str,
    user: str,
    channel: str,
    thread_ts: str | None,
) -> None:
    """Send a signed Slack event to the orchestrator for testing.

    Example::

        researchloop test-slack "help" --url http://localhost:8080

        researchloop test-slack "sprint run my-study some idea" \\
            --signing-secret abc123
    """
    # Imported locally (and aliased) so these helpers stay private to this
    # testing command and do not add to the module's import-time footprint.
    import hashlib as _hashlib
    import hmac as _hmac
    import time as _time
    import uuid as _uuid

    # NOTE(review): when --thread-ts is given the message "ts" is set to the
    # same value as "thread_ts". Real Slack replies presumably carry their own
    # "ts" -- confirm this is what the orchestrator's handler expects.
    ts = thread_ts or f"{int(_time.time())}.000001"

    # A fresh event id per invocation so repeated runs are distinguishable.
    event_id = f"Ev{_uuid.uuid4().hex[:10].upper()}"
    payload: dict[str, Any] = {
        "type": "event_callback",
        "event_id": event_id,
        "event": {
            "type": "message",
            "text": message,
            "user": user,
            "channel": channel,
            "channel_type": "channel",
            "ts": ts,
        },
    }
    if thread_ts:
        payload["event"]["thread_ts"] = thread_ts

    # The exact bytes sent on the wire are the bytes that get signed.
    body = json.dumps(payload).encode()

    # Signature: "v0=" + HMAC-SHA256(secret, b"v0:<timestamp>:<body>").
    # Built directly as bytes; identical to encoding the formatted string.
    timestamp = str(int(_time.time()))
    basestring = b"v0:" + timestamp.encode() + b":" + body
    digest = _hmac.new(
        signing_secret.encode(),
        basestring,
        _hashlib.sha256,
    ).hexdigest()
    sig = f"v0={digest}"

    headers = {
        "Content-Type": "application/json",
        "X-Slack-Request-Timestamp": timestamp,
        "X-Slack-Signature": sig,
    }

    target = url.rstrip("/") + "/api/slack/events"
    click.echo(f"Sending event to {target}")
    click.echo(f" User : {user}")
    click.echo(f" Channel: {channel}")
    click.echo(f" Text : {message}")
    if thread_ts:
        click.echo(f" Thread : {thread_ts}")
    click.echo()

    # Only the network call sits inside the try; every transport failure is
    # turned into a ClickException (clean CLI error) with the cause chained.
    try:
        resp = httpx.post(target, content=body, headers=headers, timeout=10)
    except httpx.ConnectError as exc:
        raise click.ClickException(f"Cannot connect to {target}") from exc
    except httpx.TimeoutException as exc:
        raise click.ClickException(f"Request timed out: {target}") from exc
    except httpx.RequestError as exc:
        # Any other transport-level failure (read error, bad protocol, ...).
        raise click.ClickException(f"Request to {target} failed: {exc}") from exc

    click.echo(click.style(f"Response [{resp.status_code}]:", bold=True))
    try:
        click.echo(json.dumps(resp.json(), indent=2))
    except ValueError:
        # Body is not valid JSON; show it verbatim instead.
        click.echo(resp.text)
1246
+
1247
+
1248
@cli.command("mock-slack")
@click.option(
    "--port",
    type=int,
    default=9876,
    show_default=True,
    help="Port to run the mock server on",
)
@click.option(
    "--host",
    default="127.0.0.1",
    show_default=True,
    help="Host to bind to",
)
@click.option(
    "--target-url",
    default="http://localhost:8080",
    show_default=True,
    help="Orchestrator URL for the /send-event endpoint",
)
@click.option(
    "--signing-secret",
    default="mock_signing_secret",
    show_default=True,
    help="Signing secret for generated events",
)
def mock_slack(
    port: int,
    host: str,
    target_url: str,
    signing_secret: str,
) -> None:
    """Start a mock Slack API server for local testing.

    This server captures all outbound Slack API calls (postMessage,
    files.upload) and exposes inspection endpoints.

    Example::

        researchloop mock-slack --port 9876

    Then set the environment variable before starting the orchestrator::

        export RESEARCHLOOP_SLACK_API_URL=http://localhost:9876/api
        researchloop serve
    """
    # Deferred imports: only needed when this command is actually invoked.
    import uvicorn

    from researchloop.testing.slack_mock import create_mock_slack_app

    mock_app = create_mock_slack_app(
        target_url=target_url,
        signing_secret=signing_secret,
    )

    # Every advertised endpoint hangs off the same host/port pair.
    base_url = f"http://{host}:{port}"
    api_url = f"{base_url}/api"
    env_hint = click.style(f"RESEARCHLOOP_SLACK_API_URL={api_url}", bold=True)

    # Startup banner; a None entry is echoed as an empty line.
    banner = (
        None,
        click.style("Mock Slack API", fg="cyan", bold=True),
        None,
        f" Running at: {api_url}",
        None,
        f" Set {env_hint} to redirect Slack calls",
        None,
        f" View captured messages: {base_url}/captured",
        f" Clear captured: POST {base_url}/clear",
        f" Send test event: POST {base_url}/send-event",
        None,
    )
    for line in banner:
        click.echo(line)

    # Hand control to uvicorn to serve the mock app on the requested address.
    uvicorn.run(mock_app, host=host, port=port)