researchloop 0.3.2__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. {researchloop-0.3.2 → researchloop-0.3.4}/PKG-INFO +1 -1
  2. {researchloop-0.3.2 → researchloop-0.3.4}/pyproject.toml +1 -1
  3. researchloop-0.3.4/researchloop/__init__.py +1 -0
  4. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/runner/job_templates/sge.sh.j2 +11 -0
  5. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/runner/job_templates/slurm.sh.j2 +11 -0
  6. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/runner/templates/fix_issues.md.j2 +1 -0
  7. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/runner/templates/research_sprint.md.j2 +11 -0
  8. {researchloop-0.3.2 → researchloop-0.3.4}/tests/test_runner.py +43 -0
  9. {researchloop-0.3.2 → researchloop-0.3.4}/uv.lock +1 -1
  10. researchloop-0.3.2/researchloop/__init__.py +0 -1
  11. {researchloop-0.3.2 → researchloop-0.3.4}/.github/workflows/ci.yml +0 -0
  12. {researchloop-0.3.2 → researchloop-0.3.4}/.github/workflows/docs.yml +0 -0
  13. {researchloop-0.3.2 → researchloop-0.3.4}/.github/workflows/release.yml +0 -0
  14. {researchloop-0.3.2 → researchloop-0.3.4}/.gitignore +0 -0
  15. {researchloop-0.3.2 → researchloop-0.3.4}/CLAUDE.md +0 -0
  16. {researchloop-0.3.2 → researchloop-0.3.4}/Dockerfile +0 -0
  17. {researchloop-0.3.2 → researchloop-0.3.4}/LICENSE +0 -0
  18. {researchloop-0.3.2 → researchloop-0.3.4}/README.md +0 -0
  19. {researchloop-0.3.2 → researchloop-0.3.4}/docs/assets/mmlu-combined.gif +0 -0
  20. {researchloop-0.3.2 → researchloop-0.3.4}/docs/assets/mmlu-combined.mp4 +0 -0
  21. {researchloop-0.3.2 → researchloop-0.3.4}/docs/cli.md +0 -0
  22. {researchloop-0.3.2 → researchloop-0.3.4}/docs/configuration.md +0 -0
  23. {researchloop-0.3.2 → researchloop-0.3.4}/docs/dashboard.md +0 -0
  24. {researchloop-0.3.2 → researchloop-0.3.4}/docs/deployment.md +0 -0
  25. {researchloop-0.3.2 → researchloop-0.3.4}/docs/development.md +0 -0
  26. {researchloop-0.3.2 → researchloop-0.3.4}/docs/getting-started.md +0 -0
  27. {researchloop-0.3.2 → researchloop-0.3.4}/docs/index.md +0 -0
  28. {researchloop-0.3.2 → researchloop-0.3.4}/docs/security.md +0 -0
  29. {researchloop-0.3.2 → researchloop-0.3.4}/docs/slack.md +0 -0
  30. {researchloop-0.3.2 → researchloop-0.3.4}/mkdocs.yml +0 -0
  31. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/__main__.py +0 -0
  32. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/cli.py +0 -0
  33. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/clusters/__init__.py +0 -0
  34. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/clusters/monitor.py +0 -0
  35. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/clusters/ssh.py +0 -0
  36. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/comms/__init__.py +0 -0
  37. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/comms/base.py +0 -0
  38. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/comms/ntfy.py +0 -0
  39. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/comms/router.py +0 -0
  40. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/comms/slack.py +0 -0
  41. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/core/__init__.py +0 -0
  42. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/core/config.py +0 -0
  43. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/core/credentials.py +0 -0
  44. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/core/models.py +0 -0
  45. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/core/orchestrator.py +0 -0
  46. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/dashboard/__init__.py +0 -0
  47. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/dashboard/app.py +0 -0
  48. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/dashboard/auth.py +0 -0
  49. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/dashboard/routes.py +0 -0
  50. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/dashboard/templates/base.html +0 -0
  51. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/dashboard/templates/login.html +0 -0
  52. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/dashboard/templates/loop_detail.html +0 -0
  53. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/dashboard/templates/loops.html +0 -0
  54. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/dashboard/templates/search.html +0 -0
  55. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/dashboard/templates/setup.html +0 -0
  56. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/dashboard/templates/sprint_detail.html +0 -0
  57. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/dashboard/templates/sprints.html +0 -0
  58. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/dashboard/templates/studies.html +0 -0
  59. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/dashboard/templates/study_detail.html +0 -0
  60. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/dashboard/templates/study_form.html +0 -0
  61. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/dashboard/templates/tweak_detail.html +0 -0
  62. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/db/__init__.py +0 -0
  63. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/db/database.py +0 -0
  64. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/db/migrations.py +0 -0
  65. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/db/queries.py +0 -0
  66. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/runner/__init__.py +0 -0
  67. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/runner/claude.py +0 -0
  68. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/runner/job_templates/sge_tweak.sh.j2 +0 -0
  69. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/runner/job_templates/slurm_tweak.sh.j2 +0 -0
  70. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/runner/main.py +0 -0
  71. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/runner/pipeline.py +0 -0
  72. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/runner/templates/idea_generator.md.j2 +0 -0
  73. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/runner/templates/red_team.md.j2 +0 -0
  74. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/runner/templates/report.md.j2 +0 -0
  75. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/runner/templates/summarizer.md.j2 +0 -0
  76. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/runner/templates/tweak.md.j2 +0 -0
  77. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/runner/upload.py +0 -0
  78. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/schedulers/__init__.py +0 -0
  79. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/schedulers/base.py +0 -0
  80. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/schedulers/local.py +0 -0
  81. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/schedulers/sge.py +0 -0
  82. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/schedulers/slurm.py +0 -0
  83. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/sprints/__init__.py +0 -0
  84. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/sprints/auto_loop.py +0 -0
  85. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/sprints/manager.py +0 -0
  86. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/studies/__init__.py +0 -0
  87. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/studies/manager.py +0 -0
  88. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/testing/__init__.py +0 -0
  89. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/testing/slack_mock.py +0 -0
  90. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop/testing/slack_simulator.py +0 -0
  91. {researchloop-0.3.2 → researchloop-0.3.4}/researchloop.toml.example +0 -0
  92. {researchloop-0.3.2 → researchloop-0.3.4}/slack-app-manifest.yml +0 -0
  93. {researchloop-0.3.2 → researchloop-0.3.4}/tests/__init__.py +0 -0
  94. {researchloop-0.3.2 → researchloop-0.3.4}/tests/conftest.py +0 -0
  95. {researchloop-0.3.2 → researchloop-0.3.4}/tests/docker/sge/Dockerfile +0 -0
  96. {researchloop-0.3.2 → researchloop-0.3.4}/tests/docker/sge/entrypoint.sh +0 -0
  97. {researchloop-0.3.2 → researchloop-0.3.4}/tests/docker/sge/mock_claude.sh +0 -0
  98. {researchloop-0.3.2 → researchloop-0.3.4}/tests/docker/slurm/Dockerfile +0 -0
  99. {researchloop-0.3.2 → researchloop-0.3.4}/tests/docker/slurm/entrypoint.sh +0 -0
  100. {researchloop-0.3.2 → researchloop-0.3.4}/tests/docker/slurm/mock_claude.sh +0 -0
  101. {researchloop-0.3.2 → researchloop-0.3.4}/tests/integration/__init__.py +0 -0
  102. {researchloop-0.3.2 → researchloop-0.3.4}/tests/integration/conftest.py +0 -0
  103. {researchloop-0.3.2 → researchloop-0.3.4}/tests/integration/test_loop_advancement.py +0 -0
  104. {researchloop-0.3.2 → researchloop-0.3.4}/tests/integration/test_loop_and_monitor.py +0 -0
  105. {researchloop-0.3.2 → researchloop-0.3.4}/tests/integration/test_sge_scheduler.py +0 -0
  106. {researchloop-0.3.2 → researchloop-0.3.4}/tests/integration/test_slurm_scheduler.py +0 -0
  107. {researchloop-0.3.2 → researchloop-0.3.4}/tests/integration/test_sprint_slurm.py +0 -0
  108. {researchloop-0.3.2 → researchloop-0.3.4}/tests/integration/test_webhook_and_refresh.py +0 -0
  109. {researchloop-0.3.2 → researchloop-0.3.4}/tests/test_api.py +0 -0
  110. {researchloop-0.3.2 → researchloop-0.3.4}/tests/test_auto_loop.py +0 -0
  111. {researchloop-0.3.2 → researchloop-0.3.4}/tests/test_cli.py +0 -0
  112. {researchloop-0.3.2 → researchloop-0.3.4}/tests/test_config.py +0 -0
  113. {researchloop-0.3.2 → researchloop-0.3.4}/tests/test_dashboard.py +0 -0
  114. {researchloop-0.3.2 → researchloop-0.3.4}/tests/test_database.py +0 -0
  115. {researchloop-0.3.2 → researchloop-0.3.4}/tests/test_models.py +0 -0
  116. {researchloop-0.3.2 → researchloop-0.3.4}/tests/test_notification.py +0 -0
  117. {researchloop-0.3.2 → researchloop-0.3.4}/tests/test_queries.py +0 -0
  118. {researchloop-0.3.2 → researchloop-0.3.4}/tests/test_schedulers.py +0 -0
  119. {researchloop-0.3.2 → researchloop-0.3.4}/tests/test_search.py +0 -0
  120. {researchloop-0.3.2 → researchloop-0.3.4}/tests/test_sge.py +0 -0
  121. {researchloop-0.3.2 → researchloop-0.3.4}/tests/test_slack.py +0 -0
  122. {researchloop-0.3.2 → researchloop-0.3.4}/tests/test_slack_events.py +0 -0
  123. {researchloop-0.3.2 → researchloop-0.3.4}/tests/test_slack_mock.py +0 -0
  124. {researchloop-0.3.2 → researchloop-0.3.4}/tests/test_slack_simulator.py +0 -0
  125. {researchloop-0.3.2 → researchloop-0.3.4}/tests/test_sprint_manager.py +0 -0
  126. {researchloop-0.3.2 → researchloop-0.3.4}/tests/test_study_manager.py +0 -0
  127. {researchloop-0.3.2 → researchloop-0.3.4}/tests/test_tweaks.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: researchloop
3
- Version: 0.3.2
3
+ Version: 0.3.4
4
4
  Summary: Automated research sprint platform for HPC clusters
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "researchloop"
3
- version = "0.3.2"
3
+ version = "0.3.4"
4
4
  description = "Automated research sprint platform for HPC clusters"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -0,0 +1 @@
1
+ __version__ = "0.3.4"
@@ -241,6 +241,17 @@ print(json.dumps({
241
241
  }
242
242
 
243
243
  _heartbeat_loop() {
244
+ # The parent script runs with `set -euo pipefail`, which is inherited
245
+ # by this backgrounded subshell. An unprotected command-substitution
246
+ # failure here (e.g. `ls -t pattern | head -1` racing into SIGPIPE on
247
+ # ls under pipefail, or a glob that matches nothing) is enough to kill
248
+ # the entire watchdog and leave the sprint without heartbeats or
249
+ # STUCK_PIPE detection for the rest of its wall-clock. Disable strict
250
+ # mode for the watchdog — a noisy heartbeat is far better than a
251
+ # silently-dead one.
252
+ set +e
253
+ set +o pipefail
254
+
244
255
  # Watchdog: detect and recover from hung pipelines.
245
256
  #
246
257
  # STUCK_PIPE warn: the active step's stream-json file goes silent for
@@ -243,6 +243,17 @@ print(json.dumps({
243
243
  }
244
244
 
245
245
  _heartbeat_loop() {
246
+ # The parent script runs with `set -euo pipefail`, which is inherited
247
+ # by this backgrounded subshell. An unprotected command-substitution
248
+ # failure here (e.g. `ls -t pattern | head -1` racing into SIGPIPE on
249
+ # ls under pipefail, or a glob that matches nothing) is enough to kill
250
+ # the entire watchdog and leave the sprint without heartbeats or
251
+ # STUCK_PIPE detection for the rest of its wall-clock. Disable strict
252
+ # mode for the watchdog — a noisy heartbeat is far better than a
253
+ # silently-dead one.
254
+ set +e
255
+ set +o pipefail
256
+
246
257
  # Watchdog: detect and recover from hung pipelines.
247
258
  #
248
259
  # STUCK_PIPE warn: the active step's stream-json file goes silent for
@@ -9,3 +9,4 @@ Review the file: red_team_round_{{ round_number }}.md
9
9
  - Document what you changed and why in fixes_round_{{ round_number }}.md
10
10
  - Do not break existing working functionality
11
11
  - Update progress.md with what you're fixing and the results
12
+ - Run any re-training or re-evaluation commands synchronously in the foreground. NEVER pass `run_in_background: true` on a Bash tool call, and do NOT use `nohup`, `disown`, `setsid`, or `&` to detach work — this is a one-shot `claude -p` session and any orphaned subprocess will be killed when you end your turn.
@@ -23,6 +23,17 @@ python train.py 2>&1 | tee -a output.log
23
23
 
24
24
  This lets the team monitor script output remotely. Always use `tee -a` (append mode) so all runs accumulate in the same log file. Do this for every script execution, training run, or evaluation.
25
25
 
26
+ ## Long-running commands — run synchronously, NEVER in the background
27
+ This sprint runs as a single one-shot `claude -p` invocation. There is no notification mechanism, no resumed turn, and no way to "wait and be notified". The moment you end your turn, the session is over and the sprint runner kills any detached subprocesses (training, evaluation, anything you started).
28
+
29
+ Rules:
30
+ - NEVER pass `run_in_background: true` on a Bash tool call. Always run training and evaluation in the foreground.
31
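+ - NEVER use `nohup`, `disown`, `setsid`, a trailing `&`, or any other shell-level backgrounding for work you care about. The orphan will be killed. (A plain `&>` redirect only redirects output and is fine as long as the command itself stays in the foreground.)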
32
+ - If a single Bash call needs to run longer than its default timeout, raise the per-call `timeout` parameter instead of backgrounding it.
33
+ - The `PushNotification`, `Monitor`, `Cron*`, `Task`, `AskUserQuestion`, `EnterPlanMode`, and `EnterWorktree` tools are NOT available in this one-shot mode. Ignore them if they appear in your tool list.
34
+
35
+ Plan training as a series of synchronous foreground commands. Wait for each to finish before ending your turn.
36
+
26
37
  ## Progress Log
27
38
  Maintain a file called `progress.md` in the sprint directory. Update it regularly as you work — it's how the team monitors your progress remotely. Keep it concise and current:
28
39
 
@@ -102,6 +102,24 @@ class TestRenderTemplate:
102
102
  )
103
103
  assert "progress.md" in output
104
104
 
105
+ def test_research_template_forbids_backgrounding(self):
106
+ """claude -p is one-shot — backgrounded subprocesses get orphaned and
107
+ killed by the runner's pipeline-cleanup watchdog. The template must
108
+ tell claude not to use run_in_background or shell-level detach."""
109
+ output = render_template(
110
+ "research_sprint.md.j2",
111
+ study_context="Study context",
112
+ idea="test idea",
113
+ sprint_dir="/tmp/sprint",
114
+ )
115
+ assert "run_in_background" in output
116
+ assert "nohup" in output
117
+ assert "one-shot" in output
118
+
119
+ def test_fix_template_forbids_backgrounding(self):
120
+ output = render_template("fix_issues.md.j2", round_number=1)
121
+ assert "run_in_background" in output
122
+
105
123
  def test_red_team_template(self):
106
124
  output = render_template(
107
125
  "red_team.md.j2",
@@ -158,6 +176,21 @@ class TestJobScriptWatchdog:
158
176
  # Once-per-stuck-episode flag, not log-every-heartbeat.
159
177
  assert "stuck_warned" in script
160
178
 
179
+ def _assert_heartbeat_loop_disables_strict_mode(self, script: str) -> None:
180
+ # The parent script has set -euo pipefail. The backgrounded
181
+ # _heartbeat_loop inherits it — without explicitly disabling errexit
182
+ # and pipefail, a single command-substitution failure (e.g. an
183
+ # ls|head SIGPIPE race) can silently kill the watchdog and the
184
+ # sprint loses all heartbeat + STUCK_PIPE detection for the rest of
185
+ # its wall-clock. The first two `set` lines inside the function
186
+ # must disable both modes.
187
+ loop_idx = script.index("_heartbeat_loop() {")
188
+ body_idx = script.index("\n", loop_idx) + 1
189
+ # Check the disabling lines are at the top of the function body.
190
+ head = script[body_idx : body_idx + 800]
191
+ assert "set +e" in head
192
+ assert "set +o pipefail" in head
193
+
161
194
  def _assert_hung_pipeline_recovery_present(self, script: str) -> None:
162
195
  # claude must run in its own session so the watchdog can SIGTERM the
163
196
  # whole group (claude + any leaked Bash-tool subprocesses) by pgid.
@@ -191,3 +224,13 @@ class TestJobScriptWatchdog:
191
224
 
192
225
  def test_sge_template_includes_hung_pipeline_recovery(self):
193
226
  self._assert_hung_pipeline_recovery_present(_render_job_template("sge.sh.j2"))
227
+
228
+ def test_slurm_heartbeat_loop_disables_strict_mode(self):
229
+ self._assert_heartbeat_loop_disables_strict_mode(
230
+ _render_job_template("slurm.sh.j2")
231
+ )
232
+
233
+ def test_sge_heartbeat_loop_disables_strict_mode(self):
234
+ self._assert_heartbeat_loop_disables_strict_mode(
235
+ _render_job_template("sge.sh.j2")
236
+ )
@@ -1152,7 +1152,7 @@ wheels = [
1152
1152
 
1153
1153
  [[package]]
1154
1154
  name = "researchloop"
1155
- version = "0.3.2"
1155
+ version = "0.3.4"
1156
1156
  source = { editable = "." }
1157
1157
  dependencies = [
1158
1158
  { name = "aiosqlite" },
@@ -1 +0,0 @@
1
- __version__ = "0.3.2"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes