researchloop 0.3.0__tar.gz → 0.3.1__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (127)
  1. {researchloop-0.3.0 → researchloop-0.3.1}/PKG-INFO +1 -1
  2. {researchloop-0.3.0 → researchloop-0.3.1}/pyproject.toml +1 -1
  3. researchloop-0.3.1/researchloop/__init__.py +1 -0
  4. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/clusters/monitor.py +21 -7
  5. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/core/orchestrator.py +8 -4
  6. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/dashboard/routes.py +6 -11
  7. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/dashboard/templates/sprint_detail.html +5 -1
  8. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/runner/templates/tweak.md.j2 +6 -1
  9. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/sprints/manager.py +85 -15
  10. {researchloop-0.3.0 → researchloop-0.3.1}/tests/test_sprint_manager.py +169 -0
  11. {researchloop-0.3.0 → researchloop-0.3.1}/tests/test_tweaks.py +112 -3
  12. {researchloop-0.3.0 → researchloop-0.3.1}/uv.lock +1 -1
  13. researchloop-0.3.0/researchloop/__init__.py +0 -1
  14. {researchloop-0.3.0 → researchloop-0.3.1}/.github/workflows/ci.yml +0 -0
  15. {researchloop-0.3.0 → researchloop-0.3.1}/.github/workflows/docs.yml +0 -0
  16. {researchloop-0.3.0 → researchloop-0.3.1}/.github/workflows/release.yml +0 -0
  17. {researchloop-0.3.0 → researchloop-0.3.1}/.gitignore +0 -0
  18. {researchloop-0.3.0 → researchloop-0.3.1}/CLAUDE.md +0 -0
  19. {researchloop-0.3.0 → researchloop-0.3.1}/Dockerfile +0 -0
  20. {researchloop-0.3.0 → researchloop-0.3.1}/LICENSE +0 -0
  21. {researchloop-0.3.0 → researchloop-0.3.1}/README.md +0 -0
  22. {researchloop-0.3.0 → researchloop-0.3.1}/docs/assets/mmlu-combined.gif +0 -0
  23. {researchloop-0.3.0 → researchloop-0.3.1}/docs/assets/mmlu-combined.mp4 +0 -0
  24. {researchloop-0.3.0 → researchloop-0.3.1}/docs/cli.md +0 -0
  25. {researchloop-0.3.0 → researchloop-0.3.1}/docs/configuration.md +0 -0
  26. {researchloop-0.3.0 → researchloop-0.3.1}/docs/dashboard.md +0 -0
  27. {researchloop-0.3.0 → researchloop-0.3.1}/docs/deployment.md +0 -0
  28. {researchloop-0.3.0 → researchloop-0.3.1}/docs/development.md +0 -0
  29. {researchloop-0.3.0 → researchloop-0.3.1}/docs/getting-started.md +0 -0
  30. {researchloop-0.3.0 → researchloop-0.3.1}/docs/index.md +0 -0
  31. {researchloop-0.3.0 → researchloop-0.3.1}/docs/security.md +0 -0
  32. {researchloop-0.3.0 → researchloop-0.3.1}/docs/slack.md +0 -0
  33. {researchloop-0.3.0 → researchloop-0.3.1}/mkdocs.yml +0 -0
  34. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/__main__.py +0 -0
  35. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/cli.py +0 -0
  36. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/clusters/__init__.py +0 -0
  37. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/clusters/ssh.py +0 -0
  38. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/comms/__init__.py +0 -0
  39. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/comms/base.py +0 -0
  40. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/comms/ntfy.py +0 -0
  41. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/comms/router.py +0 -0
  42. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/comms/slack.py +0 -0
  43. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/core/__init__.py +0 -0
  44. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/core/config.py +0 -0
  45. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/core/credentials.py +0 -0
  46. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/core/models.py +0 -0
  47. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/dashboard/__init__.py +0 -0
  48. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/dashboard/app.py +0 -0
  49. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/dashboard/auth.py +0 -0
  50. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/dashboard/templates/base.html +0 -0
  51. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/dashboard/templates/login.html +0 -0
  52. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/dashboard/templates/loop_detail.html +0 -0
  53. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/dashboard/templates/loops.html +0 -0
  54. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/dashboard/templates/search.html +0 -0
  55. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/dashboard/templates/setup.html +0 -0
  56. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/dashboard/templates/sprints.html +0 -0
  57. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/dashboard/templates/studies.html +0 -0
  58. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/dashboard/templates/study_detail.html +0 -0
  59. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/dashboard/templates/study_form.html +0 -0
  60. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/dashboard/templates/tweak_detail.html +0 -0
  61. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/db/__init__.py +0 -0
  62. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/db/database.py +0 -0
  63. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/db/migrations.py +0 -0
  64. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/db/queries.py +0 -0
  65. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/runner/__init__.py +0 -0
  66. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/runner/claude.py +0 -0
  67. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/runner/job_templates/sge.sh.j2 +0 -0
  68. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/runner/job_templates/sge_tweak.sh.j2 +0 -0
  69. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/runner/job_templates/slurm.sh.j2 +0 -0
  70. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/runner/job_templates/slurm_tweak.sh.j2 +0 -0
  71. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/runner/main.py +0 -0
  72. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/runner/pipeline.py +0 -0
  73. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/runner/templates/fix_issues.md.j2 +0 -0
  74. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/runner/templates/idea_generator.md.j2 +0 -0
  75. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/runner/templates/red_team.md.j2 +0 -0
  76. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/runner/templates/report.md.j2 +0 -0
  77. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/runner/templates/research_sprint.md.j2 +0 -0
  78. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/runner/templates/summarizer.md.j2 +0 -0
  79. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/runner/upload.py +0 -0
  80. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/schedulers/__init__.py +0 -0
  81. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/schedulers/base.py +0 -0
  82. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/schedulers/local.py +0 -0
  83. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/schedulers/sge.py +0 -0
  84. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/schedulers/slurm.py +0 -0
  85. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/sprints/__init__.py +0 -0
  86. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/sprints/auto_loop.py +0 -0
  87. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/studies/__init__.py +0 -0
  88. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/studies/manager.py +0 -0
  89. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/testing/__init__.py +0 -0
  90. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/testing/slack_mock.py +0 -0
  91. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop/testing/slack_simulator.py +0 -0
  92. {researchloop-0.3.0 → researchloop-0.3.1}/researchloop.toml.example +0 -0
  93. {researchloop-0.3.0 → researchloop-0.3.1}/slack-app-manifest.yml +0 -0
  94. {researchloop-0.3.0 → researchloop-0.3.1}/tests/__init__.py +0 -0
  95. {researchloop-0.3.0 → researchloop-0.3.1}/tests/conftest.py +0 -0
  96. {researchloop-0.3.0 → researchloop-0.3.1}/tests/docker/sge/Dockerfile +0 -0
  97. {researchloop-0.3.0 → researchloop-0.3.1}/tests/docker/sge/entrypoint.sh +0 -0
  98. {researchloop-0.3.0 → researchloop-0.3.1}/tests/docker/sge/mock_claude.sh +0 -0
  99. {researchloop-0.3.0 → researchloop-0.3.1}/tests/docker/slurm/Dockerfile +0 -0
  100. {researchloop-0.3.0 → researchloop-0.3.1}/tests/docker/slurm/entrypoint.sh +0 -0
  101. {researchloop-0.3.0 → researchloop-0.3.1}/tests/docker/slurm/mock_claude.sh +0 -0
  102. {researchloop-0.3.0 → researchloop-0.3.1}/tests/integration/__init__.py +0 -0
  103. {researchloop-0.3.0 → researchloop-0.3.1}/tests/integration/conftest.py +0 -0
  104. {researchloop-0.3.0 → researchloop-0.3.1}/tests/integration/test_loop_advancement.py +0 -0
  105. {researchloop-0.3.0 → researchloop-0.3.1}/tests/integration/test_loop_and_monitor.py +0 -0
  106. {researchloop-0.3.0 → researchloop-0.3.1}/tests/integration/test_sge_scheduler.py +0 -0
  107. {researchloop-0.3.0 → researchloop-0.3.1}/tests/integration/test_slurm_scheduler.py +0 -0
  108. {researchloop-0.3.0 → researchloop-0.3.1}/tests/integration/test_sprint_slurm.py +0 -0
  109. {researchloop-0.3.0 → researchloop-0.3.1}/tests/integration/test_webhook_and_refresh.py +0 -0
  110. {researchloop-0.3.0 → researchloop-0.3.1}/tests/test_api.py +0 -0
  111. {researchloop-0.3.0 → researchloop-0.3.1}/tests/test_auto_loop.py +0 -0
  112. {researchloop-0.3.0 → researchloop-0.3.1}/tests/test_cli.py +0 -0
  113. {researchloop-0.3.0 → researchloop-0.3.1}/tests/test_config.py +0 -0
  114. {researchloop-0.3.0 → researchloop-0.3.1}/tests/test_dashboard.py +0 -0
  115. {researchloop-0.3.0 → researchloop-0.3.1}/tests/test_database.py +0 -0
  116. {researchloop-0.3.0 → researchloop-0.3.1}/tests/test_models.py +0 -0
  117. {researchloop-0.3.0 → researchloop-0.3.1}/tests/test_notification.py +0 -0
  118. {researchloop-0.3.0 → researchloop-0.3.1}/tests/test_queries.py +0 -0
  119. {researchloop-0.3.0 → researchloop-0.3.1}/tests/test_runner.py +0 -0
  120. {researchloop-0.3.0 → researchloop-0.3.1}/tests/test_schedulers.py +0 -0
  121. {researchloop-0.3.0 → researchloop-0.3.1}/tests/test_search.py +0 -0
  122. {researchloop-0.3.0 → researchloop-0.3.1}/tests/test_sge.py +0 -0
  123. {researchloop-0.3.0 → researchloop-0.3.1}/tests/test_slack.py +0 -0
  124. {researchloop-0.3.0 → researchloop-0.3.1}/tests/test_slack_events.py +0 -0
  125. {researchloop-0.3.0 → researchloop-0.3.1}/tests/test_slack_mock.py +0 -0
  126. {researchloop-0.3.0 → researchloop-0.3.1}/tests/test_slack_simulator.py +0 -0
  127. {researchloop-0.3.0 → researchloop-0.3.1}/tests/test_study_manager.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: researchloop
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: Automated research sprint platform for HPC clusters
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "researchloop"
3
- version = "0.3.0"
3
+ version = "0.3.1"
4
4
  description = "Automated research sprint platform for HPC clusters"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -0,0 +1 @@
1
+ __version__ = "0.3.1"
@@ -6,12 +6,15 @@ import asyncio
6
6
  import json
7
7
  import logging
8
8
  from datetime import datetime, timezone
9
- from typing import Any
9
+ from typing import TYPE_CHECKING, Any
10
10
 
11
11
  from researchloop.clusters.ssh import SSHManager
12
12
  from researchloop.db import queries
13
13
  from researchloop.schedulers.base import BaseScheduler
14
14
 
15
+ if TYPE_CHECKING:
16
+ from researchloop.sprints.manager import SprintManager
17
+
15
18
  logger = logging.getLogger(__name__)
16
19
 
17
20
  # If a job's heartbeat is older than this many seconds AND the job is not
@@ -28,11 +31,17 @@ class JobMonitor:
28
31
  db: Any,
29
32
  schedulers: dict[str, BaseScheduler],
30
33
  config: Any = None,
34
+ sprint_manager: SprintManager | None = None,
31
35
  ) -> None:
32
36
  self.ssh_manager = ssh_manager
33
37
  self.db = db
34
38
  self.schedulers = schedulers
35
39
  self.config = config
40
+ # Optional: when set, terminal-state transitions go through
41
+ # sprint_manager.mark_sprint_terminal so the parent auto-loop
42
+ # advances. None falls back to a direct DB update (used by
43
+ # minimal test fixtures that don't construct a SprintManager).
44
+ self.sprint_manager = sprint_manager
36
45
  self._polling_task: asyncio.Task[None] | None = None
37
46
  self._stop_event = asyncio.Event()
38
47
 
@@ -143,12 +152,17 @@ class JobMonitor:
143
152
  # Persist the updated status if it changed.
144
153
  if status in ("completed", "failed"):
145
154
  try:
146
- await queries.update_sprint(
147
- self.db,
148
- sprint_id,
149
- status=status,
150
- completed_at=datetime.now(timezone.utc).isoformat(),
151
- )
155
+ if self.sprint_manager is not None:
156
+ await self.sprint_manager.mark_sprint_terminal(
157
+ sprint_id, status
158
+ )
159
+ else:
160
+ await queries.update_sprint(
161
+ self.db,
162
+ sprint_id,
163
+ status=status,
164
+ completed_at=datetime.now(timezone.utc).isoformat(),
165
+ )
152
166
  except Exception:
153
167
  logger.exception(
154
168
  "Failed to update DB status for sprint %s", sprint_id
@@ -112,6 +112,9 @@ class Orchestrator:
112
112
  sprint_manager=self.sprint_manager,
113
113
  config=self.config,
114
114
  )
115
+ # Late-bind the back-reference so SprintManager.mark_sprint_terminal
116
+ # can advance the parent loop on every terminal transition.
117
+ self.sprint_manager.auto_loop = self.auto_loop
115
118
 
116
119
  # 8. Job monitor
117
120
  self.job_monitor = JobMonitor(
@@ -119,6 +122,7 @@ class Orchestrator:
119
122
  db=self.db,
120
123
  schedulers=self.schedulers,
121
124
  config=self.config,
125
+ sprint_manager=self.sprint_manager,
122
126
  )
123
127
  await self.job_monitor.start_polling()
124
128
 
@@ -405,6 +409,10 @@ def create_app(orchestrator: Orchestrator) -> FastAPI:
405
409
  {"ok": True, "sprint_id": sprint_id, "tweak_id": tweak_id}
406
410
  )
407
411
 
412
+ # handle_completion fires auto_loop.on_sprint_complete internally
413
+ # via mark_sprint_terminal — single chokepoint for terminal-state
414
+ # transitions, so the loop also advances when the JobMonitor or a
415
+ # dashboard refresh is the one that detects the terminal status.
408
416
  await orchestrator.sprint_manager.handle_completion(
409
417
  sprint_id=sprint_id,
410
418
  status=status,
@@ -413,10 +421,6 @@ def create_app(orchestrator: Orchestrator) -> FastAPI:
413
421
  idea=idea,
414
422
  )
415
423
 
416
- # Trigger auto-loop advancement if applicable.
417
- if orchestrator.auto_loop is not None:
418
- await orchestrator.auto_loop.on_sprint_complete(sprint_id)
419
-
420
424
  logger.info(
421
425
  "Webhook: sprint %s completion processed (status=%s)",
422
426
  sprint_id,
@@ -899,17 +899,12 @@ def add_dashboard_routes(
899
899
  }
900
900
  cur = sprint["status"]
901
901
  if real_status in terminal and cur not in terminal:
902
- from datetime import (
903
- datetime,
904
- timezone,
905
- )
906
-
907
- now = datetime.now(timezone.utc).isoformat()
908
- await queries.update_sprint(
909
- orchestrator.db,
910
- sprint_id,
911
- status=real_status,
912
- completed_at=now,
902
+ # Route through SprintManager so the parent
903
+ # auto-loop is advanced too — otherwise a
904
+ # webhook-less failure leaves the loop stuck
905
+ # in "running".
906
+ await orchestrator.sprint_manager.mark_sprint_terminal(
907
+ sprint_id, real_status
913
908
  )
914
909
 
915
910
  # Resolve sprints_base the same way
@@ -106,7 +106,7 @@
106
106
  </div>
107
107
  {% endif %}
108
108
 
109
- {% if sprint.status == 'completed' %}
109
+ {% if sprint.status in ('completed', 'failed', 'cancelled') %}
110
110
  <h3>Quick Tweak</h3>
111
111
  <div class="card">
112
112
  {% if tweak_active %}
@@ -116,7 +116,11 @@
116
116
  onsubmit="var b=this.querySelector('button[type=submit]');if(b.disabled)return false;b.disabled=true;b.textContent='Submitting...';">
117
117
  <input type="hidden" name="csrf_token" value="{{ csrf_token }}">
118
118
  <div class="form-group">
119
+ {% if sprint.status == 'completed' %}
119
120
  <textarea name="instruction" rows="3" placeholder="e.g. Fix the axis labels on the scatter plots, add a histogram of residuals"></textarea>
121
+ {% else %}
122
+ <textarea name="instruction" rows="3" placeholder="e.g. Retry with smaller batch size, or investigate the error and continue"></textarea>
123
+ {% endif %}
120
124
  </div>
121
125
  <details style="margin-bottom:0.75rem">
122
126
  <summary class="dim" style="cursor:pointer;font-size:0.85rem">Resource settings</summary>
@@ -1,4 +1,9 @@
1
- You are applying a quick tweak to a completed research sprint.
1
+ You are applying a quick tweak to a research sprint.
2
+
3
+ The sprint may have completed successfully, or it may have failed or
4
+ been cancelled before finishing. Check the existing files (findings.md,
5
+ report.md, results/, sprint logs) to see how far it got, then apply the
6
+ instruction below.
2
7
 
3
8
  ## Tweak Instruction
4
9
  {{ instruction }}
@@ -17,6 +17,7 @@ if TYPE_CHECKING:
17
17
  from researchloop.core.config import Config
18
18
  from researchloop.db.database import Database
19
19
  from researchloop.schedulers.base import BaseScheduler
20
+ from researchloop.sprints.auto_loop import AutoLoopController
20
21
 
21
22
  from researchloop.core.models import (
22
23
  Sprint,
@@ -89,6 +90,9 @@ class SprintManager:
89
90
  self.schedulers = schedulers
90
91
  self.study_manager = study_manager
91
92
  self.notification_router = notification_router
93
+ # Late-bound by Orchestrator after AutoLoopController is built;
94
+ # circular dep otherwise (AutoLoopController takes a SprintManager).
95
+ self.auto_loop: AutoLoopController | None = None
92
96
 
93
97
  # ------------------------------------------------------------------
94
98
  # Create
@@ -613,6 +617,56 @@ class SprintManager:
613
617
  # Completion handling
614
618
  # ------------------------------------------------------------------
615
619
 
620
+ async def mark_sprint_terminal(
621
+ self,
622
+ sprint_id: str,
623
+ status: str,
624
+ error: str | None = None,
625
+ **extra_fields: Any,
626
+ ) -> bool:
627
+ """Transition a sprint to a terminal state and notify its auto-loop.
628
+
629
+ This is the single chokepoint for terminal-state transitions —
630
+ webhook, JobMonitor SSH polling, and dashboard refresh all flow
631
+ through here so a loop sprint that fails outside the webhook path
632
+ still advances the parent loop.
633
+
634
+ Idempotent: returns ``False`` (and skips the loop callback) if the
635
+ sprint is already terminal or doesn't exist, so callers can branch
636
+ on whether their call was the one that did the transition.
637
+ """
638
+ sprint = await queries.get_sprint(self.db, sprint_id)
639
+ if sprint is None:
640
+ return False
641
+ terminal = {
642
+ SprintStatus.COMPLETED.value,
643
+ SprintStatus.FAILED.value,
644
+ SprintStatus.CANCELLED.value,
645
+ }
646
+ if sprint.get("status") in terminal:
647
+ return False
648
+
649
+ update_kw: dict[str, Any] = {
650
+ "status": status,
651
+ "completed_at": datetime.now(timezone.utc).isoformat(),
652
+ }
653
+ if error is not None:
654
+ update_kw["error"] = error
655
+ update_kw.update(extra_fields)
656
+ await queries.update_sprint(self.db, sprint_id, **update_kw)
657
+
658
+ if self.auto_loop is not None and sprint.get("loop_id"):
659
+ try:
660
+ await self.auto_loop.on_sprint_complete(sprint_id)
661
+ except Exception:
662
+ logger.exception(
663
+ "Auto-loop advance failed for sprint %s; "
664
+ "loop may be stuck — manual intervention needed",
665
+ sprint_id,
666
+ )
667
+
668
+ return True
669
+
616
670
  async def handle_completion(
617
671
  self,
618
672
  sprint_id: str,
@@ -624,29 +678,34 @@ class SprintManager:
624
678
  """Handle a sprint completion event.
625
679
 
626
680
  Updates the database, sends notifications, and creates an event
627
- record.
681
+ record. Idempotent — if the sprint is already terminal (e.g. the
682
+ JobMonitor caught it first), the rich notification work is
683
+ skipped to avoid double-firing.
628
684
  """
629
- now = datetime.now(timezone.utc).isoformat()
630
-
631
- update_kw: dict[str, str | None] = {
632
- "status": status,
633
- "completed_at": now,
634
- "summary": summary,
635
- "error": error,
636
- }
685
+ extra: dict[str, Any] = {}
686
+ if summary is not None:
687
+ extra["summary"] = summary
637
688
 
638
689
  # Update the idea if it was auto-generated (sprint had idea=None).
639
690
  sprint_before = await queries.get_sprint(self.db, sprint_id)
640
691
  if sprint_before and not sprint_before.get("idea"):
641
692
  if idea:
642
- update_kw["idea"] = idea[:500]
693
+ extra["idea"] = idea[:500]
643
694
  else:
644
695
  # Fallback: try to read idea.txt from the cluster.
645
696
  fetched = await self._fetch_idea(sprint_before)
646
697
  if fetched:
647
- update_kw["idea"] = fetched[:500]
698
+ extra["idea"] = fetched[:500]
648
699
 
649
- await queries.update_sprint(self.db, sprint_id, **update_kw)
700
+ transitioned = await self.mark_sprint_terminal(
701
+ sprint_id, status, error=error, **extra
702
+ )
703
+ if not transitioned:
704
+ logger.info(
705
+ "Sprint %s already terminal; skipping completion processing",
706
+ sprint_id,
707
+ )
708
+ return
650
709
 
651
710
  sprint = await queries.get_sprint(self.db, sprint_id)
652
711
  study_name = sprint["study_name"] if sprint else "unknown"
@@ -857,7 +916,12 @@ class SprintManager:
857
916
  job_options: dict[str, str] | None = None,
858
917
  time_limit: str | None = None,
859
918
  ) -> str:
860
- """Submit a quick tweak job for a completed sprint.
919
+ """Submit a quick tweak job for a terminal sprint.
920
+
921
+ Allowed when the sprint is in a terminal state (completed, failed,
922
+ or cancelled) — failed/cancelled sprints often have partial state
923
+ worth iterating on with an instruction like "retry with smaller
924
+ batch size".
861
925
 
862
926
  If *time_limit* is None, the study's ``max_sprint_duration_hours``
863
927
  is used (same default as a regular sprint). Returns the tweak ID.
@@ -865,9 +929,15 @@ class SprintManager:
865
929
  sprint = await queries.get_sprint(self.db, sprint_id)
866
930
  if sprint is None:
867
931
  raise ValueError(f"Sprint not found: {sprint_id}")
868
- if sprint["status"] != SprintStatus.COMPLETED.value:
932
+ terminal = {
933
+ SprintStatus.COMPLETED.value,
934
+ SprintStatus.FAILED.value,
935
+ SprintStatus.CANCELLED.value,
936
+ }
937
+ if sprint["status"] not in terminal:
869
938
  raise ValueError(
870
- f"Sprint {sprint_id} is not completed (status={sprint['status']})"
939
+ f"Sprint {sprint_id} is not in a terminal state "
940
+ f"(status={sprint['status']}); wait for it to finish first"
871
941
  )
872
942
 
873
943
  # Reject if there's already a running tweak for this sprint.
@@ -319,6 +319,175 @@ class TestSprintManagerCompletion:
319
319
  mock_notifier.notify_sprint_completed.assert_called_once()
320
320
 
321
321
 
322
+ class TestMarkSprintTerminal:
323
+ """The single chokepoint for terminal-state transitions.
324
+
325
+ Webhook, JobMonitor SSH polling, and dashboard refresh all flow through
326
+ here so the parent auto-loop advances on every terminal transition,
327
+ including the no-webhook case (OOM kills, walltime, node failure).
328
+ """
329
+
330
+ async def test_advances_loop_on_failed(self, db_with_study, sample_config):
331
+ """The original bug: a failed loop sprint detected outside the
332
+ webhook path used to leave the loop hanging in 'running'.
333
+ """
334
+ mgr = SprintManager(
335
+ db=db_with_study,
336
+ config=sample_config,
337
+ ssh_manager=AsyncMock(),
338
+ schedulers={},
339
+ )
340
+ await queries.create_auto_loop(db_with_study, "loop-mst1", "test-study", 5)
341
+ sprint = await mgr.create_sprint("test-study", "idea")
342
+ await queries.update_sprint(
343
+ db_with_study,
344
+ sprint.id,
345
+ status="running",
346
+ loop_id="loop-mst1",
347
+ )
348
+ await queries.update_auto_loop(
349
+ db_with_study,
350
+ "loop-mst1",
351
+ current_sprint_id=sprint.id,
352
+ status="running",
353
+ )
354
+
355
+ from researchloop.sprints.auto_loop import AutoLoopController
356
+
357
+ mgr.auto_loop = AutoLoopController(
358
+ db=db_with_study, sprint_manager=mgr, config=sample_config
359
+ )
360
+
361
+ transitioned = await mgr.mark_sprint_terminal(sprint.id, "failed")
362
+ assert transitioned is True
363
+
364
+ loop = await queries.get_auto_loop(db_with_study, "loop-mst1")
365
+ assert loop is not None
366
+ assert loop["status"] == "failed"
367
+ sp = await queries.get_sprint(db_with_study, sprint.id)
368
+ assert sp["status"] == "failed"
369
+ assert sp["completed_at"] is not None
370
+
371
+ async def test_idempotent_when_already_terminal(self, db_with_study, sample_config):
372
+ """A second call must not advance the loop again — otherwise a
373
+ completed sprint plus a late JobMonitor sweep could submit a
374
+ duplicate next sprint.
375
+ """
376
+ mgr = SprintManager(
377
+ db=db_with_study,
378
+ config=sample_config,
379
+ ssh_manager=AsyncMock(),
380
+ schedulers={},
381
+ )
382
+ sprint = await mgr.create_sprint("test-study", "idea")
383
+ await queries.update_sprint(db_with_study, sprint.id, status="failed")
384
+
385
+ mgr.auto_loop = AsyncMock()
386
+ result = await mgr.mark_sprint_terminal(sprint.id, "failed")
387
+ assert result is False
388
+ mgr.auto_loop.on_sprint_complete.assert_not_called()
389
+
390
+ async def test_no_callback_when_not_in_loop(self, db_with_study, sample_config):
391
+ """Standalone sprints (no loop_id) don't trigger the callback."""
392
+ mgr = SprintManager(
393
+ db=db_with_study,
394
+ config=sample_config,
395
+ ssh_manager=AsyncMock(),
396
+ schedulers={},
397
+ )
398
+ sprint = await mgr.create_sprint("test-study", "idea")
399
+ await queries.update_sprint(db_with_study, sprint.id, status="running")
400
+
401
+ mgr.auto_loop = AsyncMock()
402
+ await mgr.mark_sprint_terminal(sprint.id, "completed")
403
+ mgr.auto_loop.on_sprint_complete.assert_not_called()
404
+
405
+ async def test_callback_failure_does_not_block_status_update(
406
+ self, db_with_study, sample_config
407
+ ):
408
+ """If the loop callback raises, the DB update still stands —
409
+ otherwise a transient on_sprint_complete bug would leave the
410
+ sprint in the wrong status forever.
411
+ """
412
+ mgr = SprintManager(
413
+ db=db_with_study,
414
+ config=sample_config,
415
+ ssh_manager=AsyncMock(),
416
+ schedulers={},
417
+ )
418
+ sprint = await mgr.create_sprint("test-study", "idea")
419
+ await queries.update_sprint(
420
+ db_with_study, sprint.id, status="running", loop_id="loop-x"
421
+ )
422
+
423
+ mgr.auto_loop = AsyncMock()
424
+ mgr.auto_loop.on_sprint_complete.side_effect = RuntimeError("boom")
425
+ result = await mgr.mark_sprint_terminal(sprint.id, "failed")
426
+ assert result is True
427
+ sp = await queries.get_sprint(db_with_study, sprint.id)
428
+ assert sp["status"] == "failed"
429
+
430
+
431
+ class TestJobMonitorAdvancesLoop:
432
+ """JobMonitor failure detection must advance the parent auto-loop.
433
+
434
+ Regression: the SSH-polling fallback used to bypass the auto-loop
435
+ callback, so loops whose sprint died without sending a webhook were
436
+ stranded in 'running' with their failed sprint as current_sprint_id.
437
+ """
438
+
439
+ async def test_failed_sprint_marks_loop_failed(self, db_with_study, sample_config):
440
+ from unittest.mock import patch
441
+
442
+ from researchloop.clusters.monitor import JobMonitor
443
+ from researchloop.sprints.auto_loop import AutoLoopController
444
+
445
+ mgr = SprintManager(
446
+ db=db_with_study,
447
+ config=sample_config,
448
+ ssh_manager=AsyncMock(),
449
+ schedulers={},
450
+ )
451
+ mgr.auto_loop = AutoLoopController(
452
+ db=db_with_study, sprint_manager=mgr, config=sample_config
453
+ )
454
+
455
+ await queries.create_auto_loop(db_with_study, "loop-jm1", "test-study", 5)
456
+ sprint = await mgr.create_sprint("test-study", "idea")
457
+ await queries.update_sprint(
458
+ db_with_study,
459
+ sprint.id,
460
+ status="running",
461
+ job_id="123",
462
+ loop_id="loop-jm1",
463
+ )
464
+ await queries.update_auto_loop(
465
+ db_with_study,
466
+ "loop-jm1",
467
+ current_sprint_id=sprint.id,
468
+ status="running",
469
+ )
470
+
471
+ monitor = JobMonitor(
472
+ ssh_manager=AsyncMock(),
473
+ db=db_with_study,
474
+ schedulers={},
475
+ sprint_manager=mgr,
476
+ )
477
+
478
+ with patch.object(
479
+ JobMonitor, "check_job", new=AsyncMock(return_value="failed")
480
+ ):
481
+ await monitor.poll_active_jobs()
482
+
483
+ loop = await queries.get_auto_loop(db_with_study, "loop-jm1")
484
+ assert loop is not None
485
+ assert loop["status"] == "failed", (
486
+ "Loop should advance to failed when JobMonitor catches a failure "
487
+ "the runner couldn't webhook about (OOM, walltime, node death)."
488
+ )
489
+
490
+
322
491
  def _make_config(
323
492
  tmp_path: Path,
324
493
  global_context: str = "",
@@ -170,10 +170,10 @@ class TestSubmitTweak:
170
170
  assert ssh_mock.run.call_count >= 2 # write script + chmod
171
171
  scheduler.submit.assert_called_once()
172
172
 
173
- async def test_submit_tweak_rejects_non_completed_sprint(
173
+ async def test_submit_tweak_rejects_non_terminal_sprint(
174
174
  self, db_with_study, sample_config
175
175
  ):
176
- """Should raise ValueError for non-completed sprints."""
176
+ """Should raise ValueError when the sprint is still pending/running."""
177
177
  mgr = SprintManager(
178
178
  db=db_with_study,
179
179
  config=sample_config,
@@ -186,7 +186,59 @@ class TestSubmitTweak:
186
186
  await mgr.submit_tweak(sprint.id, "some tweak")
187
187
  assert False, "Expected ValueError"
188
188
  except ValueError as e:
189
- assert "not completed" in str(e)
189
+ assert "terminal" in str(e)
190
+
191
+ async def test_submit_tweak_on_failed_sprint(self, db_with_study, tmp_path):
192
+ """Failed sprints accept tweaks too — useful for "retry with X"."""
193
+ config = _tweak_config(tmp_path)
194
+ ssh_mock = AsyncMock()
195
+ ssh_mgr = AsyncMock()
196
+ ssh_mgr.get_connection.return_value = ssh_mock
197
+ scheduler = AsyncMock()
198
+ scheduler.submit.return_value = "888"
199
+ study_mgr = StudyManager(db_with_study, config)
200
+
201
+ mgr = SprintManager(
202
+ db=db_with_study,
203
+ config=config,
204
+ ssh_manager=ssh_mgr,
205
+ schedulers={"slurm": scheduler},
206
+ study_manager=study_mgr,
207
+ )
208
+ sprint = await mgr.create_sprint("test-study", "original idea")
209
+ await queries.update_sprint(db_with_study, sprint.id, status="failed")
210
+
211
+ tweak_id = await mgr.submit_tweak(sprint.id, "retry with smaller batch size")
212
+
213
+ tweak = await queries.get_tweak(db_with_study, tweak_id)
214
+ assert tweak is not None
215
+ assert tweak["status"] == "submitted"
216
+
217
+ async def test_submit_tweak_on_cancelled_sprint(self, db_with_study, tmp_path):
218
+ """Cancelled sprints accept tweaks too."""
219
+ config = _tweak_config(tmp_path)
220
+ ssh_mock = AsyncMock()
221
+ ssh_mgr = AsyncMock()
222
+ ssh_mgr.get_connection.return_value = ssh_mock
223
+ scheduler = AsyncMock()
224
+ scheduler.submit.return_value = "777"
225
+ study_mgr = StudyManager(db_with_study, config)
226
+
227
+ mgr = SprintManager(
228
+ db=db_with_study,
229
+ config=config,
230
+ ssh_manager=ssh_mgr,
231
+ schedulers={"slurm": scheduler},
232
+ study_manager=study_mgr,
233
+ )
234
+ sprint = await mgr.create_sprint("test-study", "original idea")
235
+ await queries.update_sprint(db_with_study, sprint.id, status="cancelled")
236
+
237
+ tweak_id = await mgr.submit_tweak(sprint.id, "pick up where you left off")
238
+
239
+ tweak = await queries.get_tweak(db_with_study, tweak_id)
240
+ assert tweak is not None
241
+ assert tweak["status"] == "submitted"
190
242
 
191
243
  async def test_submit_tweak_defaults_to_study_time_limit(
192
244
  self, db_with_study, tmp_path
@@ -621,6 +673,63 @@ class TestTweakDashboard:
621
673
  assert "Quick Tweak" in resp.text
622
674
  assert 'name="instruction"' in resp.text
623
675
 
676
+ async def test_tweak_form_visible_on_failed_sprint(
677
+ self, db_with_study, sample_config
678
+ ):
679
+ """The tweak form must appear on failed/cancelled sprints too —
680
+ users iterate on partial state with instructions like "retry with
681
+ smaller batch size".
682
+ """
683
+ import tempfile
684
+
685
+ from fastapi.testclient import TestClient
686
+
687
+ from researchloop.core.config import DashboardConfig
688
+ from researchloop.core.orchestrator import Orchestrator, create_app
689
+
690
+ config = Config(
691
+ studies=sample_config.studies,
692
+ clusters=sample_config.clusters,
693
+ db_path=":memory:",
694
+ artifact_dir=tempfile.mkdtemp(),
695
+ dashboard=DashboardConfig(password_hash=None),
696
+ )
697
+ orch = Orchestrator(config)
698
+ app = create_app(orch)
699
+ client = TestClient(app)
700
+
701
+ with client:
702
+ assert orch.db is not None
703
+ from researchloop.dashboard.auth import hash_password
704
+
705
+ pw_hash = hash_password("testpass123")
706
+ await orch.db.execute(
707
+ "INSERT OR REPLACE INTO settings (key, value) VALUES (?, ?)",
708
+ ("dashboard_password_hash", pw_hash),
709
+ )
710
+ resp = client.post(
711
+ "/dashboard/login",
712
+ data={"password": "testpass123"},
713
+ follow_redirects=False,
714
+ )
715
+ cookies = dict(resp.cookies)
716
+
717
+ for sid, status in [
718
+ ("sp-twk-fail", "failed"),
719
+ ("sp-twk-canc", "cancelled"),
720
+ ]:
721
+ await queries.create_sprint(orch.db, sid, "test-study", "idea")
722
+ await queries.update_sprint(orch.db, sid, status=status)
723
+ resp = client.get(
724
+ f"/dashboard/sprints/{sid}",
725
+ cookies=cookies,
726
+ )
727
+ assert resp.status_code == 200, status
728
+ assert "Quick Tweak" in resp.text, (
729
+ f"Tweak form missing for {status} sprint"
730
+ )
731
+ assert 'name="instruction"' in resp.text
732
+
624
733
  async def test_tweak_form_hidden_on_running_sprint(
625
734
  self, db_with_study, sample_config
626
735
  ):
@@ -1152,7 +1152,7 @@ wheels = [
1152
1152
 
1153
1153
  [[package]]
1154
1154
  name = "researchloop"
1155
- version = "0.3.0"
1155
+ version = "0.3.1"
1156
1156
  source = { editable = "." }
1157
1157
  dependencies = [
1158
1158
  { name = "aiosqlite" },
@@ -1 +0,0 @@
1
- __version__ = "0.3.0"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes