researchloop 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- researchloop/__init__.py +1 -0
- researchloop/__main__.py +3 -0
- researchloop/cli.py +1138 -0
- researchloop/clusters/__init__.py +4 -0
- researchloop/clusters/monitor.py +199 -0
- researchloop/clusters/ssh.py +183 -0
- researchloop/comms/__init__.py +0 -0
- researchloop/comms/base.py +34 -0
- researchloop/comms/conversation.py +465 -0
- researchloop/comms/ntfy.py +95 -0
- researchloop/comms/router.py +71 -0
- researchloop/comms/slack.py +188 -0
- researchloop/core/__init__.py +0 -0
- researchloop/core/auth.py +78 -0
- researchloop/core/config.py +328 -0
- researchloop/core/credentials.py +38 -0
- researchloop/core/models.py +119 -0
- researchloop/core/orchestrator.py +910 -0
- researchloop/dashboard/__init__.py +0 -0
- researchloop/dashboard/app.py +15 -0
- researchloop/dashboard/auth.py +60 -0
- researchloop/dashboard/routes.py +912 -0
- researchloop/dashboard/templates/base.html +84 -0
- researchloop/dashboard/templates/login.html +12 -0
- researchloop/dashboard/templates/loop_detail.html +58 -0
- researchloop/dashboard/templates/loops.html +61 -0
- researchloop/dashboard/templates/setup.html +14 -0
- researchloop/dashboard/templates/sprint_detail.html +109 -0
- researchloop/dashboard/templates/sprints.html +48 -0
- researchloop/dashboard/templates/studies.html +18 -0
- researchloop/dashboard/templates/study_detail.html +64 -0
- researchloop/db/__init__.py +5 -0
- researchloop/db/database.py +86 -0
- researchloop/db/migrations.py +172 -0
- researchloop/db/queries.py +351 -0
- researchloop/runner/__init__.py +1 -0
- researchloop/runner/claude.py +169 -0
- researchloop/runner/job_templates/sge.sh.j2 +319 -0
- researchloop/runner/job_templates/slurm.sh.j2 +336 -0
- researchloop/runner/main.py +156 -0
- researchloop/runner/pipeline.py +272 -0
- researchloop/runner/templates/fix_issues.md.j2 +11 -0
- researchloop/runner/templates/idea_generator.md.j2 +16 -0
- researchloop/runner/templates/red_team.md.j2 +15 -0
- researchloop/runner/templates/report.md.j2 +31 -0
- researchloop/runner/templates/research_sprint.md.j2 +51 -0
- researchloop/runner/templates/summarizer.md.j2 +7 -0
- researchloop/runner/upload.py +153 -0
- researchloop/schedulers/__init__.py +11 -0
- researchloop/schedulers/base.py +43 -0
- researchloop/schedulers/local.py +188 -0
- researchloop/schedulers/sge.py +163 -0
- researchloop/schedulers/slurm.py +179 -0
- researchloop/sprints/__init__.py +0 -0
- researchloop/sprints/auto_loop.py +458 -0
- researchloop/sprints/manager.py +750 -0
- researchloop/studies/__init__.py +0 -0
- researchloop/studies/manager.py +102 -0
- researchloop-0.1.0.dist-info/METADATA +596 -0
- researchloop-0.1.0.dist-info/RECORD +63 -0
- researchloop-0.1.0.dist-info/WHEEL +4 -0
- researchloop-0.1.0.dist-info/entry_points.txt +3 -0
- researchloop-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""Entry point for the ``researchloop-runner`` CLI.
|
|
2
|
+
|
|
3
|
+
This runs INSIDE a SLURM/SGE job on HPC clusters. It executes the
|
|
4
|
+
sub-agent pipeline and reports results back to the orchestrator.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import logging
|
|
11
|
+
import sys
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
import click
|
|
15
|
+
|
|
16
|
+
from researchloop.runner.pipeline import Pipeline
|
|
17
|
+
from researchloop.runner.upload import send_webhook, upload_artifacts
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger("researchloop.runner")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
async def _run_pipeline(
    sprint_id: str,
    sprint_dir: str,
    claude_md: str,
    idea: str,
    orchestrator_url: str,
    shared_secret: str,
    red_team_rounds: int,
    claude_command: str = "claude --dangerously-skip-permissions",
) -> None:
    """Execute the full pipeline and report back to the orchestrator.

    The sprint directory (plus its ``.researchloop`` and ``results``
    sub-directories) is created if missing, the :class:`Pipeline` is run,
    artifacts are uploaded when the pipeline completed, and finally a
    webhook carrying the outcome is sent.

    Args:
        sprint_id: Identifier of the sprint being executed.
        sprint_dir: Working directory for the sprint.
        claude_md: Path to the study's CLAUDE.md file.
        idea: Research idea / prompt for the sprint.
        orchestrator_url: Base URL used for uploads and webhooks.
        shared_secret: Secret forwarded to the upload/webhook helpers.
        red_team_rounds: Maximum number of red-team / fix rounds.
        claude_command: Command line used to invoke the Claude CLI.

    Failures of the pipeline run, pipeline cleanup, artifact upload and
    webhook delivery are logged instead of raised, so that a problem in one
    stage never prevents the later reporting stages from being attempted.
    """
    sprint_path = Path(sprint_dir)
    sprint_path.mkdir(parents=True, exist_ok=True)
    (sprint_path / ".researchloop").mkdir(parents=True, exist_ok=True)
    (sprint_path / "results").mkdir(parents=True, exist_ok=True)

    pipeline = Pipeline(
        sprint_id=sprint_id,
        sprint_dir=sprint_dir,
        claude_md=claude_md,
        idea=idea,
        orchestrator_url=orchestrator_url,
        shared_secret=shared_secret,
        red_team_rounds=red_team_rounds,
        claude_command=claude_command,
    )

    summary: str | None = None
    error_msg: str | None = None
    final_status = "completed"

    try:
        summary = await pipeline.run()
    except Exception as exc:
        logger.exception("Pipeline failed for sprint %s", sprint_id)
        final_status = "failed"
        error_msg = f"Pipeline error: {exc}"
    finally:
        # Cleanup must not be able to abort this function: an exception
        # escaping from here would skip the webhook below and leave the
        # orchestrator without any completion/failure notification.
        try:
            await pipeline.stop()
        except Exception:
            logger.exception("Pipeline cleanup failed for sprint %s", sprint_id)

    # Upload artifacts before sending the completion webhook so the
    # orchestrator can access them immediately.
    if final_status == "completed":
        try:
            uploaded = await upload_artifacts(
                sprint_dir=sprint_dir,
                orchestrator_url=orchestrator_url,
                shared_secret=shared_secret,
                sprint_id=sprint_id,
            )
            logger.info("Uploaded %d artifact(s)", len(uploaded))
        except Exception:
            logger.exception("Artifact upload failed for sprint %s", sprint_id)

    # Notify orchestrator of completion (or failure).
    try:
        await send_webhook(
            orchestrator_url=orchestrator_url,
            shared_secret=shared_secret,
            sprint_id=sprint_id,
            status=final_status,
            summary=summary,
            error=error_msg,
        )
    except Exception:
        logger.exception("Failed to send completion webhook for sprint %s", sprint_id)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@click.group()
def cli() -> None:
    """ResearchLoop sprint runner - executes inside HPC jobs."""
    # The docstring above is kept verbatim: click displays a command's
    # docstring as its help text.
    # Configure root logging once for every sub-command of this group.
    log_format = "%(asctime)s [%(name)s] %(levelname)s: %(message)s"
    logging.basicConfig(format=log_format, level=logging.INFO)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
@cli.command()
@click.option("--sprint-id", required=True, help="Unique sprint identifier")
@click.option("--sprint-dir", required=True, help="Working directory for this sprint")
@click.option("--claude-md", required=True, help="Path to the study's CLAUDE.md")
@click.option("--idea", required=True, help="Research idea / prompt for this sprint")
@click.option(
    "--orchestrator-url",
    required=True,
    help="Base URL of the orchestrator API",
)
@click.option(
    "--shared-secret",
    required=True,
    help="Shared secret for orchestrator auth",
)
@click.option(
    "--red-team-rounds",
    default=3,
    show_default=True,
    help="Maximum number of red-team / fix rounds",
)
@click.option(
    "--claude-command",
    default="claude --dangerously-skip-permissions",
    show_default=True,
    help="Command to invoke Claude CLI",
)
def run(
    sprint_id: str,
    sprint_dir: str,
    claude_md: str,
    idea: str,
    orchestrator_url: str,
    shared_secret: str,
    red_team_rounds: int,
    claude_command: str,
) -> None:
    """Run the full research sprint pipeline."""
    # The docstring above is kept verbatim: click displays it as the
    # command's help text.
    logger.info(
        "Starting sprint %s in %s (red-team rounds: %d)",
        sprint_id,
        sprint_dir,
        red_team_rounds,
    )

    # Forward every CLI option unchanged to the async implementation and
    # block until it has finished.
    pipeline_kwargs = {
        "sprint_id": sprint_id,
        "sprint_dir": sprint_dir,
        "claude_md": claude_md,
        "idea": idea,
        "orchestrator_url": orchestrator_url,
        "shared_secret": shared_secret,
        "red_team_rounds": red_team_rounds,
        "claude_command": claude_command,
    }
    asyncio.run(_run_pipeline(**pipeline_kwargs))

    logger.info("Sprint %s finished.", sprint_id)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def main() -> None:
    """Package entry point for ``researchloop-runner``.

    Hands control to the :func:`cli` click group defined in this module.
    """
    cli()
|
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
"""Sub-agent pipeline orchestration.
|
|
2
|
+
|
|
3
|
+
Each step invokes the Claude CLI as a subprocess and writes progress into
|
|
4
|
+
``.researchloop/status.json`` so that the orchestrator can track liveness.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
from datetime import datetime, timezone
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from researchloop.runner.claude import render_template, run_claude
|
|
17
|
+
from researchloop.runner.upload import send_heartbeat
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
# Step labels; step numbers are counted from 1.
_STEP_LABELS: list[str] = ["research", "red_team", "report", "summarize"]

# One more than the number of labels: per the original note, red_team counts
# as 2 because it is a loop.
_TOTAL_STEPS = 1 + len(_STEP_LABELS)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class Pipeline:
    """Runs the full research sprint pipeline inside an HPC job.

    The pipeline invokes the Claude CLI (through ``run_claude``) for four
    consecutive stages - research, a red-team / fix loop, report and
    summarize - and records progress in ``<sprint_dir>/.researchloop/
    status.json``.  A background task refreshes the ``heartbeat`` field of
    that file and pings the orchestrator at a fixed interval.
    """

    # Seconds between two refreshes performed by the background heartbeat.
    _HEARTBEAT_INTERVAL_SECONDS: float = 60

    def __init__(
        self,
        sprint_id: str,
        sprint_dir: str,
        claude_md: str,
        idea: str,
        orchestrator_url: str,
        shared_secret: str,
        red_team_rounds: int = 3,
        claude_command: str = "claude --dangerously-skip-permissions",
    ) -> None:
        """Store the sprint configuration; performs no I/O.

        Args:
            sprint_id: Identifier of the sprint being executed.
            sprint_dir: Working directory for the sprint.
            claude_md: Path to the study's CLAUDE.md file.
            idea: Research idea / prompt for the sprint.
            orchestrator_url: Base URL used for heartbeat requests.
            shared_secret: Secret forwarded to the heartbeat helper.
            red_team_rounds: Maximum number of red-team / fix rounds.
            claude_command: Command line used to invoke the Claude CLI.
        """
        self.sprint_id = sprint_id
        self.sprint_dir = sprint_dir
        self.claude_md = claude_md
        self.idea = idea
        self.orchestrator_url = orchestrator_url
        self.shared_secret = shared_secret
        self.red_team_rounds = red_team_rounds
        self.claude_command = claude_command

        self._started_at = datetime.now(timezone.utc).isoformat()
        self._status_path = Path(sprint_dir) / ".researchloop" / "status.json"
        self._heartbeat_task: asyncio.Task[None] | None = None
        self._session_id: str | None = None

    # ------------------------------------------------------------------
    # Public interface
    # ------------------------------------------------------------------

    async def run(self) -> str | None:
        """Execute the full pipeline and return the summary text (or None).

        Returns:
            The stripped contents of ``summary.txt`` in the sprint
            directory, or ``None`` when that file does not exist after the
            summarize step.

        Exceptions raised by any stage propagate to the caller; the caller
        is expected to invoke :meth:`stop` afterwards to end the heartbeat.
        """
        self._start_heartbeat()

        # Read study context from CLAUDE.md if available.
        study_context = ""
        claude_md_path = Path(self.claude_md)
        if claude_md_path.exists():
            study_context = claude_md_path.read_text(encoding="utf-8")

        # Step 1 - Research
        await self._update_status("research", step=1)
        research_prompt = render_template(
            "research_sprint.md.j2",
            study_context=study_context,
            idea=self.idea,
            sprint_dir=self.sprint_dir,
        )
        # The first call passes no session id; later steps pass the stored one.
        output = await self._run_claude_step(research_prompt, resume=False)
        logger.info("Research step complete (%d chars output)", len(output))

        # Step 2 - Red-team / fix loop
        await self._update_status("red_team", step=2)
        await self._run_red_team_loop()

        # Step 3 - Report
        await self._update_status("report", step=3)
        report_prompt = render_template("report.md.j2", idea=self.idea)
        report_output = await self._run_claude_step(report_prompt)
        logger.info("Report step complete (%d chars)", len(report_output))

        # Step 4 - Summarize
        await self._update_status("summarize", step=4)
        summary_prompt = render_template("summarizer.md.j2")
        summary_output = await self._run_claude_step(summary_prompt)
        logger.info("Summary step complete (%d chars)", len(summary_output))

        # Read summary.txt written by the summarizer agent.
        summary_path = Path(self.sprint_dir) / "summary.txt"
        summary: str | None = None
        if summary_path.exists():
            summary = summary_path.read_text(encoding="utf-8").strip()

        await self._update_status("completed", step=_TOTAL_STEPS)
        return summary

    async def stop(self) -> None:
        """Clean up background tasks."""
        await self._stop_heartbeat()

    # ------------------------------------------------------------------
    # Pipeline helpers
    # ------------------------------------------------------------------

    async def _run_claude_step(self, prompt: str, *, resume: bool = True) -> str:
        """Invoke the Claude CLI once and remember the returned session id.

        Every step goes through this helper so that ``claude_command`` is
        honoured uniformly (the report and summarize steps previously fell
        back to ``run_claude``'s default command).

        Args:
            prompt: Rendered prompt text for this step.
            resume: When true, pass the stored session id to ``run_claude``;
                when false, omit the ``session_id`` argument entirely.

        Returns:
            The output text returned by ``run_claude``.
        """
        call_kwargs: dict[str, Any] = {
            "prompt": prompt,
            "working_dir": self.sprint_dir,
            "claude_md": self.claude_md,
            "claude_command": self.claude_command,
        }
        if resume:
            call_kwargs["session_id"] = self._session_id
        output, self._session_id = await run_claude(**call_kwargs)
        return output

    async def _run_red_team_loop(self) -> None:
        """Alternate red-team critiques and fix passes for up to N rounds.

        The loop ends early as soon as the critique file of a round
        (``red_team_round_<n>.md`` in the sprint directory) contains the
        marker ``NO CRITICAL ISSUES``.
        """
        for round_num in range(1, self.red_team_rounds + 1):
            substep = f"round_{round_num}"
            await self._update_status("red_team", step=2, substep=substep)

            # Run the red-team critique.
            rt_prompt = render_template(
                "red_team.md.j2",
                idea=self.idea,
                round_number=round_num,
                max_rounds=self.red_team_rounds,
            )
            rt_output = await self._run_claude_step(rt_prompt)
            logger.info(
                "Red-team round %d complete (%d chars)", round_num, len(rt_output)
            )

            # Check whether the red-team found critical issues.
            rt_file = Path(self.sprint_dir) / f"red_team_round_{round_num}.md"
            if rt_file.exists():
                content = rt_file.read_text(encoding="utf-8")
                if "NO CRITICAL ISSUES" in content:
                    logger.info(
                        "Red-team round %d: no critical issues, stopping loop.",
                        round_num,
                    )
                    break

            # Run fix step for this round.
            await self._update_status("red_team", step=2, substep=f"fix_{round_num}")
            fix_prompt = render_template(
                "fix_issues.md.j2",
                round_number=round_num,
            )
            fix_output = await self._run_claude_step(fix_prompt)
            logger.info("Fix round %d complete (%d chars)", round_num, len(fix_output))

    # ------------------------------------------------------------------
    # Status tracking
    # ------------------------------------------------------------------

    def _write_status_file(self, data: dict[str, Any]) -> None:
        """Serialize *data* as indented JSON into the status file."""
        self._status_path.write_text(
            json.dumps(data, indent=2) + "\n", encoding="utf-8"
        )

    def _read_status_file(self) -> dict[str, Any]:
        """Return the current status document, or ``{}`` if unusable.

        A missing, unreadable, undecodable or non-object file all yield an
        empty dict so the caller can always assign keys on the result.
        """
        try:
            data = json.loads(self._status_path.read_text(encoding="utf-8"))
        except (ValueError, OSError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
            # OSError covers a missing or unreadable file.
            return {}
        return data if isinstance(data, dict) else {}

    async def _update_status(
        self,
        status: str,
        step: int = 0,
        substep: str | None = None,
        error: str | None = None,
    ) -> None:
        """Write the current status to ``.researchloop/status.json``.

        Args:
            status: Stage name recorded under ``"status"``.
            step: Step number recorded under ``"step"``.
            substep: Optional finer-grained label (e.g. a red-team round).
            error: Optional error text recorded under ``"error"``.

        A failure to write the file propagates; the follow-up heartbeat
        request to the orchestrator is best-effort and never raises.
        """
        data: dict[str, Any] = {
            "sprint_id": self.sprint_id,
            "status": status,
            "step": step,
            "total_steps": _TOTAL_STEPS,
            "substep": substep,
            "heartbeat": datetime.now(timezone.utc).isoformat(),
            "started_at": self._started_at,
            "error": error,
        }
        self._status_path.parent.mkdir(parents=True, exist_ok=True)
        self._write_status_file(data)
        logger.info("Status updated: %s step=%d substep=%s", status, step, substep)

        # Best-effort heartbeat to orchestrator.
        try:
            await send_heartbeat(
                orchestrator_url=self.orchestrator_url,
                shared_secret=self.shared_secret,
                sprint_id=self.sprint_id,
                status=status,
                step=step,
            )
        except Exception:
            logger.debug("Heartbeat POST failed (non-fatal)", exc_info=True)

    # ------------------------------------------------------------------
    # Heartbeat background task
    # ------------------------------------------------------------------

    def _start_heartbeat(self) -> None:
        """Start the background task that periodically refreshes status.json.

        Does nothing when a heartbeat task is already running.
        """
        if self._heartbeat_task is not None and not self._heartbeat_task.done():
            return
        self._heartbeat_task = asyncio.create_task(
            self._heartbeat_loop(), name="heartbeat"
        )
        logger.info("Heartbeat background task started.")

    async def _heartbeat_loop(self) -> None:
        """Periodically refresh the heartbeat timestamp.

        Each iteration re-reads the status file, updates only its
        ``heartbeat`` field, writes it back and pings the orchestrator.
        File-system and network errors are logged and retried on the next
        tick so that a transient failure cannot end the heartbeat.
        """
        try:
            while True:
                await asyncio.sleep(self._HEARTBEAT_INTERVAL_SECONDS)

                # Re-read current status to update only the heartbeat field.
                data = self._read_status_file()
                data["heartbeat"] = datetime.now(timezone.utc).isoformat()
                try:
                    self._write_status_file(data)
                except OSError:
                    logger.warning(
                        "Could not refresh %s (will retry)",
                        self._status_path,
                        exc_info=True,
                    )

                # Also ping the orchestrator.
                try:
                    await send_heartbeat(
                        orchestrator_url=self.orchestrator_url,
                        shared_secret=self.shared_secret,
                        sprint_id=self.sprint_id,
                        status=data.get("status", "running"),
                        step=data.get("step", 0),
                    )
                except Exception:
                    logger.debug("Heartbeat POST failed (non-fatal)", exc_info=True)
        except asyncio.CancelledError:
            return

    async def _stop_heartbeat(self) -> None:
        """Cancel the heartbeat background task and wait for it to end.

        Never raises for an error stored in the task: awaiting a task that
        already failed re-raises its exception, which would otherwise abort
        the caller's cleanup path.
        """
        task = self._heartbeat_task
        if task is None:
            return
        self._heartbeat_task = None
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass
        except Exception:
            logger.warning(
                "Heartbeat task had already failed before shutdown", exc_info=True
            )
        logger.info("Heartbeat background task stopped.")
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
You are fixing issues identified by a red-team review.
|
|
2
|
+
|
|
3
|
+
## Red Team Feedback
|
|
4
|
+
Review the file: red_team_round_{{ round_number }}.md
|
|
5
|
+
|
|
6
|
+
## Instructions
|
|
7
|
+
- Address ALL critical issues identified in the red-team review
|
|
8
|
+
- Fix code bugs, improve methodology, add missing controls
|
|
9
|
+
- Document what you changed and why in fixes_round_{{ round_number }}.md
|
|
10
|
+
- Do not break existing working functionality
|
|
11
|
+
- Update progress.md with what you're fixing and the results
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
You are generating the next research idea for an auto-loop sprint.
|
|
2
|
+
|
|
3
|
+
## Study Context
|
|
4
|
+
{{ study_context }}
|
|
5
|
+
|
|
6
|
+
## Previous Sprint Summaries
|
|
7
|
+
{% for sprint in previous_sprints %}
|
|
8
|
+
### {{ sprint.id }}
|
|
9
|
+
{{ sprint.summary }}
|
|
10
|
+
{% endfor %}
|
|
11
|
+
|
|
12
|
+
## Instructions
|
|
13
|
+
- Based on the study context and previous results, propose the next research idea
|
|
14
|
+
- Write a single clear, actionable research idea (1-2 sentences)
|
|
15
|
+
- Focus on: unexplored angles, following up promising leads, addressing gaps
|
|
16
|
+
- Output ONLY the idea text, nothing else
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
You are a critical reviewer red-teaming a research sprint.
|
|
2
|
+
|
|
3
|
+
## Research Idea
|
|
4
|
+
{{ idea }}
|
|
5
|
+
|
|
6
|
+
## Round {{ round_number }} of {{ max_rounds }}
|
|
7
|
+
|
|
8
|
+
## Instructions
|
|
9
|
+
- Review ALL code and analysis in the current directory
|
|
10
|
+
- Look for: bugs, methodological flaws, incorrect assumptions, missing controls, statistical errors, data leakage, unfair comparisons
|
|
11
|
+
- Be thorough but fair — focus on issues that would invalidate conclusions
|
|
12
|
+
- Write your critique to red_team_round_{{ round_number }}.md
|
|
13
|
+
- End with a section "## Critical Issues" listing only issues that MUST be fixed
|
|
14
|
+
- If no critical issues remain, write "NO CRITICAL ISSUES" in that section
|
|
15
|
+
- Update progress.md with what you're reviewing and what you found
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
You are writing a final research report.
|
|
2
|
+
|
|
3
|
+
## Research Idea
|
|
4
|
+
{{ idea }}
|
|
5
|
+
|
|
6
|
+
## Instructions
|
|
7
|
+
|
|
8
|
+
### 1. Write report.md
|
|
9
|
+
Read all files in the sprint directory and write report.md. Be thorough but direct and readable — no filler, no hedging, no restating the obvious. Structure:
|
|
10
|
+
|
|
11
|
+
- **Objective**: What we set out to test, in 1-2 sentences
|
|
12
|
+
- **Methodology**: What we did, concisely. Include enough detail to reproduce
|
|
13
|
+
- **Results**: The findings, with references to figures/tables. Lead with the key numbers
|
|
14
|
+
- **Red-team feedback**: What was challenged and how it was addressed
|
|
15
|
+
- **Conclusions**: What we learned. Be concrete
|
|
16
|
+
- **Limitations**: What this doesn't tell us
|
|
17
|
+
|
|
18
|
+
Write for a technical reader who values their time. Every sentence should earn its place.
|
|
19
|
+
|
|
20
|
+
### 2. Generate a PDF report
|
|
21
|
+
After writing report.md, create `report.pdf` in the current directory using Python. Write a script `generate_report_pdf.py` that:
|
|
22
|
+
- Uses matplotlib (via `matplotlib.backends.backend_pdf.PdfPages`)
|
|
23
|
+
- Renders the report as a formatted research paper with:
|
|
24
|
+
- Title, date, and sprint ID
|
|
25
|
+
- Section headings
|
|
26
|
+
- Body text with word wrapping
|
|
27
|
+
- All relevant plots from results/ embedded inline at appropriate points
|
|
28
|
+
- Tables where the data warrants it
|
|
29
|
+
- The PDF should look like a proper research report
|
|
30
|
+
|
|
31
|
+
If matplotlib is not available or PDF generation fails, skip this step — report.md is the primary output.
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
You are a research assistant conducting a focused research sprint.
|
|
2
|
+
|
|
3
|
+
## Study Context
|
|
4
|
+
{{ study_context }}
|
|
5
|
+
|
|
6
|
+
## Research Idea
|
|
7
|
+
{{ idea }}
|
|
8
|
+
|
|
9
|
+
## Instructions
|
|
10
|
+
- Work in the current directory: {{ sprint_dir }}
|
|
11
|
+
- Create well-organized code and analysis files
|
|
12
|
+
- Document your methodology and findings
|
|
13
|
+
- Save key results to results/ subdirectory
|
|
14
|
+
- Write a brief findings.md summarizing what you discovered
|
|
15
|
+
|
|
16
|
+
## Script Output Logging
|
|
17
|
+
When running Python scripts or any long-running commands, always pipe output through `tee` so it's both visible to you AND saved to `output.log`:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
python train.py 2>&1 | tee -a output.log
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
This lets the team monitor script output remotely. Always use `tee -a` (append mode) so all runs accumulate in the same log file. Do this for every script execution, training run, or evaluation.
|
|
24
|
+
|
|
25
|
+
## Progress Log
|
|
26
|
+
Maintain a file called `progress.md` in the sprint directory. Update it regularly as you work — it's how the team monitors your progress remotely. Keep it concise and current:
|
|
27
|
+
|
|
28
|
+
- Start with your plan of attack (numbered steps)
|
|
29
|
+
- Update it each time you finish a meaningful chunk of work or change direction
|
|
30
|
+
- Include: what you just did, what you found, what you're doing next
|
|
31
|
+
- Note any problems you hit and how you resolved them
|
|
32
|
+
- When running long computations, update before and after with expected vs actual results
|
|
33
|
+
|
|
34
|
+
Example format:
|
|
35
|
+
```
|
|
36
|
+
## Plan
|
|
37
|
+
1. Set up baseline model
|
|
38
|
+
2. Run experiments with X
|
|
39
|
+
3. Analyze results
|
|
40
|
+
|
|
41
|
+
## Log
|
|
42
|
+
[08:30] Starting: setting up baseline model
|
|
43
|
+
[08:45] Baseline ready. Training config: lr=3e-4, batch=1024, 20M samples
|
|
44
|
+
[08:46] Kicked off training run. Expecting ~30 min.
|
|
45
|
+
[09:20] Baseline done. F1=0.93, MCC=0.79. Moving to experiment X.
|
|
46
|
+
[09:21] Trying X with parameters A=1, B=2
|
|
47
|
+
[10:05] X finished. F1=0.95 (+0.02). Improvement is real. Trying variant...
|
|
48
|
+
[10:50] Hit OOM with larger config. Reducing batch size to 512.
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Focus on depth over breadth. Be rigorous and document assumptions.
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
You are writing a brief summary of a completed research sprint.
|
|
2
|
+
|
|
3
|
+
## Instructions
|
|
4
|
+
- Read report.md and the sprint directory contents
|
|
5
|
+
- Write a 2-3 sentence summary to summary.txt
|
|
6
|
+
- Format: what was investigated, key finding, confidence level
|
|
7
|
+
- This will be sent as a notification, so be concise and informative
|