PyPI - researchloop - Versions diffs - 0.3.0__tar.gz → 0.3.2__tar.gz - Mend

researchloop 0.3.0__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (127) hide show

{researchloop-0.3.0 → researchloop-0.3.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: researchloop
-Version: 0.3.0
+Version: 0.3.2
 Summary: Automated research sprint platform for HPC clusters
 License: MIT
 License-File: LICENSE

{researchloop-0.3.0 → researchloop-0.3.2}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "researchloop"
-version = "0.3.0"
+version = "0.3.2"
 description = "Automated research sprint platform for HPC clusters"
 readme = "README.md"
 license = {text = "MIT"}

researchloop-0.3.2/researchloop/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "0.3.2"

{researchloop-0.3.0 → researchloop-0.3.2}/researchloop/clusters/monitor.py RENAMED Viewed

@@ -6,12 +6,15 @@ import asyncio
 import json
 import logging
 from datetime import datetime, timezone
-from typing import Any
+from typing import TYPE_CHECKING, Any
 from researchloop.clusters.ssh import SSHManager
 from researchloop.db import queries
 from researchloop.schedulers.base import BaseScheduler
+if TYPE_CHECKING:
+    from researchloop.sprints.manager import SprintManager
 logger = logging.getLogger(__name__)
 # If a job's heartbeat is older than this many seconds AND the job is not
@@ -28,11 +31,17 @@ class JobMonitor:
         db: Any,
         schedulers: dict[str, BaseScheduler],
         config: Any = None,
+        sprint_manager: SprintManager | None = None,
     ) -> None:
         self.ssh_manager = ssh_manager
         self.db = db
         self.schedulers = schedulers
         self.config = config
+        # Optional: when set, terminal-state transitions go through
+        # sprint_manager.mark_sprint_terminal so the parent auto-loop
+        # advances. None falls back to a direct DB update (used by
+        # minimal test fixtures that don't construct a SprintManager).
+        self.sprint_manager = sprint_manager
         self._polling_task: asyncio.Task[None] | None = None
         self._stop_event = asyncio.Event()
@@ -143,12 +152,17 @@ class JobMonitor:
             # Persist the updated status if it changed.
             if status in ("completed", "failed"):
                 try:
-                    await queries.update_sprint(
-                        self.db,
-                        sprint_id,
-                        status=status,
-                        completed_at=datetime.now(timezone.utc).isoformat(),
-                    )
+                    if self.sprint_manager is not None:
+                        await self.sprint_manager.mark_sprint_terminal(
+                            sprint_id, status
+                        )
+                    else:
+                        await queries.update_sprint(
+                            self.db,
+                            sprint_id,
+                            status=status,
+                            completed_at=datetime.now(timezone.utc).isoformat(),
+                        )
                 except Exception:
                     logger.exception(
                         "Failed to update DB status for sprint %s", sprint_id

{researchloop-0.3.0 → researchloop-0.3.2}/researchloop/core/orchestrator.py RENAMED Viewed

@@ -112,6 +112,9 @@ class Orchestrator:
             sprint_manager=self.sprint_manager,
             config=self.config,
         )
+        # Late-bind the back-reference so SprintManager.mark_sprint_terminal
+        # can advance the parent loop on every terminal transition.
+        self.sprint_manager.auto_loop = self.auto_loop
         # 8. Job monitor
         self.job_monitor = JobMonitor(
@@ -119,6 +122,7 @@ class Orchestrator:
             db=self.db,
             schedulers=self.schedulers,
             config=self.config,
+            sprint_manager=self.sprint_manager,
         )
         await self.job_monitor.start_polling()
@@ -405,6 +409,10 @@ def create_app(orchestrator: Orchestrator) -> FastAPI:
                 {"ok": True, "sprint_id": sprint_id, "tweak_id": tweak_id}
             )
+        # handle_completion fires auto_loop.on_sprint_complete internally
+        # via mark_sprint_terminal — single chokepoint for terminal-state
+        # transitions, so the loop also advances when the JobMonitor or a
+        # dashboard refresh is the one that detects the terminal status.
         await orchestrator.sprint_manager.handle_completion(
             sprint_id=sprint_id,
             status=status,
@@ -413,10 +421,6 @@ def create_app(orchestrator: Orchestrator) -> FastAPI:
             idea=idea,
         )
-        # Trigger auto-loop advancement if applicable.
-        if orchestrator.auto_loop is not None:
-            await orchestrator.auto_loop.on_sprint_complete(sprint_id)
         logger.info(
             "Webhook: sprint %s completion processed (status=%s)",
             sprint_id,

{researchloop-0.3.0 → researchloop-0.3.2}/researchloop/dashboard/routes.py RENAMED Viewed

@@ -899,17 +899,12 @@ def add_dashboard_routes(
                         }
                         cur = sprint["status"]
                         if real_status in terminal and cur not in terminal:
-                            from datetime import (
-                                datetime,
-                                timezone,
-                            )
-                            now = datetime.now(timezone.utc).isoformat()
-                            await queries.update_sprint(
-                                orchestrator.db,
-                                sprint_id,
-                                status=real_status,
-                                completed_at=now,
+                            # Route through SprintManager so the parent
+                            # auto-loop is advanced too — otherwise a
+                            # webhook-less failure leaves the loop stuck
+                            # in "running".
+                            await orchestrator.sprint_manager.mark_sprint_terminal(
+                                sprint_id, real_status
                             )
                         # Resolve sprints_base the same way

{researchloop-0.3.0 → researchloop-0.3.2}/researchloop/dashboard/templates/sprint_detail.html RENAMED Viewed

@@ -106,7 +106,7 @@
 </div>
 {% endif %}
-{% if sprint.status == 'completed' %}
+{% if sprint.status in ('completed', 'failed', 'cancelled') %}
 <h3>Quick Tweak</h3>
 <div class="card">
   {% if tweak_active %}
@@ -116,7 +116,11 @@
         onsubmit="var b=this.querySelector('button[type=submit]');if(b.disabled)return false;b.disabled=true;b.textContent='Submitting...';">
     <input type="hidden" name="csrf_token" value="{{ csrf_token }}">
     <div class="form-group">
+      {% if sprint.status == 'completed' %}
       <textarea name="instruction" rows="3" placeholder="e.g. Fix the axis labels on the scatter plots, add a histogram of residuals"></textarea>
+      {% else %}
+      <textarea name="instruction" rows="3" placeholder="e.g. Retry with smaller batch size, or investigate the error and continue"></textarea>
+      {% endif %}
     </div>
     <details style="margin-bottom:0.75rem">
       <summary class="dim" style="cursor:pointer;font-size:0.85rem">Resource settings</summary>

{researchloop-0.3.0 → researchloop-0.3.2}/researchloop/runner/job_templates/sge.sh.j2 RENAMED Viewed

@@ -54,6 +54,7 @@ log() {
 # --- Helper: run claude and extract session ID ---
 SESSION_ID=""
+ACTIVE_STEP_FILE="$SPRINT_DIR/.researchloop/active_step"
 run_step() {
     local prompt_file="$1"
     local step_name="$2"
@@ -65,12 +66,33 @@ run_step() {
         cmd+=(--resume "$SESSION_ID")
     fi
+    local output_file="$SPRINT_DIR/.researchloop/${step_name}_output.jsonl"
+    local pgid_file="$SPRINT_DIR/.researchloop/${step_name}_pgid"
+    local result_sentinel="$SPRINT_DIR/.researchloop/${step_name}_result_seen"
+    # Reset per-step watchdog state and announce the active step to the
+    # heartbeat loop.
+    rm -f "$pgid_file" "$result_sentinel"
+    echo "$step_name" > "$ACTIVE_STEP_FILE"
     # Run claude with streaming JSON output.
     # Each line is a JSON event — we pipe through a filter that:
     # 1. Logs human-readable summaries of what Claude is doing
     # 2. Saves the full stream for post-processing
-    local output_file="$SPRINT_DIR/.researchloop/${step_name}_output.jsonl"
-    "${cmd[@]}" 2> >(tee -a "$LOG_FILE" >&2) | tee "$output_file" | python3 -u -c "
+    # 3. Writes ${step}_result_seen when claude emits its terminal `result`
+    #    event, so the heartbeat watchdog can detect "done but pipeline open"
+    #
+    # claude is launched under `setsid` so it (and any subprocesses spawned
+    # by Bash tool calls) share a dedicated process group. If a Bash-tool
+    # subprocess later leaks (e.g. hung network call) and keeps the
+    # tee|python3 pipeline open after claude has emitted result, the
+    # heartbeat watchdog can SIGTERM the whole group cleanly via the pgid.
+    (
+        setsid "${cmd[@]}" 2> >(tee -a "$LOG_FILE" >&2) &
+        claude_pid=$!
+        # setsid makes the child a session/group leader; pgid == pid.
+        echo "$claude_pid" > "$pgid_file"
+        wait "$claude_pid"
+    ) | tee "$output_file" | python3 -u -c "
 import sys, json, os, time
 log = open('$LOG_FILE', 'a', buffering=1)
 last_tool_time = 0
@@ -112,17 +134,32 @@ for line in sys.stdin:
         sid = evt.get('session_id', '')
         if sid:
             open('$SPRINT_DIR/.researchloop/session_id', 'w').write(sid)
+        # Mark claude's terminal result so the heartbeat watchdog can kill
+        # any leaked subprocesses keeping the pipeline open past this point.
+        try:
+            open('$result_sentinel', 'w').close()
+        except OSError:
+            pass
         result = evt.get('result', '') or ''
         log.write(f'  Done ({tool_count} tool calls, {len(result)} chars output)\n')
         log.flush()
 log.close()
 " || {
         local rc=$?
-        log "ERROR: Claude failed on step $step_name (exit $rc)"
-        tail -20 "$output_file" >> "$LOG_FILE" 2>/dev/null
-        return 1
+        # Treat as failure only if claude never emitted its result event. If
+        # the sentinel exists, the work is done — the nonzero exit is just
+        # the watchdog cleaning up a leaked Bash-tool subprocess.
+        if [ ! -f "$result_sentinel" ]; then
+            log "ERROR: Claude failed on step $step_name (exit $rc)"
+            tail -20 "$output_file" >> "$LOG_FILE" 2>/dev/null
+            rm -f "$ACTIVE_STEP_FILE"
+            return 1
+        fi
+        log "NOTE: $step_name pipeline closed via watchdog after result event (exit $rc); treating step as complete"
     }
+    rm -f "$ACTIVE_STEP_FILE"
     if [ -f "$SPRINT_DIR/.researchloop/session_id" ]; then
         SESSION_ID=$(cat "$SPRINT_DIR/.researchloop/session_id")
     fi
@@ -204,22 +241,36 @@ print(json.dumps({
 }
 _heartbeat_loop() {
-    # Watchdog: warn once if the active step's stream-json file goes silent
-    # for too long while heartbeats keep firing. Signature of a hung pipeline
-    # (claude exited but a leaked fd keeps tee|python3 blocked).
+    # Watchdog: detect and recover from hung pipelines.
+    #
+    # STUCK_PIPE warn: the active step's stream-json file goes silent for
+    # 5+ minutes while heartbeats keep firing. Signature of claude having
+    # exited but a leaked Bash-tool subprocess holding the pipeline's
+    # stdout fd open.
+    #
+    # Hung-after-result kill: claude emits a terminal `result` event when
+    # its work is done, which the stream filter records as a sentinel file.
+    # If the bash pipeline does not close within result_grace_secs after
+    # that, SIGTERM the entire claude process group via its pgid so the
+    # leaked subprocesses die and run_step can return. Escalate to SIGKILL
+    # if the group ignores SIGTERM.
     local stuck_warned=0
     local stuck_threshold_secs=300
+    local result_grace_secs=60
+    local kill_escalate_secs=15
+    local last_killed_pgid=""
     while true; do
         sleep 60
+        # Detect current step from the active_step file written by run_step.
+        local active_step=""
+        if [ -f "$SPRINT_DIR/.researchloop/active_step" ]; then
+            active_step=$(cat "$SPRINT_DIR/.researchloop/active_step" 2>/dev/null || true)
+        fi
         local step="running"
-        if [ -f "$LOG_FILE" ]; then
-            local last_step
-            last_step=$(grep -o '>>> Starting step: [a-z_]*' "$LOG_FILE" | tail -1 | sed 's/>>> Starting step: //' || true)
-            if [ -n "$last_step" ]; then
-                step="running ($last_step)"
-            fi
+        if [ -n "$active_step" ]; then
+            step="running ($active_step)"
         fi
-        # STUCK_PIPE watchdog (detect-and-warn only).
+        # STUCK_PIPE watchdog (detect-and-warn).
         local newest_jsonl
         newest_jsonl=$(ls -t "$SPRINT_DIR/.researchloop"/*_output.jsonl 2>/dev/null | head -1)
         if [ -n "$newest_jsonl" ] && [ -f "$newest_jsonl" ]; then
@@ -236,6 +287,26 @@ _heartbeat_loop() {
                 stuck_warned=0
             fi
         fi
+        # Hung-after-result recovery: kill the claude pgid if the result
+        # sentinel is older than the grace period.
+        if [ -n "$active_step" ]; then
+            local sentinel="$SPRINT_DIR/.researchloop/${active_step}_result_seen"
+            local pgid_file="$SPRINT_DIR/.researchloop/${active_step}_pgid"
+            if [ -f "$sentinel" ] && [ -f "$pgid_file" ]; then
+                local s_mtime now_t s_age pgid
+                s_mtime=$(stat -c %Y "$sentinel" 2>/dev/null || stat -f %m "$sentinel" 2>/dev/null || echo 0)
+                now_t=$(date +%s)
+                s_age=$((now_t - s_mtime))
+                pgid=$(cat "$pgid_file" 2>/dev/null || true)
+                if [ -n "$pgid" ] && [ "$s_age" -ge "$result_grace_secs" ] && [ "$last_killed_pgid" != "$pgid" ]; then
+                    echo "[$(date -u +%H:%M:%S)] STUCK_PIPE recovery: $active_step result event seen ${s_age}s ago but pipeline still open; SIGTERM pgid $pgid" >> "$LOG_FILE"
+                    kill -TERM -"$pgid" 2>/dev/null || true
+                    # Escalate to SIGKILL in case the group ignores SIGTERM.
+                    ( sleep "$kill_escalate_secs" && kill -KILL -"$pgid" 2>/dev/null || true ) &
+                    last_killed_pgid="$pgid"
+                fi
+            fi
+        fi
         send_heartbeat "$step"
         echo "[$(date -u +%H:%M:%S)] ... still running (heartbeat)" >> "$LOG_FILE"
     done

{researchloop-0.3.0 → researchloop-0.3.2}/researchloop/runner/job_templates/slurm.sh.j2 RENAMED Viewed

@@ -53,6 +53,7 @@ log() {
 # --- Helper: run claude and extract session ID ---
 SESSION_ID=""
+ACTIVE_STEP_FILE="$SPRINT_DIR/.researchloop/active_step"
 run_step() {
     local prompt_file="$1"
     local step_name="$2"
@@ -64,12 +65,33 @@ run_step() {
         cmd+=(--resume "$SESSION_ID")
     fi
+    local output_file="$SPRINT_DIR/.researchloop/${step_name}_output.jsonl"
+    local pgid_file="$SPRINT_DIR/.researchloop/${step_name}_pgid"
+    local result_sentinel="$SPRINT_DIR/.researchloop/${step_name}_result_seen"
+    # Reset per-step watchdog state and announce the active step to the
+    # heartbeat loop.
+    rm -f "$pgid_file" "$result_sentinel"
+    echo "$step_name" > "$ACTIVE_STEP_FILE"
     # Run claude with streaming JSON output.
     # Each line is a JSON event — we pipe through a filter that:
     # 1. Logs human-readable summaries of what Claude is doing
     # 2. Saves the full stream for post-processing
-    local output_file="$SPRINT_DIR/.researchloop/${step_name}_output.jsonl"
-    "${cmd[@]}" 2> >(tee -a "$LOG_FILE" >&2) | tee "$output_file" | python3 -u -c "
+    # 3. Writes ${step}_result_seen when claude emits its terminal `result`
+    #    event, so the heartbeat watchdog can detect "done but pipeline open"
+    #
+    # claude is launched under `setsid` so it (and any subprocesses spawned
+    # by Bash tool calls) share a dedicated process group. If a Bash-tool
+    # subprocess later leaks (e.g. hung network call) and keeps the
+    # tee|python3 pipeline open after claude has emitted result, the
+    # heartbeat watchdog can SIGTERM the whole group cleanly via the pgid.
+    (
+        setsid "${cmd[@]}" 2> >(tee -a "$LOG_FILE" >&2) &
+        claude_pid=$!
+        # setsid makes the child a session/group leader; pgid == pid.
+        echo "$claude_pid" > "$pgid_file"
+        wait "$claude_pid"
+    ) | tee "$output_file" | python3 -u -c "
 import sys, json, os, time
 log = open('$LOG_FILE', 'a', buffering=1)
 last_tool_time = 0
@@ -112,17 +134,32 @@ for line in sys.stdin:
         sid = evt.get('session_id', '')
         if sid:
             open('$SPRINT_DIR/.researchloop/session_id', 'w').write(sid)
+        # Mark claude's terminal result so the heartbeat watchdog can kill
+        # any leaked subprocesses keeping the pipeline open past this point.
+        try:
+            open('$result_sentinel', 'w').close()
+        except OSError:
+            pass
         result = evt.get('result', '') or ''
         log.write(f'  Done ({tool_count} tool calls, {len(result)} chars output)\n')
         log.flush()
 log.close()
 " || {
         local rc=$?
-        log "ERROR: Claude failed on step $step_name (exit $rc)"
-        tail -20 "$output_file" >> "$LOG_FILE" 2>/dev/null
-        return 1
+        # Treat as failure only if claude never emitted its result event. If
+        # the sentinel exists, the work is done — the nonzero exit is just
+        # the watchdog cleaning up a leaked Bash-tool subprocess.
+        if [ ! -f "$result_sentinel" ]; then
+            log "ERROR: Claude failed on step $step_name (exit $rc)"
+            tail -20 "$output_file" >> "$LOG_FILE" 2>/dev/null
+            rm -f "$ACTIVE_STEP_FILE"
+            return 1
+        fi
+        log "NOTE: $step_name pipeline closed via watchdog after result event (exit $rc); treating step as complete"
     }
+    rm -f "$ACTIVE_STEP_FILE"
     # Read session ID (written by the stream filter above).
     if [ -f "$SPRINT_DIR/.researchloop/session_id" ]; then
         SESSION_ID=$(cat "$SPRINT_DIR/.researchloop/session_id")
@@ -206,23 +243,36 @@ print(json.dumps({
 }
 _heartbeat_loop() {
-    # Watchdog: warn once if the active step's stream-json file goes silent
-    # for too long while heartbeats keep firing. Signature of a hung pipeline
-    # (claude exited but a leaked fd keeps tee|python3 blocked).
+    # Watchdog: detect and recover from hung pipelines.
+    #
+    # STUCK_PIPE warn: the active step's stream-json file goes silent for
+    # 5+ minutes while heartbeats keep firing. Signature of claude having
+    # exited but a leaked Bash-tool subprocess holding the pipeline's
+    # stdout fd open.
+    #
+    # Hung-after-result kill: claude emits a terminal `result` event when
+    # its work is done, which the stream filter records as a sentinel file.
+    # If the bash pipeline does not close within result_grace_secs after
+    # that, SIGTERM the entire claude process group via its pgid so the
+    # leaked subprocesses die and run_step can return. Escalate to SIGKILL
+    # if the group ignores SIGTERM.
     local stuck_warned=0
     local stuck_threshold_secs=300
+    local result_grace_secs=60
+    local kill_escalate_secs=15
+    local last_killed_pgid=""
     while true; do
         sleep 60
-        # Detect current step from log.
+        # Detect current step from the active_step file written by run_step.
+        local active_step=""
+        if [ -f "$SPRINT_DIR/.researchloop/active_step" ]; then
+            active_step=$(cat "$SPRINT_DIR/.researchloop/active_step" 2>/dev/null || true)
+        fi
         local step="running"
-        if [ -f "$LOG_FILE" ]; then
-            local last_step
-            last_step=$(grep -o '>>> Starting step: [a-z_]*' "$LOG_FILE" | tail -1 | sed 's/>>> Starting step: //' || true)
-            if [ -n "$last_step" ]; then
-                step="running ($last_step)"
-            fi
+        if [ -n "$active_step" ]; then
+            step="running ($active_step)"
         fi
-        # STUCK_PIPE watchdog (detect-and-warn only).
+        # STUCK_PIPE watchdog (detect-and-warn).
         local newest_jsonl
         newest_jsonl=$(ls -t "$SPRINT_DIR/.researchloop"/*_output.jsonl 2>/dev/null | head -1)
         if [ -n "$newest_jsonl" ] && [ -f "$newest_jsonl" ]; then
@@ -239,6 +289,26 @@ _heartbeat_loop() {
                 stuck_warned=0
             fi
         fi
+        # Hung-after-result recovery: kill the claude pgid if the result
+        # sentinel is older than the grace period.
+        if [ -n "$active_step" ]; then
+            local sentinel="$SPRINT_DIR/.researchloop/${active_step}_result_seen"
+            local pgid_file="$SPRINT_DIR/.researchloop/${active_step}_pgid"
+            if [ -f "$sentinel" ] && [ -f "$pgid_file" ]; then
+                local s_mtime now_t s_age pgid
+                s_mtime=$(stat -c %Y "$sentinel" 2>/dev/null || stat -f %m "$sentinel" 2>/dev/null || echo 0)
+                now_t=$(date +%s)
+                s_age=$((now_t - s_mtime))
+                pgid=$(cat "$pgid_file" 2>/dev/null || true)
+                if [ -n "$pgid" ] && [ "$s_age" -ge "$result_grace_secs" ] && [ "$last_killed_pgid" != "$pgid" ]; then
+                    echo "[$(date -u +%H:%M:%S)] STUCK_PIPE recovery: $active_step result event seen ${s_age}s ago but pipeline still open; SIGTERM pgid $pgid" >> "$LOG_FILE"
+                    kill -TERM -"$pgid" 2>/dev/null || true
+                    # Escalate to SIGKILL in case the group ignores SIGTERM.
+                    ( sleep "$kill_escalate_secs" && kill -KILL -"$pgid" 2>/dev/null || true ) &
+                    last_killed_pgid="$pgid"
+                fi
+            fi
+        fi
         send_heartbeat "$step"
         # Also write a heartbeat timestamp to the log so it's visible.
         echo "[$(date -u +%H:%M:%S)] ... still running (heartbeat)" >> "$LOG_FILE"

{researchloop-0.3.0 → researchloop-0.3.2}/researchloop/runner/templates/report.md.j2 RENAMED Viewed

@@ -48,12 +48,19 @@ After writing report.md, create `report.pdf` in the current directory using Pyth
 - Style: remove outer edges, add subtle row shading or gridlines for readability
 - If a table has many columns (>6), use font size 7 and/or landscape orientation for that page
-**Images:**
-- When generating plots earlier in the pipeline, save at **dpi=150 or higher** so text in plots is sharp in the PDF
-- When embedding in the PDF figure, use `ax.imshow(img, extent=[x_left, x_right, y_low, y_high], aspect='auto')` or insert via `fig.figimage()`. Scale to fit within margins while preserving aspect ratio
-- Do NOT downscale high-res images — matplotlib's PdfPages will embed them at their native resolution
+**Images — embed vector plots as vectors, not rasters:**
+- Plots from `results/` are typically saved as both `.png` (raster) and `.pdf` (vector). Always embed the `.pdf`. **Anything passed through `ax.imshow()` or `fig.figimage()` gets rasterized when PdfPages saves the page** — the source file's DPI is discarded, only the figure's savefig DPI matters, and the result is still raster. This is the dominant cause of blurry plots in the final report
+- Two-pass approach to keep plots vector:
+  1. **Matplotlib pass:** for each plot, draw a placeholder rectangle (or leave whitespace) at the intended position and record its bbox in page-fraction coordinates. Render the caption normally
+  2. **PyMuPDF overlay pass:** after `PdfPages` writes `report.pdf`, open it with `fitz` and for each placeholder call `page.show_pdf_page(rect, src_doc, 0)` where `src_doc = fitz.open(plot_pdf_path)` and `rect = fitz.Rect(x0, y0, x1, y1)` is the placeholder's bbox converted to points (page-fraction × 72 × page-size-in-inches; remember PDF y-axis runs top-down). Overwrite `report.pdf` with the merged result
+- Only use `ax.imshow(img, extent=..., aspect='auto')` for genuinely raster-only sources (photos, external screenshots) where no `.pdf` companion exists. Source those at dpi=150+
+- If `PyMuPDF` is unavailable, fall back to `pypdf`'s `merge_transformed_page` (same idea, different API) rather than rasterizing
 - Leave space above/below for captions
+**Verify the result is actually vector:**
+- For each embedded plot's xref, `fitz.open("report.pdf").xref_get_key(xref, "Subtype")` should return `/Form`, never `/Image`
+- A vector-only report is typically <150 KB; if `report.pdf` is 400 KB+ you almost certainly rasterized something — re-check the overlay pass
 **Pagination:**
 - Track y_cursor starting at y_top, decrementing after each element
 - When y_cursor < y_bottom + 0.05, save the page and start a new one

{researchloop-0.3.0 → researchloop-0.3.2}/researchloop/runner/templates/tweak.md.j2 RENAMED Viewed

@@ -1,4 +1,9 @@
-You are applying a quick tweak to a completed research sprint.
+You are applying a quick tweak to a research sprint.
+The sprint may have completed successfully, or it may have failed or
+been cancelled before finishing. Check the existing files (findings.md,
+report.md, results/, sprint logs) to see how far it got, then apply the
+instruction below.
 ## Tweak Instruction
 {{ instruction }}

researchloop 0.3.0__tar.gz → 0.3.2__tar.gz

researchloop 0.3.0__tar.gz → 0.3.2__tar.gz