PyPI - openrunner-sdk - Versions diffs - 2.24.2__tar.gz → 2.25.0__tar.gz - Mend

openrunner-sdk 2.24.2tar.gz → 2.25.0tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (170) hide show

{openrunner_sdk-2.24.2 → openrunner_sdk-2.25.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: openrunner-sdk
-Version: 2.24.2
+Version: 2.25.0
 Summary: OpenRunner SDK - W&B-compatible ML experiment tracking client
 Project-URL: Homepage, https://github.com/jqueguiner/openrunner
 Project-URL: Repository, https://github.com/jqueguiner/openrunner

openrunner_sdk-2.25.0/openrunner/CLAUDE.md ADDED Viewed

@@ -0,0 +1,33 @@
+# OpenRunner MCP Integration
+When the user asks about training metrics, loss charts, run data, or experiment results:
+- Use `mcp__openrunner__openrunner_watch_run` to show ASCII sparkline charts
+- Use `mcp__openrunner__openrunner_get_run_metrics` for raw metrics
+- Use `mcp__openrunner__openrunner_list_runs` to find runs
+- Use `mcp__openrunner__openrunner_search_sessions` to search AI session history
+Do NOT parse log files or grep for metrics when OpenRunner MCP tools are available.
+When the user asks to monitor a training run, diagnose loss curves, or get training
+advice (diverging, overfitting, plateau, NaN loss, LR tuning):
+- Use `mcp__openrunner__openrunner_monitor_training` for a health verdict
+  (HEALTHY / IMPROVING / PLATEAUED / OVERFITTING / UNSTABLE / DIVERGING) plus
+  per-metric trend, plateau, instability and overfitting signals.
+- Run `/openrunner:training-recommendations [run_id]` for actionable fixes.
+When the user makes or describes a deliberate decision (changing a param,
+switching strategy, picking an architecture, a data/process/tooling choice):
+- Use `mcp__openrunner__openrunner_log_decision` to document it (context,
+  rationale, alternatives; `source_runs` for the run(s) it was decided from,
+  `param`/`old_value`/`new_value` for a param change, optional `markdown` body
+  + `attachment_url` PDF).
+- Use `mcp__openrunner__openrunner_set_decision_runs` to attach resulting runs.
+- Use `mcp__openrunner__openrunner_list_decisions` to review the decision log.
+- Run `/openrunner:log-decision` for a guided capture.
+- Decisions render to HTML/PDF: `GET /projects/{project_id}/decisions/{decision_id}/render?format=pdf`.
+When the user asks about papers, research, or sessions:
+- Use `mcp__openrunner__openrunner_search_papers` for paper content
+- Use `mcp__openrunner__openrunner_search_research` for research plans
+- Use `mcp__openrunner__openrunner_search_sessions` for AI session history
+- Use `mcp__openrunner__openrunner_update_paper` to modify paper sections

{openrunner_sdk-2.24.2 → openrunner_sdk-2.25.0}/openrunner/__init__.py RENAMED Viewed

@@ -121,7 +121,7 @@ launch.from_run = _launch_from_run  # type: ignore[attr-defined]
 # openrunner.trace.patch_openai() syntax
 trace.patch_openai = _patch_openai  # type: ignore[attr-defined]
-__version__ = "2.24.2"
+__version__ = "2.25.0"
 logger = logging.getLogger("openrunner")

{openrunner_sdk-2.24.2 → openrunner_sdk-2.25.0}/openrunner/install_commands.py RENAMED Viewed

@@ -1062,6 +1062,170 @@ Ask if the user wants the report logged back to OpenRunner via
 - Keep the report under ~80 lines unless the user asks for the full mismatch list.
 """
+TRAINING_RECOMMENDATIONS_CMD = """---
+name: {prefix}training-recommendations
+description: Monitor a training run's metrics and recommend concrete fixes (LR, regularization, early stop, schedule)
+argument-hint: "[run_id] [--window 10] [--keys train/loss,val/loss]"
+allowed-tools:
+  - Bash
+  - Read
+  - mcp__openrunner__openrunner_monitor_training
+  - mcp__openrunner__openrunner_watch_run
+  - mcp__openrunner__openrunner_list_runs
+  - mcp__openrunner__openrunner_log_note
+---
+Monitor a training run and turn its metric health into **actionable recommendations**.
+## Inputs
+`$ARGUMENTS` may contain:
+- `run_id` -- the run to diagnose. If omitted, list recent runs and ask the user.
+- `--window N` -- recent-window size for trend/plateau analysis (default: 10)
+- `--keys a,b` -- restrict to specific metric keys (default: all logged)
+## Process
+### 1. Resolve the run
+If no `run_id` is given:
+- Call `mcp__openrunner__openrunner_list_runs` for the configured project.
+- Show the 10 most recent runs and ask the user to pick one.
+### 2. Diagnose
+Call `mcp__openrunner__openrunner_monitor_training` with the chosen `run_id` (pass
+`window` / `keys` if the user supplied them). Read the `Verdict`, the per-metric
+statuses, the `Overfitting signals`, and the machine-readable `## Signals` JSON block.
+If it reports no metrics: tell the user to log metrics from the SDK
+(`run.log({{"train/loss": ...}}, step=...)`) and stop.
+### 3. Map signals -> recommendations
+Use the verdict + signals to produce targeted advice. Only raise a recommendation when
+a signal supports it, and cite the metric/value it leans on.
+| Signal | Recommended actions |
+|--------|--------------------|
+| **DIVERGING** (loss rising, NaN/Inf) | Lower learning rate 3-10x; add or tighten grad clipping; check for overflow under mixed precision (try bf16 / loss scaling); verify data normalization; add LR warmup. |
+| **OVERFITTING** (train down while val up) | Early-stop at the val best step; add regularization (weight decay, dropout); augment or add data; reduce model size / epochs; lower early-stop patience. |
+| **PLATEAUED** (flat loss) | Decay LR (step/cosine) or run an LR finder; increase model/batch capacity; unfreeze layers; confirm data shuffling; or accept convergence and stop. |
+| **UNSTABLE** (noisy/jittery) | Increase batch size or grad accumulation; lower LR; add grad clipping; smooth the LR schedule; check for label noise. |
+| **worsening** single metric | Inspect that metric's objective/schedule; check for leakage or a metric bug. |
+| **IMPROVING** | No change needed; suggest when to checkpoint or stop, note any ETA. |
+### 4. Output
+Print a concise markdown report:
+```
+# Training Recommendations -- run <id>
+Verdict: <VERDICT>
+Key evidence: <one line per metric that drives a recommendation>
+## Recommendations (priority order)
+1. **<action>** -- why: <signal + metric:value>. how: <concrete change>.
+2. ...
+## Watchlist
+- <metric to keep watching> -- <threshold / what to look for>
+```
+Keep it under ~40 lines. Be specific (numbers, step counts), not generic.
+### 5. Offer to log
+Ask if the user wants the report logged back to OpenRunner via
+`mcp__openrunner__openrunner_log_note` so the team can review it.
+## Constraints
+- Do not invent metrics or numbers -- only use what the MCP tool returned.
+- Distinguish "the run is broken" (diverging/unstable) from "the run is done"
+  (plateaued/converged).
+- If the verdict is UNCLEAR, say so and suggest which metric to log for a clearer signal.
+"""
+LOG_DECISION_CMD = """---
+name: {prefix}log-decision
+description: Document a project decision (param change, strategy, architecture, ...) as an ADR-style record
+argument-hint: "[short decision statement] [--run run_id] [--type param|strategy|...]"
+allowed-tools:
+  - Bash
+  - Read
+  - mcp__openrunner__openrunner_log_decision
+  - mcp__openrunner__openrunner_list_decisions
+  - mcp__openrunner__openrunner_set_decision_runs
+  - mcp__openrunner__openrunner_list_runs
+---
+Capture a deliberate decision so the reasoning is preserved for the team and
+for future-you. Decisions are ADR-style records: title, context, decision,
+rationale, alternatives — decided FROM one or more runs, and optionally
+producing resulting runs. The body is markdown and renders to HTML/PDF.
+## Inputs
+`$ARGUMENTS` may contain:
+- a short decision statement (the title), e.g. "Switch optimizer to AdamW"
+- `--run run_id` (repeatable) -- run(s) the decision was decided FROM
+- `--type T` -- one of: param, strategy, architecture, data, process, tooling, other
+## Process
+### 1. Gather the decision
+Infer as much as possible from the current conversation / recent changes, then
+fill these fields (ask the user only for what you genuinely cannot infer):
+- **title** -- one-line statement of what was decided
+- **type** -- param | strategy | architecture | data | process | tooling | other
+- **context** -- what problem/situation prompted it
+- **decision** -- what was decided, in detail
+- **rationale** -- why this option
+- **alternatives** -- options considered and why they were rejected
+- **impact** -- expected consequences
+- **source_runs** -- the run(s) the decision is based on (its evidence)
+- For a **param change**: the `param` name, its `old_value`, and `new_value`.
+- Optionally a `markdown` body (full writeup) and/or an `attachment_url` (PDF).
+Do not block on missing optional fields — a title alone is enough to record.
+If the decision is based on training results, call
+`mcp__openrunner__openrunner_list_runs` to resolve the source run id(s).
+### 2. Record it
+Call `mcp__openrunner__openrunner_log_decision` with the gathered fields, passing
+`source_runs` for the run(s) it was decided from. For a param change, pass
+`param` / `old_value` / `new_value` (the tool sets `type=param` and stores the
+change in metadata automatically).
+If the decision reverses or replaces an earlier one, first call
+`mcp__openrunner__openrunner_list_decisions` to find its `id`, then pass it as
+`supersedes` so the older record is marked superseded.
+### 3. Attach resulting runs (when they exist)
+If the user later launches run(s) as a result of this decision, record them with
+`mcp__openrunner__openrunner_set_decision_runs` (`result_runs=[...]`, `append=true`
+to add to existing). This closes the loop decided-from → resulting-runs.
+### 4. Confirm
+Echo the created decision (id, title, type, status, param change, source/result
+runs). Mention it can be rendered as a PDF via
+`GET /projects/<pid>/decisions/<id>/render?format=pdf`. Offer to list recent
+decisions via `mcp__openrunner__openrunner_list_decisions`.
+## Constraints
+- One decision per call. If the user describes several, record each separately.
+- Keep `title` short and declarative; put detail in `markdown` / `decision` / `rationale`.
+- A decision should cite the run(s) it was decided from whenever results drove it.
+- Do not invent a rationale the user did not give — leave it blank instead.
+"""
 MCP_CMD = """---
 name: {prefix}mcp
 description: Register OpenRunner MCP server for native tool access
@@ -1267,6 +1431,8 @@ def install_claude_code() -> list[str]:
         "plot-session-run.md": PLOT_SESSION_RUN_CMD,
         "statusline.md": STATUSLINE_CMD,
         "gt-recommendations.md": GT_RECOMMENDATIONS_CMD,
+        "training-recommendations.md": TRAINING_RECOMMENDATIONS_CMD,
+        "log-decision.md": LOG_DECISION_CMD,
     }
     files = []
@@ -1297,6 +1463,25 @@ When the user asks about papers, research, or sessions:
 - Use `mcp__openrunner__openrunner_search_sessions` for AI session history
 - Use `mcp__openrunner__openrunner_update_paper` to modify paper sections
+When the user asks to monitor a training run, diagnose loss curves, or get training
+advice (diverging, overfitting, plateau, NaN loss, LR tuning):
+- Use `mcp__openrunner__openrunner_monitor_training` for a health verdict
+  (HEALTHY / IMPROVING / PLATEAUED / OVERFITTING / UNSTABLE / DIVERGING) plus
+  per-metric trend, plateau, instability and overfitting signals.
+- Run `/openrunner:training-recommendations [run_id]` for actionable fixes.
+When the user makes or describes a deliberate decision (changing a param,
+switching strategy, picking an architecture, a data/process/tooling choice):
+- Use `mcp__openrunner__openrunner_log_decision` to document it (context,
+  rationale, alternatives; pass `source_runs` for the run(s) it was decided
+  from, `param`/`old_value`/`new_value` for a param change, optional `markdown`
+  body + `attachment_url` PDF).
+- Use `mcp__openrunner__openrunner_set_decision_runs` to attach the run(s) that
+  resulted from a decision once they exist.
+- Use `mcp__openrunner__openrunner_list_decisions` to review the decision log.
+- Run `/openrunner:log-decision` for a guided capture.
+- Decisions render to HTML/PDF: `GET /projects/{project_id}/decisions/{decision_id}/render?format=pdf`.
 When the user asks about prediction quality, ground-truth errors, mislabels,
 or annotation rules:
 - Use `mcp__openrunner__openrunner_fetch_predictions` to pull a run's logged
@@ -1417,6 +1602,8 @@ def _install_all_commands_to_dir(cmd_dir: Path, prefix: str = "openrunner:") ->
         "plot-session-run.md": PLOT_SESSION_RUN_CMD,
         "statusline.md": STATUSLINE_CMD,
         "gt-recommendations.md": GT_RECOMMENDATIONS_CMD,
+        "training-recommendations.md": TRAINING_RECOMMENDATIONS_CMD,
+        "log-decision.md": LOG_DECISION_CMD,
     }
     files = []

openrunner-sdk 2.24.2__tar.gz → 2.25.0__tar.gz

openrunner-sdk 2.24.2tar.gz → 2.25.0tar.gz