claude-turing 2.0.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +2 -2
- package/README.md +7 -2
- package/commands/fork.md +40 -0
- package/commands/lit.md +47 -0
- package/commands/paper.md +44 -0
- package/commands/queue.md +48 -0
- package/commands/retry.md +41 -0
- package/commands/turing.md +10 -0
- package/config/failure_modes.yaml +74 -0
- package/package.json +1 -1
- package/src/install.js +2 -0
- package/src/verify.js +6 -0
- package/templates/scripts/__pycache__/draft_paper_sections.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_queue.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/fork_experiment.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/literature_search.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/smart_retry.cpython-314.pyc +0 -0
- package/templates/scripts/draft_paper_sections.py +498 -0
- package/templates/scripts/experiment_queue.py +441 -0
- package/templates/scripts/fork_experiment.py +286 -0
- package/templates/scripts/generate_brief.py +25 -0
- package/templates/scripts/literature_search.py +421 -0
- package/templates/scripts/scaffold.py +10 -0
- package/templates/scripts/smart_retry.py +398 -0
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Experiment branching — run parallel tracks from a common parent.
|
|
3
|
+
|
|
4
|
+
"Try both A and B from this point" — creates child experiments,
|
|
5
|
+
runs both, reports which branch wins.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
python scripts/fork_experiment.py exp-042 --branches "LightGBM dart" "XGBoost deeper"
|
|
9
|
+
python scripts/fork_experiment.py exp-042 --branches "A" "B" --auto-promote
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import argparse
|
|
15
|
+
import json
|
|
16
|
+
import subprocess
|
|
17
|
+
import sys
|
|
18
|
+
from datetime import datetime, timezone
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
import yaml
|
|
22
|
+
|
|
23
|
+
from scripts.turing_io import load_config, load_experiments
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def find_experiment(experiments: list[dict], exp_id: str) -> dict | None:
|
|
27
|
+
"""Find experiment by ID."""
|
|
28
|
+
for exp in experiments:
|
|
29
|
+
if exp.get("experiment_id") == exp_id:
|
|
30
|
+
return exp
|
|
31
|
+
return None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def create_branch(
    parent: dict,
    branch_description: str,
    branch_index: int,
) -> dict:
    """Build the metadata record for one fork branch (not yet executed).

    The branch ID combines the parent's experiment ID with the 1-based
    branch position, e.g. ``fork-exp-042-1``.

    Args:
        parent: The parent experiment record.
        branch_description: Human-readable description of this branch.
        branch_index: 0-based position among the requested branches.

    Returns:
        A pending branch descriptor with empty metrics.
    """
    parent_exp_id = parent.get("experiment_id", "unknown")
    descriptor: dict = {}
    descriptor["branch_id"] = f"fork-{parent_exp_id}-{branch_index + 1}"
    descriptor["parent_id"] = parent_exp_id
    descriptor["description"] = branch_description
    descriptor["status"] = "pending"
    descriptor["created_at"] = datetime.now(timezone.utc).isoformat()
    descriptor["result_experiment"] = None
    descriptor["metrics"] = {}
    return descriptor
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _parse_metric_block(stdout: str) -> dict:
    """Parse ``key: value`` metrics from the first ``---``-delimited block.

    Only the text between the first pair of ``---`` lines is scanned.
    Values that parse as floats are stored as floats; anything else is
    kept as a stripped string.
    """
    metrics: dict = {}
    in_block = False
    for line in stdout.splitlines():
        line = line.strip()
        if line == "---":
            if in_block:
                break  # closing delimiter: stop at the end of the first block
            in_block = True
            continue
        if in_block and ":" in line:
            key, value = line.split(":", 1)
            try:
                metrics[key.strip()] = float(value.strip())
            except ValueError:
                metrics[key.strip()] = value.strip()
    return metrics


def run_branch(branch: dict, seed: int = 42, timeout: int = 600) -> dict:
    """Execute a single branch experiment by invoking ``train.py``.

    Mutates *branch* in place (status, timestamps, metrics/error) and also
    returns it for convenience.

    Args:
        branch: Branch descriptor created by :func:`create_branch`.
        seed: Random seed forwarded to the training script.
        timeout: Maximum runtime in seconds before the branch is failed.

    Returns:
        The same branch dict, with status ``completed`` or ``failed``.
    """
    branch["status"] = "running"
    branch["started_at"] = datetime.now(timezone.utc).isoformat()

    # sys.executable guarantees we use the interpreter (and virtualenv)
    # running this script; a bare "python" may be missing from PATH or
    # point at a different environment.
    cmd = [sys.executable, "train.py", "--seed", str(seed)]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        branch["status"] = "failed"
        branch["error"] = "timeout"
        return branch

    if proc.returncode != 0:
        branch["status"] = "failed"
        # Keep only the tail of stderr so reports stay readable.
        branch["error"] = proc.stderr[-300:] if proc.stderr else "unknown error"
        return branch

    branch["status"] = "completed"
    branch["completed_at"] = datetime.now(timezone.utc).isoformat()
    branch["metrics"] = _parse_metric_block(proc.stdout)
    return branch
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def determine_winner(
|
|
100
|
+
branches: list[dict],
|
|
101
|
+
metric: str,
|
|
102
|
+
lower_is_better: bool,
|
|
103
|
+
) -> dict | None:
|
|
104
|
+
"""Determine the winning branch by primary metric.
|
|
105
|
+
|
|
106
|
+
Returns the winning branch dict, or None if no branches completed.
|
|
107
|
+
"""
|
|
108
|
+
completed = [b for b in branches if b.get("status") == "completed" and metric in b.get("metrics", {})]
|
|
109
|
+
if not completed:
|
|
110
|
+
return None
|
|
111
|
+
|
|
112
|
+
if lower_is_better:
|
|
113
|
+
return min(completed, key=lambda b: b["metrics"][metric])
|
|
114
|
+
else:
|
|
115
|
+
return max(completed, key=lambda b: b["metrics"][metric])
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def format_fork_report(
|
|
119
|
+
parent_id: str,
|
|
120
|
+
branches: list[dict],
|
|
121
|
+
winner: dict | None,
|
|
122
|
+
metric: str,
|
|
123
|
+
) -> str:
|
|
124
|
+
"""Format fork results as a comparison tree."""
|
|
125
|
+
lines = [
|
|
126
|
+
f"# Fork from {parent_id}",
|
|
127
|
+
"",
|
|
128
|
+
]
|
|
129
|
+
|
|
130
|
+
if not branches:
|
|
131
|
+
lines.append("No branches executed.")
|
|
132
|
+
return "\n".join(lines)
|
|
133
|
+
|
|
134
|
+
winner_id = winner["branch_id"] if winner else None
|
|
135
|
+
|
|
136
|
+
for branch in branches:
|
|
137
|
+
status = branch.get("status", "?")
|
|
138
|
+
desc = branch.get("description", "?")
|
|
139
|
+
bid = branch.get("branch_id", "?")
|
|
140
|
+
|
|
141
|
+
if status == "completed":
|
|
142
|
+
metric_val = branch.get("metrics", {}).get(metric, "N/A")
|
|
143
|
+
is_winner = bid == winner_id
|
|
144
|
+
marker = "WINNER" if is_winner else ""
|
|
145
|
+
if isinstance(metric_val, float):
|
|
146
|
+
lines.append(f"├── {bid}: {desc} → {metric}={metric_val:.4f} {marker}")
|
|
147
|
+
else:
|
|
148
|
+
lines.append(f"├── {bid}: {desc} → {metric}={metric_val} {marker}")
|
|
149
|
+
elif status == "failed":
|
|
150
|
+
error = branch.get("error", "unknown")
|
|
151
|
+
lines.append(f"├── {bid}: {desc} → FAILED ({error})")
|
|
152
|
+
else:
|
|
153
|
+
lines.append(f"├── {bid}: {desc} → {status}")
|
|
154
|
+
|
|
155
|
+
if winner:
|
|
156
|
+
lines.extend([
|
|
157
|
+
"",
|
|
158
|
+
f"**Recommendation:** promote {winner['branch_id']}, abandon the rest.",
|
|
159
|
+
])
|
|
160
|
+
|
|
161
|
+
return "\n".join(lines)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def save_fork_report(report: dict, output_dir: str = "experiments/forks") -> Path:
    """Persist a fork report as YAML under *output_dir*.

    The file is named ``<parent_id>-fork.yaml``; the directory is created
    if needed.

    Args:
        report: The fork result dict from ``run_fork``.
        output_dir: Destination directory for the YAML file.

    Returns:
        Path of the written file.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    target = target_dir / f"{report.get('parent_id', 'unknown')}-fork.yaml"
    with target.open("w") as fh:
        yaml.dump(report, fh, default_flow_style=False, sort_keys=False)
    return target
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def run_fork(
    exp_id: str,
    branch_descriptions: list[str],
    auto_promote: bool = False,
    config_path: str = "config.yaml",
    log_path: str = "experiments/log.jsonl",
    timeout: int = 600,
) -> dict:
    """Fork an experiment into multiple branches, run them all, pick a winner.

    Args:
        exp_id: Parent experiment ID.
        branch_descriptions: One description per branch to create.
        auto_promote: Recorded in the result; marks the winner for auto-keep.
        config_path: Path to config.yaml (supplies the primary metric).
        log_path: Path to the experiment log.
        timeout: Per-branch timeout in seconds.

    Returns:
        Fork result dict with branches, winner, and summary counts, or a
        dict containing only an ``error`` key on bad input.
    """
    cfg = load_config(config_path)
    evaluation = cfg.get("evaluation", {})
    primary_metric = evaluation.get("primary_metric", "accuracy")
    lower_is_better = evaluation.get("lower_is_better", False)

    parent = find_experiment(load_experiments(log_path), exp_id)

    if not parent:
        return {"error": f"Experiment {exp_id} not found"}

    if not branch_descriptions:
        return {"error": "No branches specified. Use --branches 'A' 'B'"}

    # Create one pending branch per requested description.
    branches = [
        create_branch(parent, desc, idx)
        for idx, desc in enumerate(branch_descriptions)
    ]

    print(f"Forking {exp_id} into {len(branches)} branches:", file=sys.stderr)
    for b in branches:
        print(f"  {b['branch_id']}: {b['description']}", file=sys.stderr)
    print(file=sys.stderr)

    # Run each branch with a distinct seed so branches aren't identical runs.
    for idx, branch in enumerate(branches):
        print(f"  [{idx+1}/{len(branches)}] Running {branch['branch_id']}...",
              end=" ", flush=True, file=sys.stderr)
        run_branch(branch, seed=42 + idx, timeout=timeout)
        if branch["status"] != "completed":
            print("FAILED", file=sys.stderr)
        else:
            shown = branch.get("metrics", {}).get(primary_metric, "N/A")
            print(f"{primary_metric}={shown}", file=sys.stderr)

    winner = determine_winner(branches, primary_metric, lower_is_better)

    statuses = [b["status"] for b in branches]
    return {
        "parent_id": exp_id,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "metric": primary_metric,
        "lower_is_better": lower_is_better,
        "branches": branches,
        "winner": winner["branch_id"] if winner else None,
        "winner_metric": winner["metrics"].get(primary_metric) if winner else None,
        "auto_promote": auto_promote,
        "total_branches": len(branches),
        "completed": statuses.count("completed"),
        "failed": statuses.count("failed"),
    }
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def main() -> None:
    """CLI entry point: parse arguments, run the fork, report results."""
    ap = argparse.ArgumentParser(description="Fork experiment into parallel branches")
    ap.add_argument("exp_id", help="Parent experiment ID")
    ap.add_argument("--branches", nargs="+", required=True, help="Branch descriptions")
    ap.add_argument("--auto-promote", action="store_true", help="Auto-keep winner")
    ap.add_argument("--config", default="config.yaml")
    ap.add_argument("--log", default="experiments/log.jsonl")
    ap.add_argument("--timeout", type=int, default=600)
    ap.add_argument("--json", action="store_true")
    args = ap.parse_args()

    result = run_fork(
        args.exp_id,
        args.branches,
        args.auto_promote,
        args.config,
        args.log,
        args.timeout,
    )

    # Persist the report only for successful runs.
    if "error" not in result:
        saved = save_fork_report(result)
        print(f"\nSaved to {saved}", file=sys.stderr)

    if args.json:
        print(json.dumps(result, indent=2, default=str))
        return

    if "error" in result:
        print(f"ERROR: {result['error']}")
        return

    winning = next(
        (b for b in result["branches"] if b["branch_id"] == result.get("winner")),
        None,
    )
    print(format_fork_report(
        result["parent_id"], result["branches"], winning, result["metric"],
    ))
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
@@ -212,6 +212,18 @@ def detect_environment_drift(experiments: list[dict]) -> list[str]:
|
|
|
212
212
|
return warnings
|
|
213
213
|
|
|
214
214
|
|
|
215
|
+
def load_queue_summary(queue_path: str = "experiments/queue-summary.yaml") -> dict | None:
|
|
216
|
+
"""Load the most recent queue execution summary."""
|
|
217
|
+
path = Path(queue_path)
|
|
218
|
+
if not path.exists():
|
|
219
|
+
return None
|
|
220
|
+
try:
|
|
221
|
+
with open(path) as f:
|
|
222
|
+
return yaml.safe_load(f)
|
|
223
|
+
except (yaml.YAMLError, OSError):
|
|
224
|
+
return None
|
|
225
|
+
|
|
226
|
+
|
|
215
227
|
def load_profiles(profile_dir: str = "experiments/profiles") -> list[dict]:
|
|
216
228
|
"""Load all profiling results from YAML files."""
|
|
217
229
|
path = Path(profile_dir)
|
|
@@ -296,6 +308,7 @@ def format_brief(
|
|
|
296
308
|
reproductions: list[dict] | None = None,
|
|
297
309
|
diagnoses: list[dict] | None = None,
|
|
298
310
|
profiles: list[dict] | None = None,
|
|
311
|
+
queue_summary: dict | None = None,
|
|
299
312
|
) -> str:
|
|
300
313
|
"""Format the research briefing as markdown."""
|
|
301
314
|
direction = "lower" if lower_is_better else "higher"
|
|
@@ -472,6 +485,16 @@ def format_brief(
|
|
|
472
485
|
if failed:
|
|
473
486
|
lines.extend(["", f"*{len(failed)} experiment(s) failed reproducibility checks.*"])
|
|
474
487
|
|
|
488
|
+
# Queue report
|
|
489
|
+
if queue_summary and queue_summary.get("total"):
|
|
490
|
+
qs = queue_summary
|
|
491
|
+
lines.extend(["", "## Queue Report", ""])
|
|
492
|
+
lines.append(
|
|
493
|
+
f"**{qs.get('status', '?')}** — {qs.get('completed', 0)} completed, "
|
|
494
|
+
f"{qs.get('failed', 0)} failed, {qs.get('skipped', 0)} skipped "
|
|
495
|
+
f"of {qs.get('total', 0)} queued"
|
|
496
|
+
)
|
|
497
|
+
|
|
475
498
|
# Profiles
|
|
476
499
|
if profiles:
|
|
477
500
|
lines.extend(["", "## Performance Profile", ""])
|
|
@@ -569,6 +592,7 @@ def generate_brief(
|
|
|
569
592
|
reproductions = load_reproductions()
|
|
570
593
|
diagnoses = load_diagnoses()
|
|
571
594
|
profiles = load_profiles()
|
|
595
|
+
queue_summary = load_queue_summary()
|
|
572
596
|
|
|
573
597
|
return format_brief(
|
|
574
598
|
campaign, best, trajectory, model_types, hypotheses,
|
|
@@ -579,6 +603,7 @@ def generate_brief(
|
|
|
579
603
|
reproductions=reproductions if reproductions else None,
|
|
580
604
|
diagnoses=diagnoses if diagnoses else None,
|
|
581
605
|
profiles=profiles if profiles else None,
|
|
606
|
+
queue_summary=queue_summary,
|
|
582
607
|
)
|
|
583
608
|
|
|
584
609
|
|