claude-turing 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +34 -0
- package/LICENSE +21 -0
- package/README.md +457 -0
- package/agents/ml-evaluator.md +43 -0
- package/agents/ml-researcher.md +74 -0
- package/bin/cli.js +46 -0
- package/bin/turing-init.sh +57 -0
- package/commands/brief.md +83 -0
- package/commands/compare.md +24 -0
- package/commands/design.md +97 -0
- package/commands/init.md +123 -0
- package/commands/logbook.md +51 -0
- package/commands/mode.md +43 -0
- package/commands/poster.md +89 -0
- package/commands/preflight.md +75 -0
- package/commands/report.md +97 -0
- package/commands/rules/loop-protocol.md +91 -0
- package/commands/status.md +24 -0
- package/commands/suggest.md +95 -0
- package/commands/sweep.md +45 -0
- package/commands/train.md +66 -0
- package/commands/try.md +63 -0
- package/commands/turing.md +54 -0
- package/commands/validate.md +34 -0
- package/config/defaults.yaml +45 -0
- package/config/experiment_archetypes.yaml +127 -0
- package/config/lifecycle.toml +31 -0
- package/config/novelty_aliases.yaml +107 -0
- package/config/relationships.toml +125 -0
- package/config/state.toml +24 -0
- package/config/task_taxonomy.yaml +110 -0
- package/config/taxonomy.toml +37 -0
- package/package.json +54 -0
- package/src/claude-md.js +55 -0
- package/src/install.js +107 -0
- package/src/paths.js +20 -0
- package/src/postinstall.js +22 -0
- package/src/verify.js +109 -0
- package/templates/MEMORY.md +36 -0
- package/templates/README.md +93 -0
- package/templates/__pycache__/evaluate.cpython-314.pyc +0 -0
- package/templates/__pycache__/prepare.cpython-314.pyc +0 -0
- package/templates/config.yaml +48 -0
- package/templates/evaluate.py +237 -0
- package/templates/features/__init__.py +0 -0
- package/templates/features/__pycache__/__init__.cpython-314.pyc +0 -0
- package/templates/features/__pycache__/featurizers.cpython-314.pyc +0 -0
- package/templates/features/featurizers.py +138 -0
- package/templates/prepare.py +171 -0
- package/templates/program.md +216 -0
- package/templates/pyproject.toml +8 -0
- package/templates/requirements.txt +8 -0
- package/templates/scripts/__init__.py +0 -0
- package/templates/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/check_convergence.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/classify_task.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/critique_hypothesis.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_index.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/generate_logbook.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/log_experiment.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/manage_hypotheses.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/novelty_guard.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/parse_metrics.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/show_experiment_tree.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/show_families.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/statistical_compare.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/suggest_next.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/sweep.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/synthesize_decision.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/turing_io.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/update_state.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/verify_placeholders.cpython-314.pyc +0 -0
- package/templates/scripts/check_convergence.py +230 -0
- package/templates/scripts/compare_runs.py +124 -0
- package/templates/scripts/critique_hypothesis.py +350 -0
- package/templates/scripts/experiment_index.py +288 -0
- package/templates/scripts/generate_brief.py +389 -0
- package/templates/scripts/generate_logbook.py +423 -0
- package/templates/scripts/log_experiment.py +243 -0
- package/templates/scripts/manage_hypotheses.py +543 -0
- package/templates/scripts/novelty_guard.py +343 -0
- package/templates/scripts/parse_metrics.py +139 -0
- package/templates/scripts/post-train-hook.sh +74 -0
- package/templates/scripts/preflight.py +549 -0
- package/templates/scripts/scaffold.py +409 -0
- package/templates/scripts/show_environment.py +92 -0
- package/templates/scripts/show_experiment_tree.py +144 -0
- package/templates/scripts/show_families.py +133 -0
- package/templates/scripts/show_metrics.py +157 -0
- package/templates/scripts/statistical_compare.py +259 -0
- package/templates/scripts/stop-hook.sh +34 -0
- package/templates/scripts/suggest_next.py +301 -0
- package/templates/scripts/sweep.py +276 -0
- package/templates/scripts/synthesize_decision.py +300 -0
- package/templates/scripts/turing_io.py +76 -0
- package/templates/scripts/update_state.py +296 -0
- package/templates/scripts/validate_stability.py +167 -0
- package/templates/scripts/verify_placeholders.py +119 -0
- package/templates/sweep_config.yaml +14 -0
- package/templates/tests/__init__.py +0 -0
- package/templates/tests/conftest.py +91 -0
- package/templates/train.py +240 -0
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Bayesian-guided hypothesis suggestion for the autoresearch pipeline.
|
|
3
|
+
|
|
4
|
+
Reads experiment history from log.jsonl, builds a surrogate model
|
|
5
|
+
(Random Forest) over the hyperparameter space, and suggests the
|
|
6
|
+
configurations most likely to improve the primary metric.
|
|
7
|
+
|
|
8
|
+
This is the data-driven complement to human taste: the human selects
|
|
9
|
+
which room to search, this script suggests which coins in that room
|
|
10
|
+
are most likely to be biased toward heads.
|
|
11
|
+
|
|
12
|
+
Usage:
|
|
13
|
+
python scripts/suggest_next.py [--log experiments/log.jsonl] [--config config.yaml] [--top 3]
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import argparse
|
|
19
|
+
import json
|
|
20
|
+
import sys
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
import numpy as np
|
|
24
|
+
import yaml
|
|
25
|
+
|
|
26
|
+
from scripts.turing_io import load_experiments
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def extract_features_and_targets(
    experiments: list[dict],
    metric_name: str,
) -> tuple[list[dict], list[float], list[str]]:
    """Extract hyperparameter features and metric targets from experiments.

    Returns (feature_dicts, metric_values, experiment_ids).
    Only includes experiments with a numeric value for *metric_name* and at
    least one hyperparameter.
    """
    feature_dicts: list[dict] = []
    metric_values: list[float] = []
    experiment_ids: list[str] = []

    for record in experiments:
        value = record.get("metrics", {}).get(metric_name)
        if not isinstance(value, (int, float)):
            # Missing or non-numeric metric — skip this experiment entirely.
            continue

        cfg = record.get("config", {})
        params = cfg.get("hyperparams", {})
        if not params:
            # Fall back to numeric keys found at the top level of the config.
            params = {
                key: val
                for key, val in cfg.items()
                if key not in ("model_type",) and isinstance(val, (int, float))
            }

        if not params:
            continue

        feature_dicts.append(params)
        metric_values.append(float(value))
        experiment_ids.append(record.get("experiment_id", "?"))

    return feature_dicts, metric_values, experiment_ids
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def features_to_matrix(feature_dicts: list[dict]) -> tuple[np.ndarray, list[str]]:
    """Convert a list of feature dicts into a dense numpy matrix.

    Missing keys are encoded as NaN so downstream code can impute them.
    Returns (matrix, column_names).
    """
    empty = np.array([]).reshape(0, 0)
    if not feature_dicts:
        return empty, []

    # Union of every numeric key across all dicts, in stable sorted order.
    numeric_keys: set[str] = set()
    for d in feature_dicts:
        numeric_keys.update(k for k, v in d.items() if isinstance(v, (int, float)))
    columns = sorted(numeric_keys)

    if not columns:
        return empty, []

    matrix = np.full((len(feature_dicts), len(columns)), np.nan)
    for row, d in enumerate(feature_dicts):
        for col, key in enumerate(columns):
            value = d.get(key)
            if isinstance(value, (int, float)):
                matrix[row, col] = value

    return matrix, columns
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def suggest_configurations(
    experiments: list[dict],
    metric_name: str,
    lower_is_better: bool = False,
    n_suggestions: int = 3,
    sweep_config_path: str | None = None,
) -> list[dict]:
    """Suggest promising configurations using a Random Forest surrogate.

    If a sweep config exists, generates candidates from the untried region.
    Otherwise, generates perturbations of the best-known configuration.

    Args:
        experiments: Parsed experiment records (as read from log.jsonl).
        metric_name: Primary metric to optimize.
        lower_is_better: True when smaller metric values are better.
        n_suggestions: Number of configurations to return.
        sweep_config_path: Optional sweep config used for candidate ranges.

    Returns:
        List of suggestion dicts with config, predicted_metric, uncertainty
        and acquisition_score — or a single dict with a "reason" key when no
        suggestion can be made.
    """
    features, targets, _ids = extract_features_and_targets(experiments, metric_name)

    if len(features) < 3:
        return [{
            "reason": "insufficient_data",
            "detail": f"Need at least 3 experiments with hyperparameters, have {len(features)}",
            "suggestion": "Run more experiments before requesting data-driven suggestions",
        }]

    X, col_names = features_to_matrix(features)
    y = np.array(targets)

    if X.shape[1] == 0:
        return [{
            "reason": "no_numeric_hyperparameters",
            "detail": "No numeric hyperparameters found in experiment configs",
            "suggestion": "Ensure config includes numeric hyperparams like n_estimators, max_depth, learning_rate",
        }]

    # Import lazily so the rest of the module works without scikit-learn,
    # and report a structured reason dict (consistent with the other failure
    # modes above) instead of dying with an unhandled ImportError.
    try:
        from sklearn.ensemble import RandomForestRegressor
        from sklearn.impute import SimpleImputer
    except ImportError:
        return [{
            "reason": "missing_dependency",
            "detail": "scikit-learn is required for surrogate-based suggestions",
            "suggestion": "Install scikit-learn (pip install scikit-learn) and retry",
        }]

    # Median-impute NaNs left by experiments that lack some hyperparameters.
    X_clean = SimpleImputer(strategy="median").fit_transform(X)

    # Fit surrogate model over the observed hyperparameter space.
    surrogate = RandomForestRegressor(
        n_estimators=100,
        random_state=42,
        n_jobs=-1,
    )
    surrogate.fit(X_clean, y)

    # Generate candidates to score with the surrogate.
    candidates = _generate_candidates(X_clean, col_names, sweep_config_path, n_candidates=200)

    if len(candidates) == 0:
        return [{
            "reason": "no_candidates",
            "detail": "Could not generate candidate configurations",
            "suggestion": "Check sweep_config.yaml or experiment hyperparameter ranges",
        }]

    preds = surrogate.predict(candidates)

    # Uncertainty estimate: spread of the individual trees' predictions.
    tree_preds = np.array([tree.predict(candidates) for tree in surrogate.estimators_])
    uncertainties = np.std(tree_preds, axis=0)

    # Acquisition function: UCB (Upper Confidence Bound).
    # For higher-is-better: score = predicted + kappa * uncertainty
    # For lower-is-better: score = -predicted + kappa * uncertainty
    kappa = 1.5  # exploration-exploitation tradeoff
    if lower_is_better:
        scores = -preds + kappa * uncertainties
    else:
        scores = preds + kappa * uncertainties

    # Select top-N by acquisition score, best first.
    top_indices = np.argsort(scores)[-n_suggestions:][::-1]

    suggestions = []
    for idx in top_indices:
        config = {col_names[j]: round(float(candidates[idx, j]), 6) for j in range(len(col_names))}
        suggestions.append({
            "config": config,
            "predicted_metric": round(float(preds[idx]), 6),
            "uncertainty": round(float(uncertainties[idx]), 6),
            "acquisition_score": round(float(scores[idx]), 6),
        })

    return suggestions
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _generate_candidates(
    X: np.ndarray,
    col_names: list[str],
    sweep_config_path: str | None,
    n_candidates: int = 200,
) -> np.ndarray:
    """Produce candidate configurations for the surrogate to evaluate.

    Prefers sweep-config ranges when a sweep file is available; otherwise
    perturbs the observed data points.
    """
    has_sweep = bool(sweep_config_path) and Path(sweep_config_path).exists()
    if not has_sweep:
        return _candidates_from_perturbation(X, n_candidates)
    return _candidates_from_sweep(sweep_config_path, col_names, n_candidates)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _candidates_from_sweep(
    sweep_config_path: str,
    col_names: list[str],
    n_candidates: int,
) -> np.ndarray:
    """Generate random candidates from sweep parameter ranges.

    Each column with a matching sweep parameter is sampled uniformly from
    that parameter's (slightly expanded) value range; columns with no
    matching sweep parameter keep uniform [0, 1) filler values.
    """
    with open(sweep_config_path) as f:
        # Guard against an empty YAML file: safe_load returns None for it.
        sweep_config = yaml.safe_load(f) or {}

    sweep_params = sweep_config.get("sweep", {})
    if not sweep_params:
        return np.array([]).reshape(0, len(col_names))

    # Map sweep param names to column indices
    candidates = np.random.RandomState(42).uniform(size=(n_candidates, len(col_names)))

    for j, col in enumerate(col_names):
        # Match the full name or the last dotted-path segment only, so a
        # column like "depth" is not wrongly matched by "….max_depth"
        # (a plain endswith() check allowed such false positives).
        matching_key = None
        for key in sweep_params:
            if key == col or key.endswith("." + col):
                matching_key = key
                break

        if matching_key and isinstance(sweep_params[matching_key], list):
            values = [v for v in sweep_params[matching_key] if isinstance(v, (int, float))]
            if values:
                lo, hi = min(values), max(values)
                # Expand range slightly for exploration
                margin = (hi - lo) * 0.2 if hi != lo else abs(lo) * 0.5
                candidates[:, j] = np.random.RandomState(42 + j).uniform(
                    lo - margin, hi + margin, size=n_candidates,
                )

    return candidates
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _candidates_from_perturbation(
|
|
231
|
+
X: np.ndarray,
|
|
232
|
+
n_candidates: int,
|
|
233
|
+
) -> np.ndarray:
|
|
234
|
+
"""Generate candidates by perturbing existing observations."""
|
|
235
|
+
rng = np.random.RandomState(42)
|
|
236
|
+
|
|
237
|
+
# Compute column ranges
|
|
238
|
+
col_min = np.nanmin(X, axis=0)
|
|
239
|
+
col_max = np.nanmax(X, axis=0)
|
|
240
|
+
col_range = col_max - col_min
|
|
241
|
+
col_range[col_range == 0] = np.abs(col_min[col_range == 0]) * 0.5 + 1e-6
|
|
242
|
+
|
|
243
|
+
candidates = np.zeros((n_candidates, X.shape[1]))
|
|
244
|
+
for i in range(n_candidates):
|
|
245
|
+
# Pick a random existing point and perturb it
|
|
246
|
+
base_idx = rng.randint(0, X.shape[0])
|
|
247
|
+
perturbation = rng.normal(0, 0.3, size=X.shape[1]) * col_range
|
|
248
|
+
candidates[i] = X[base_idx] + perturbation
|
|
249
|
+
|
|
250
|
+
return candidates
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def format_suggestions(suggestions: list[dict], metric_name: str) -> str:
    """Render suggestion dicts as a human-readable report string."""
    if not suggestions:
        return "No suggestions available."

    first = suggestions[0]
    if "reason" in first:
        # Failure sentinel produced by suggest_configurations.
        return f"Cannot suggest: {first['detail']}\n{first.get('suggestion', '')}"

    lines = [f"Top {len(suggestions)} suggested configurations (by expected {metric_name}):", ""]

    for rank, item in enumerate(suggestions, 1):
        rendered = ", ".join(f"{k}={v}" for k, v in item["config"].items())
        lines.extend([
            f"  {rank}. {rendered}",
            f"     Predicted {metric_name}: {item['predicted_metric']:.4f} (uncertainty: {item['uncertainty']:.4f})",
            "",
        ])

    return "\n".join(lines)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def main() -> None:
    """CLI entry point: load history and print suggested configurations."""
    parser = argparse.ArgumentParser(description="Suggest next experiment configuration")
    parser.add_argument("--log", default="experiments/log.jsonl")
    parser.add_argument("--config", default="config.yaml")
    parser.add_argument("--sweep", default="sweep_config.yaml", help="Sweep config for candidate ranges")
    parser.add_argument("--top", type=int, default=3, help="Number of suggestions")
    args = parser.parse_args()

    # The project config is optional; fall back to sensible defaults below.
    project_cfg: dict = {}
    cfg_path = Path(args.config)
    if cfg_path.exists():
        with open(cfg_path) as fh:
            project_cfg = yaml.safe_load(fh) or {}

    evaluation = project_cfg.get("evaluation", {})
    primary_metric = evaluation.get("primary_metric", "accuracy")
    minimize = evaluation.get("lower_is_better", False)

    # Only pass the sweep path through when the file actually exists.
    sweep_path = args.sweep if Path(args.sweep).exists() else None

    history = load_experiments(args.log)
    suggestions = suggest_configurations(
        history,
        primary_metric,
        minimize,
        args.top,
        sweep_config_path=sweep_path,
    )

    print(format_suggestions(suggestions, primary_metric))


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Hyperparameter sweep tool for the autoresearch pipeline.
|
|
3
|
+
|
|
4
|
+
Systematic exploration of the hyperparameter space via cartesian product.
|
|
5
|
+
Generates all combinations of configured parameter ranges and writes a
|
|
6
|
+
persistent queue that the agent processes sequentially.
|
|
7
|
+
|
|
8
|
+
This is grid search, not random search or Bayesian optimization — deliberate
|
|
9
|
+
simplicity for reproducibility and interpretability. Every point in the grid
|
|
10
|
+
is evaluated, making the results a complete map of the explored region.
|
|
11
|
+
|
|
12
|
+
Usage:
|
|
13
|
+
python scripts/sweep.py [sweep_config.yaml]
|
|
14
|
+
python scripts/sweep.py --status # Show queue progress
|
|
15
|
+
python scripts/sweep.py --next # Print next pending experiment as JSON
|
|
16
|
+
python scripts/sweep.py --mark <name> <status> # Mark experiment complete/failed
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import argparse
|
|
22
|
+
import copy
|
|
23
|
+
import itertools
|
|
24
|
+
import json
|
|
25
|
+
import sys
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
import yaml
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def apply_overrides(config: dict, overrides: dict) -> dict:
    """Apply dotted-path overrides to a config dict.

    Takes dotted-path keys like "model.hyperparams.n_estimators" and sets
    nested values, creating intermediate dicts as needed.

    Args:
        config: Base configuration dictionary.
        overrides: Dict mapping dotted-path keys to values.

    Returns:
        New config dict with overrides applied (original is not mutated).
    """
    updated = copy.deepcopy(config)

    for dotted_key, value in overrides.items():
        *parents, leaf = dotted_key.split(".")
        node = updated
        for segment in parents:
            # Create intermediate dicts for path segments that don't exist.
            if segment not in node:
                node[segment] = {}
            node = node[segment]
        node[leaf] = value

    return updated
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _make_experiment_name(overrides: dict) -> str:
|
|
57
|
+
"""Generate a short descriptive name from parameter overrides.
|
|
58
|
+
|
|
59
|
+
Example: {"model.hyperparams.n_estimators": 100, "model.hyperparams.max_depth": 4}
|
|
60
|
+
becomes "n100_d4"
|
|
61
|
+
"""
|
|
62
|
+
abbreviations = {
|
|
63
|
+
"n_estimators": "n",
|
|
64
|
+
"max_depth": "d",
|
|
65
|
+
"learning_rate": "lr",
|
|
66
|
+
"min_child_weight": "mcw",
|
|
67
|
+
"subsample": "ss",
|
|
68
|
+
"colsample_bytree": "cs",
|
|
69
|
+
"gamma": "g",
|
|
70
|
+
"reg_alpha": "a",
|
|
71
|
+
"reg_lambda": "l",
|
|
72
|
+
"epochs": "ep",
|
|
73
|
+
"batch_size": "bs",
|
|
74
|
+
"hidden_size": "hs",
|
|
75
|
+
"dropout": "do",
|
|
76
|
+
"weight_decay": "wd",
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
parts = []
|
|
80
|
+
for key, value in overrides.items():
|
|
81
|
+
param_name = key.split(".")[-1]
|
|
82
|
+
abbrev = abbreviations.get(param_name, param_name[:3])
|
|
83
|
+
parts.append(f"{abbrev}{value}")
|
|
84
|
+
|
|
85
|
+
return "_".join(parts)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def generate_queue(sweep_config_path: str) -> None:
    """Generate cartesian product experiment queue from sweep config.

    Reads sweep parameters, computes the cartesian product of all value lists,
    and writes a queue YAML file with one entry per combination.

    Args:
        sweep_config_path: Path to sweep config YAML file.
    """
    config_path = Path(sweep_config_path)
    if not config_path.exists():
        print(f"Error: Sweep config not found: {config_path}", file=sys.stderr)
        sys.exit(1)

    with open(config_path) as f:
        # Guard against an empty YAML file: safe_load returns None for it.
        sweep_config = yaml.safe_load(f) or {}

    sweep_params = sweep_config.get("sweep", {})
    output_path = sweep_config.get("output", "experiments/queue.yaml")

    if not sweep_params:
        print("Error: No sweep parameters defined in config", file=sys.stderr)
        sys.exit(1)

    # Extract parameter names and value lists. A bare scalar value is treated
    # as a single-element list so it doesn't break the cartesian product.
    param_names = list(sweep_params.keys())
    param_values = [v if isinstance(v, list) else [v] for v in sweep_params.values()]

    # Generate cartesian product
    combinations = list(itertools.product(*param_values))

    # Build experiment queue: one pending entry per combination.
    queue = []
    for combo in combinations:
        overrides = dict(zip(param_names, combo))
        name = _make_experiment_name(overrides)
        queue.append({
            "experiment_name": name,
            "config_overrides": overrides,
            "status": "pending",
        })

    # Write queue
    out = Path(output_path)
    out.parent.mkdir(parents=True, exist_ok=True)

    with open(out, "w") as f:
        yaml.dump(queue, f, default_flow_style=False, sort_keys=False)

    num_params = len(param_names)
    num_experiments = len(queue)
    print(f"Generated {num_experiments} experiments from {num_params} parameters")
    print(f"Queue written to: {output_path}")
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def show_status(queue_path: str) -> None:
    """Show queue progress: counts of pending/running/complete/failed experiments.

    Args:
        queue_path: Path to experiments/queue.yaml.
    """
    from collections import Counter

    path = Path(queue_path)
    if not path.exists():
        print("No queue found. Run sweep.py to generate one.", file=sys.stderr)
        sys.exit(1)

    with open(path) as f:
        queue = yaml.safe_load(f) or []

    # Tally statuses with Counter instead of a hand-rolled dict loop.
    counts = Counter(entry.get("status", "unknown") for entry in queue)

    total = len(queue)
    print(f"Queue: {total} experiments")
    for status, count in sorted(counts.items()):
        print(f"  {status}: {count}")
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def get_next(queue_path: str) -> None:
    """Print the next pending experiment as JSON for agent consumption.

    Args:
        queue_path: Path to experiments/queue.yaml.
    """
    path = Path(queue_path)
    if not path.exists():
        print("No queue found.", file=sys.stderr)
        sys.exit(1)

    with open(path) as f:
        queue = yaml.safe_load(f) or []

    # First pending entry wins: queue order defines execution order.
    pending = next((item for item in queue if item.get("status") == "pending"), None)
    if pending is not None:
        print(json.dumps(pending, indent=2))
        return

    print("No pending experiments.", file=sys.stderr)
    sys.exit(1)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def mark_experiment(queue_path: str, name: str, new_status: str) -> None:
    """Mark an experiment as complete or failed in the queue.

    Args:
        queue_path: Path to experiments/queue.yaml.
        name: Experiment name to mark.
        new_status: New status (complete, failed, running).
    """
    path = Path(queue_path)
    if not path.exists():
        print("No queue found.", file=sys.stderr)
        sys.exit(1)

    with open(path) as f:
        queue = yaml.safe_load(f) or []

    # Locate the first entry with the requested name.
    target = next(
        (entry for entry in queue if entry.get("experiment_name") == name),
        None,
    )
    if target is None:
        print(f"Error: Experiment '{name}' not found in queue", file=sys.stderr)
        sys.exit(1)

    target["status"] = new_status

    # Persist the whole queue back to disk with the updated entry.
    with open(path, "w") as f:
        yaml.dump(queue, f, default_flow_style=False, sort_keys=False)

    print(f"Marked '{name}' as {new_status}")
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _find_queue_path(sweep_config_path: str) -> str:
    """Extract the queue output path from the sweep config.

    Falls back to the default queue location when the config file is missing
    or empty.
    """
    config_path = Path(sweep_config_path)
    if config_path.exists():
        with open(config_path) as f:
            # An empty YAML file makes safe_load return None; without the
            # "or {}" guard the .get() below would raise AttributeError.
            sweep_config = yaml.safe_load(f) or {}
        return sweep_config.get("output", "experiments/queue.yaml")
    return "experiments/queue.yaml"
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def main() -> None:
    """CLI entry point: dispatch between generate/status/next/mark modes."""
    parser = argparse.ArgumentParser(
        description="Hyperparameter sweep tool for the autoresearch pipeline"
    )
    parser.add_argument(
        "sweep_config",
        nargs="?",
        default="sweep_config.yaml",
        help="Path to sweep config YAML (default: sweep_config.yaml)",
    )
    parser.add_argument(
        "--status",
        action="store_true",
        help="Show queue progress",
    )
    parser.add_argument(
        "--next",
        action="store_true",
        help="Print the next pending experiment as JSON",
    )
    parser.add_argument(
        "--mark",
        nargs=2,
        metavar=("NAME", "STATUS"),
        help="Mark an experiment as complete/failed",
    )

    args = parser.parse_args()
    queue_path = _find_queue_path(args.sweep_config)

    # Query/update modes take precedence; a plain invocation (re)generates
    # the queue from the sweep config.
    if args.status:
        show_status(queue_path)
        return
    if args.next:
        get_next(queue_path)
        return
    if args.mark:
        name, status = args.mark
        mark_experiment(queue_path, name, status)
        return
    generate_queue(args.sweep_config)


if __name__ == "__main__":
    main()
|