npm - claude-turing - Versions diffs - 3.3.0 → 3.5.0 - Mend

claude-turing 3.3.0 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

package/.claude-plugin/plugin.json +2 -2
package/README.md +13 -2
package/commands/annotate.md +23 -0
package/commands/archive.md +23 -0
package/commands/cite.md +23 -0
package/commands/flashback.md +22 -0
package/commands/merge.md +24 -0
package/commands/present.md +23 -0
package/commands/prune.md +26 -0
package/commands/quantize.md +24 -0
package/commands/replay.md +23 -0
package/commands/search.md +22 -0
package/commands/surgery.md +27 -0
package/commands/template.md +22 -0
package/commands/trend.md +21 -0
package/commands/turing.md +22 -0
package/package.json +1 -1
package/src/install.js +2 -0
package/src/verify.js +11 -0
package/templates/scripts/__pycache__/architecture_surgery.cpython-314.pyc +0 -0
package/templates/scripts/__pycache__/experiment_annotations.cpython-314.pyc +0 -0
package/templates/scripts/__pycache__/experiment_archive.cpython-314.pyc +0 -0
package/templates/scripts/__pycache__/experiment_replay.cpython-314.pyc +0 -0
package/templates/scripts/__pycache__/experiment_search.cpython-314.pyc +0 -0
package/templates/scripts/__pycache__/experiment_templates.cpython-314.pyc +0 -0
package/templates/scripts/__pycache__/model_merger.cpython-314.pyc +0 -0
package/templates/scripts/__pycache__/model_pruning.cpython-314.pyc +0 -0
package/templates/scripts/__pycache__/model_quantization.cpython-314.pyc +0 -0
package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
package/templates/scripts/__pycache__/session_flashback.cpython-314.pyc +0 -0
package/templates/scripts/__pycache__/trend_analysis.cpython-314.pyc +0 -0
package/templates/scripts/architecture_surgery.py +238 -0
package/templates/scripts/citation_manager.py +436 -0
package/templates/scripts/experiment_annotations.py +392 -0
package/templates/scripts/experiment_archive.py +534 -0
package/templates/scripts/experiment_replay.py +592 -0
package/templates/scripts/experiment_search.py +451 -0
package/templates/scripts/experiment_templates.py +501 -0
package/templates/scripts/generate_changelog.py +464 -0
package/templates/scripts/generate_figures.py +597 -0
package/templates/scripts/model_merger.py +277 -0
package/templates/scripts/model_pruning.py +182 -0
package/templates/scripts/model_quantization.py +177 -0
package/templates/scripts/scaffold.py +20 -0
package/templates/scripts/session_flashback.py +461 -0
package/templates/scripts/trend_analysis.py +503 -0

package/templates/scripts/model_merger.py ADDED Viewed

@@ -0,0 +1,277 @@
+#!/usr/bin/env python3
+"""Model merging for the autoresearch pipeline.
+Average or merge weights from multiple fine-tuned checkpoints into a
+single model (model soups, TIES, DARE, greedy soup). Often beats any
+individual model with zero additional training cost and no latency overhead.
+Usage:
+    python scripts/model_merger.py exp-042 exp-053 exp-067
+    python scripts/model_merger.py exp-042 exp-053 --method greedy
+    python scripts/model_merger.py --json
+"""
+from __future__ import annotations
+import argparse
+import json
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+import numpy as np
+import yaml
+from scripts.turing_io import load_config, load_experiments
+# Default path of the experiment log handed to load_experiments() by merge_analysis().
+DEFAULT_LOG_PATH = "experiments/log.jsonl"
+# Merge strategies accepted by the --method CLI flag.
+MERGE_METHODS = ["uniform", "greedy", "ties", "dare"]
+def check_compatibility(experiments: list[dict]) -> dict:
+    """Check that all models share the same architecture."""
+    model_types = {e.get("config", {}).get("model_type", "?") for e in experiments}
+    compatible = len(model_types) == 1
+    return {
+        "compatible": compatible,
+        "model_types": list(model_types),
+        "n_models": len(experiments),
+        "reason": "All models share same architecture" if compatible else f"Mixed architectures: {model_types}",
+    }
+def plan_uniform_merge(
+    experiments: list[dict],
+    primary_metric: str,
+) -> dict:
+    """Plan uniform weight averaging (model soup)."""
+    metrics = [e.get("metrics", {}).get(primary_metric, 0) for e in experiments]
+    return {
+        "method": "uniform",
+        "description": "Simple average of all model weights",
+        "n_models": len(experiments),
+        "individual_metrics": [{"exp_id": e.get("experiment_id"), primary_metric: m} for e, m in zip(experiments, metrics)],
+        "weights": [round(1.0 / len(experiments), 4)] * len(experiments),
+    }
+def plan_greedy_merge(
+    experiments: list[dict],
+    primary_metric: str,
+    merge_results: list[dict] | None = None,
+) -> dict:
+    """Plan greedy soup — iteratively add models only if they improve the merge."""
+    sorted_exps = sorted(experiments, key=lambda e: e.get("metrics", {}).get(primary_metric, 0), reverse=True)
+    included = [sorted_exps[0].get("experiment_id")]
+    excluded = []
+    if merge_results:
+        # Use actual results to determine inclusion
+        for r in merge_results[1:]:
+            if r.get("improved", True):
+                included.append(r.get("exp_id"))
+            else:
+                excluded.append(r.get("exp_id"))
+    else:
+        # Plan: include all by default, actual filtering done at execution
+        included = [e.get("experiment_id") for e in sorted_exps]
+    return {
+        "method": "greedy",
+        "description": "Iteratively add models only if they improve the merged result",
+        "included": included,
+        "excluded": excluded,
+        "n_included": len(included),
+        "n_excluded": len(excluded),
+    }
+def plan_ties_merge(experiments: list[dict]) -> dict:
+    """Plan TIES merging (Trim, Elect sign, disjoint Merge)."""
+    return {
+        "method": "ties",
+        "description": "Trim redundant params, elect sign consensus, disjoint merge",
+        "n_models": len(experiments),
+        "steps": [
+            "1. Compute task vectors (delta from base) for each model",
+            "2. Trim: zero out smallest magnitude deltas",
+            "3. Elect: resolve sign conflicts by majority vote",
+            "4. Merge: average the surviving, sign-consistent deltas",
+        ],
+    }
+def plan_dare_merge(experiments: list[dict]) -> dict:
+    """Plan DARE merging (Drop And REscale)."""
+    return {
+        "method": "dare",
+        "description": "Randomly drop parameters and rescale survivors to reduce interference",
+        "n_models": len(experiments),
+        "drop_rate": 0.5,
+        "steps": [
+            "1. Compute task vectors for each model",
+            "2. Randomly drop 50% of parameters per model",
+            "3. Rescale surviving parameters by 1/(1-drop_rate)",
+            "4. Average the rescaled task vectors",
+        ],
+    }
+def compare_merge_methods(
+    method_results: dict[str, dict] | None = None,
+    experiments: list[dict] | None = None,
+    primary_metric: str = "accuracy",
+) -> dict:
+    """Compare merge method results."""
+    if not experiments:
+        return {"error": "No experiments provided"}
+    # Best single model
+    best_single = max(experiments, key=lambda e: e.get("metrics", {}).get(primary_metric, 0))
+    best_metric = best_single.get("metrics", {}).get(primary_metric, 0)
+    results = [{
+        "method": "best_single",
+        "metric_value": best_metric,
+        "delta": 0.0,
+        "experiment_id": best_single.get("experiment_id"),
+    }]
+    if method_results:
+        for method_name, data in method_results.items():
+            metric = data.get("metric_value", data.get(primary_metric, 0))
+            results.append({
+                "method": method_name,
+                "metric_value": metric,
+                "delta": round(metric - best_metric, 6),
+            })
+    best_merge = max(results, key=lambda r: r.get("metric_value", 0))
+    return {
+        "results": results,
+        "best_method": best_merge.get("method"),
+        "best_metric": best_merge.get("metric_value"),
+        "improvement": best_merge.get("delta", 0),
+    }
+def merge_analysis(
+    exp_ids: list[str] | None = None,
+    method_results: dict[str, dict] | None = None,
+    config_path: str = "config.yaml",
+    log_path: str = DEFAULT_LOG_PATH,
+) -> dict:
+    """Run merge analysis."""
+    config = load_config(config_path)
+    primary_metric = config.get("evaluation", {}).get("primary_metric", "accuracy")
+    experiments = load_experiments(log_path)
+    if exp_ids:
+        selected = [e for e in experiments if e.get("experiment_id") in exp_ids]
+    else:
+        # Default: top 3 kept experiments
+        kept = sorted(
+            [e for e in experiments if e.get("status") == "kept"],
+            key=lambda e: e.get("metrics", {}).get(primary_metric, 0), reverse=True,
+        )
+        selected = kept[:3]
+    if len(selected) < 2:
+        return {"error": "Need at least 2 experiments for model merging"}
+    compat = check_compatibility(selected)
+    plans = {
+        "uniform": plan_uniform_merge(selected, primary_metric),
+        "greedy": plan_greedy_merge(selected, primary_metric),
+        "ties": plan_ties_merge(selected),
+        "dare": plan_dare_merge(selected),
+    }
+    comparison = compare_merge_methods(method_results, selected, primary_metric) if method_results else None
+    return {
+        "generated_at": datetime.now(timezone.utc).isoformat(),
+        "primary_metric": primary_metric,
+        "compatibility": compat,
+        "base_models": [{"exp_id": e.get("experiment_id"),
+                        "model_type": e.get("config", {}).get("model_type"),
+                        primary_metric: e.get("metrics", {}).get(primary_metric)}
+                       for e in selected],
+        "plans": plans,
+        "comparison": comparison,
+    }
+def save_merge_report(report: dict, output_dir: str = "experiments/merges") -> Path:
+    out = Path(output_dir); out.mkdir(parents=True, exist_ok=True)
+    ts = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+    fp = out / f"merge-{ts}.yaml"
+    with open(fp, "w") as f: yaml.dump(json.loads(json.dumps(report, default=str)), f, default_flow_style=False, sort_keys=False)
+    return fp
+def format_merge_report(report: dict) -> str:
+    if "error" in report: return f"ERROR: {report['error']}"
+    metric = report.get("primary_metric", "metric")
+    lines = ["# Model Merge Analysis", "",
+             f"*Generated {report.get('generated_at', 'N/A')[:19]}*", ""]
+    # Compatibility
+    compat = report.get("compatibility", {})
+    lines.append(f"**Compatibility:** {'✓' if compat.get('compatible') else '✗'} {compat.get('reason', '')}")
+    lines.append("")
+    # Base models
+    lines.extend(["## Base Models", "",
+                  f"| Experiment | Model Type | {metric} |",
+                  "|------------|------------|--------|"])
+    for m in report.get("base_models", []):
+        val = m.get(metric, "N/A")
+        val_str = f"{val:.4f}" if isinstance(val, float) else str(val)
+        lines.append(f"| {m.get('exp_id', '?')} | {m.get('model_type', '?')} | {val_str} |")
+    lines.append("")
+    # Methods
+    plans = report.get("plans", {})
+    if plans:
+        lines.extend(["## Available Methods", ""])
+        for name, plan in plans.items():
+            lines.append(f"- **{name}:** {plan.get('description', '')}")
+        lines.append("")
+    # Comparison (if results available)
+    comparison = report.get("comparison")
+    if comparison:
+        lines.extend(["## Results", "",
+                      f"| Method | {metric} | Δ vs Best Single |",
+                      "|--------|--------|------------------|"])
+        for r in comparison.get("results", []):
+            val = f"{r.get('metric_value', 0):.4f}"
+            delta = f"{r.get('delta', 0):+.4f}" if r.get("delta") is not None else "—"
+            marker = " ← BEST" if r["method"] == comparison.get("best_method") and r["method"] != "best_single" else ""
+            lines.append(f"| {r['method']} | {val} | {delta} |{marker}")
+        lines.append("")
+        imp = comparison.get("improvement", 0)
+        if imp > 0:
+            lines.append(f"**{comparison['best_method']} improves by {imp:+.4f} over best single model — zero latency cost.**")
+    return "\n".join(lines)
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Model merging")
+    parser.add_argument("exp_ids", nargs="*", help="Experiment IDs to merge")
+    parser.add_argument("--method", choices=MERGE_METHODS, help="Specific merge method")
+    parser.add_argument("--config", default="config.yaml")
+    parser.add_argument("--log", default=DEFAULT_LOG_PATH)
+    parser.add_argument("--json", action="store_true")
+    args = parser.parse_args()
+    report = merge_analysis(exp_ids=args.exp_ids or None, config_path=args.config, log_path=args.log)
+    if "error" not in report:
+        fp = save_merge_report(report); print(f"Saved to {fp}", file=sys.stderr)
+    print(json.dumps(report, indent=2, default=str) if args.json else format_merge_report(report))
+if __name__ == "__main__": main()

package/templates/scripts/model_pruning.py ADDED Viewed

@@ -0,0 +1,182 @@
+#!/usr/bin/env python3
+"""Weight pruning for the autoresearch pipeline.
+Structured and unstructured weight pruning. Measures accuracy at different
+sparsity levels, finds the knee point, and plans pruned model production.
+Usage:
+    python scripts/model_pruning.py exp-042
+    python scripts/model_pruning.py exp-042 --sparsity 0.5,0.75,0.9
+    python scripts/model_pruning.py exp-042 --method magnitude
+    python scripts/model_pruning.py --json
+"""
+from __future__ import annotations
+import argparse
+import json
+import math
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+import numpy as np
+import yaml
+from scripts.turing_io import load_config, load_experiments
+# Default path of the experiment log handed to load_experiments() by analyze_pruning().
+DEFAULT_LOG_PATH = "experiments/log.jsonl"
+# Sparsity fractions swept when the caller supplies none (0.0 means no weights removed).
+DEFAULT_SPARSITY_LEVELS = [0.0, 0.50, 0.75, 0.90, 0.95]
+# Pruning strategies accepted by the --method CLI flag.
+PRUNING_METHODS = ["magnitude", "structured", "lottery"]
+def plan_sparsity_sweep(
+    sparsity_levels: list[float] | None = None,
+) -> list[dict]:
+    if sparsity_levels is None:
+        sparsity_levels = DEFAULT_SPARSITY_LEVELS
+    return [{"sparsity": s, "description": f"{s*100:.0f}% weights removed"} for s in sparsity_levels]
+def compute_pruning_plan(
+    model_type: str,
+    hyperparams: dict,
+    method: str,
+    sparsity: float,
+) -> dict:
+    plan = {"method": method, "sparsity": sparsity, "config_changes": {}}
+    if "xgboost" in model_type.lower() or "lightgbm" in model_type.lower() or "forest" in model_type.lower():
+        n_est = hyperparams.get("n_estimators", 100)
+        plan["config_changes"]["n_estimators"] = max(1, int(n_est * (1 - sparsity)))
+        plan["strategy"] = "reduce_estimators"
+    elif method == "magnitude":
+        plan["strategy"] = "zero_small_weights"
+        plan["description"] = f"Zero out smallest {sparsity*100:.0f}% of weights by absolute value"
+    elif method == "structured":
+        plan["strategy"] = "remove_neurons"
+        plan["description"] = f"Remove {sparsity*100:.0f}% of neurons/filters by importance"
+    elif method == "lottery":
+        plan["strategy"] = "iterative_magnitude_with_rewind"
+        plan["description"] = f"Iterative pruning to {sparsity*100:.0f}% with weight rewinding"
+    return plan
+def find_knee_point(sweep_results: list[dict], metric_key: str = "accuracy") -> dict | None:
+    if len(sweep_results) < 3:
+        return None
+    sparsities = [r["sparsity"] for r in sweep_results]
+    metrics = [r.get(metric_key, 0) for r in sweep_results]
+    max_drop = 0
+    knee_idx = None
+    for i in range(1, len(metrics)):
+        drop = metrics[i - 1] - metrics[i]
+        if drop > max_drop:
+            max_drop = drop
+            knee_idx = i
+    if knee_idx and knee_idx > 0:
+        return {"sparsity": sparsities[knee_idx - 1],
+                "metric_before_knee": round(metrics[knee_idx - 1], 6),
+                "metric_after_knee": round(metrics[knee_idx], 6),
+                "drop_at_knee": round(max_drop, 6)}
+    return None
+def estimate_speedup(sparsity: float) -> float:
+    if sparsity <= 0:
+        return 1.0
+    return round(1.0 / (1.0 - sparsity * 0.7), 2)
+def estimate_size_reduction(sparsity: float) -> float:
+    return round(sparsity * 100, 1)
+def analyze_pruning(
+    sweep_results: list[dict] | None = None,
+    exp_id: str | None = None,
+    method: str = "magnitude",
+    config_path: str = "config.yaml",
+    log_path: str = DEFAULT_LOG_PATH,
+) -> dict:
+    config = load_config(config_path)
+    primary_metric = config.get("evaluation", {}).get("primary_metric", "accuracy")
+    if sweep_results:
+        knee = find_knee_point(sweep_results, primary_metric)
+        for r in sweep_results:
+            r["speedup"] = estimate_speedup(r["sparsity"])
+            r["size_reduction_pct"] = estimate_size_reduction(r["sparsity"])
+        recommended = None
+        for r in sweep_results:
+            delta = abs(r.get(primary_metric, 0) - sweep_results[0].get(primary_metric, 0))
+            if delta < 0.005 and r["sparsity"] > 0:
+                recommended = r
+        return {
+            "generated_at": datetime.now(timezone.utc).isoformat(),
+            "experiment_id": exp_id, "method": method, "primary_metric": primary_metric,
+            "sweep_results": sweep_results, "knee_point": knee,
+            "recommended": recommended,
+        }
+    experiments = load_experiments(log_path)
+    exp = next((e for e in experiments if e.get("experiment_id") == exp_id), None) if exp_id else None
+    model_type = exp.get("config", {}).get("model_type", "unknown") if exp else "unknown"
+    hyperparams = exp.get("config", {}).get("hyperparams", {}) if exp else {}
+    levels = plan_sparsity_sweep()
+    plans = [compute_pruning_plan(model_type, hyperparams, method, s["sparsity"]) for s in levels]
+    return {
+        "action": "plan", "generated_at": datetime.now(timezone.utc).isoformat(),
+        "experiment_id": exp_id, "model_type": model_type, "method": method,
+        "sparsity_levels": levels, "plans": plans,
+        "message": f"Run {len(levels)} experiments at sparsity levels: {', '.join(s['description'] for s in levels)}",
+    }
+def save_pruning_report(report: dict, output_dir: str = "experiments/pruning") -> Path:
+    out = Path(output_dir); out.mkdir(parents=True, exist_ok=True)
+    exp_id = report.get("experiment_id", "unknown")
+    fp = out / f"{exp_id}-pruning.yaml"
+    with open(fp, "w") as f: yaml.dump(json.loads(json.dumps(report, default=str)), f, default_flow_style=False, sort_keys=False)
+    return fp
+def format_pruning_report(report: dict) -> str:
+    if "error" in report: return f"ERROR: {report['error']}"
+    if report.get("action") == "plan":
+        lines = ["# Pruning Plan", "", f"**Model:** {report.get('model_type', '?')}", f"**Method:** {report.get('method', '?')}", ""]
+        for p in report.get("plans", []):
+            lines.append(f"- {p.get('sparsity', 0)*100:.0f}%: {p.get('strategy', '?')}")
+        return "\n".join(lines)
+    metric = report.get("primary_metric", "metric")
+    lines = [f"# Pruning Results: {report.get('experiment_id', '?')}", "",
+             f"| Sparsity | {metric} | Speedup | Size Reduction |",
+             "|----------|--------|---------|----------------|"]
+    for r in report.get("sweep_results", []):
+        val = f"{r.get(metric, 0):.4f}" if isinstance(r.get(metric), (int, float)) else "N/A"
+        lines.append(f"| {r['sparsity']*100:.0f}% | {val} | {r.get('speedup', '?')}x | {r.get('size_reduction_pct', '?')}% |")
+    knee = report.get("knee_point")
+    if knee:
+        lines.extend(["", f"**Knee point:** {knee['sparsity']*100:.0f}% sparsity (accuracy drops {knee['drop_at_knee']:.4f})"])
+    rec = report.get("recommended")
+    if rec:
+        lines.extend(["", f"**Recommended:** {rec['sparsity']*100:.0f}% sparsity ({rec.get('speedup', '?')}x speedup, <0.5% accuracy loss)"])
+    return "\n".join(lines)
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Weight pruning")
+    parser.add_argument("exp_id", nargs="?")
+    parser.add_argument("--sparsity", help="Comma-separated sparsity levels")
+    parser.add_argument("--method", choices=PRUNING_METHODS, default="magnitude")
+    parser.add_argument("--config", default="config.yaml")
+    parser.add_argument("--log", default=DEFAULT_LOG_PATH)
+    parser.add_argument("--json", action="store_true")
+    args = parser.parse_args()
+    report = analyze_pruning(exp_id=args.exp_id, method=args.method, config_path=args.config, log_path=args.log)
+    if "error" not in report:
+        fp = save_pruning_report(report); print(f"Saved to {fp}", file=sys.stderr)
+    print(json.dumps(report, indent=2, default=str) if args.json else format_pruning_report(report))
+if __name__ == "__main__": main()

package/templates/scripts/model_quantization.py ADDED Viewed

@@ -0,0 +1,177 @@
+#!/usr/bin/env python3
+"""Post-training quantization for the autoresearch pipeline.
+Quantize model weights from FP32 to INT8/FP16, measure accuracy loss
+per precision level, and plan quantization-aware training if needed.
+Usage:
+    python scripts/model_quantization.py exp-042
+    python scripts/model_quantization.py exp-042 --precision int8
+    python scripts/model_quantization.py --json
+"""
+from __future__ import annotations
+import argparse
+import json
+import math
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+import numpy as np
+import yaml
+from scripts.turing_io import load_config, load_experiments
+# Default path of the experiment log handed to load_experiments() by analyze_quantization().
+DEFAULT_LOG_PATH = "experiments/log.jsonl"
+# Precision levels planned by compare_precision_levels() when no sweep results are supplied.
+PRECISION_LEVELS = ["fp32", "fp16", "int8_dynamic", "int8_static"]
+QAT_THRESHOLD = 0.01  # If PTQ accuracy loss > 1%, suggest QAT
+def compute_quantization_plan(
+    precision: str,
+    model_size_bytes: int | None = None,
+    latency_ms: float | None = None,
+) -> dict:
+    size_factors = {"fp32": 1.0, "fp16": 0.5, "int8_dynamic": 0.25, "int8_static": 0.25}
+    latency_factors = {"fp32": 1.0, "fp16": 0.58, "int8_dynamic": 0.39, "int8_static": 0.37}
+    factor_s = size_factors.get(precision, 1.0)
+    factor_l = latency_factors.get(precision, 1.0)
+    plan = {
+        "precision": precision,
+        "size_factor": factor_s,
+        "latency_factor": factor_l,
+        "estimated_size_bytes": int(model_size_bytes * factor_s) if model_size_bytes else None,
+        "estimated_latency_ms": round(latency_ms * factor_l, 2) if latency_ms else None,
+        "size_reduction_pct": round((1 - factor_s) * 100, 1),
+        "speedup": round(1 / factor_l, 2) if factor_l > 0 else None,
+    }
+    if precision == "fp16":
+        plan["description"] = "Half-precision floating point — GPU inference"
+        plan["method"] = "cast_to_fp16"
+    elif precision == "int8_dynamic":
+        plan["description"] = "Dynamic INT8 — weights quantized, activations at runtime"
+        plan["method"] = "dynamic_quantization"
+    elif precision == "int8_static":
+        plan["description"] = "Static INT8 — calibrated activation ranges, best accuracy"
+        plan["method"] = "static_quantization"
+        plan["requires_calibration"] = True
+    else:
+        plan["description"] = "Full precision (baseline)"
+        plan["method"] = "none"
+    return plan
+def compare_precision_levels(
+    sweep_results: list[dict] | None = None,
+    model_size_bytes: int | None = None,
+    latency_ms: float | None = None,
+    primary_metric: str = "accuracy",
+) -> dict:
+    """Compare quantization results across precision levels."""
+    if sweep_results:
+        baseline = next((r for r in sweep_results if r.get("precision") == "fp32"), sweep_results[0])
+        baseline_metric = baseline.get(primary_metric, 0)
+        for r in sweep_results:
+            r["delta"] = round(r.get(primary_metric, 0) - baseline_metric, 6)
+            plan = compute_quantization_plan(r["precision"], model_size_bytes, latency_ms)
+            r.update({k: v for k, v in plan.items() if k not in r})
+        best = min(
+            [r for r in sweep_results if r["precision"] != "fp32"],
+            key=lambda r: abs(r.get("delta", 0)) + (1 - r.get("speedup", 1)) * 0.1,
+            default=None,
+        )
+        needs_qat = any(abs(r.get("delta", 0)) > QAT_THRESHOLD for r in sweep_results if "int8" in r.get("precision", ""))
+        return {
+            "sweep_results": sweep_results,
+            "recommended": best,
+            "needs_qat": needs_qat,
+        }
+    # Plan mode
+    plans = [compute_quantization_plan(p, model_size_bytes, latency_ms) for p in PRECISION_LEVELS]
+    return {"action": "plan", "plans": plans}
+def analyze_quantization(
+    sweep_results: list[dict] | None = None,
+    exp_id: str | None = None,
+    config_path: str = "config.yaml",
+    log_path: str = DEFAULT_LOG_PATH,
+) -> dict:
+    config = load_config(config_path)
+    primary_metric = config.get("evaluation", {}).get("primary_metric", "accuracy")
+    experiments = load_experiments(log_path)
+    exp = next((e for e in experiments if e.get("experiment_id") == exp_id), None) if exp_id else None
+    model_size = exp.get("metrics", {}).get("model_size_bytes") if exp else None
+    latency = exp.get("metrics", {}).get("latency_ms", exp.get("metrics", {}).get("inference_ms")) if exp else None
+    comparison = compare_precision_levels(sweep_results, model_size, latency, primary_metric)
+    return {
+        "generated_at": datetime.now(timezone.utc).isoformat(),
+        "experiment_id": exp_id,
+        "primary_metric": primary_metric,
+        **comparison,
+    }
+def save_quantization_report(report: dict, output_dir: str = "experiments/quantization") -> Path:
+    out = Path(output_dir); out.mkdir(parents=True, exist_ok=True)
+    exp_id = report.get("experiment_id", "unknown")
+    fp = out / f"{exp_id}-quantization.yaml"
+    with open(fp, "w") as f: yaml.dump(json.loads(json.dumps(report, default=str)), f, default_flow_style=False, sort_keys=False)
+    return fp
+def format_quantization_report(report: dict) -> str:
+    if "error" in report: return f"ERROR: {report['error']}"
+    if report.get("action") == "plan":
+        lines = ["# Quantization Plan", ""]
+        for p in report.get("plans", []):
+            lines.append(f"- **{p['precision']}**: {p['description']} (size: {p['size_reduction_pct']}% reduction, speedup: {p.get('speedup', '?')}x)")
+        return "\n".join(lines)
+    metric = report.get("primary_metric", "metric")
+    lines = [f"# Quantization Results: {report.get('experiment_id', '?')}", "",
+             f"| Precision | {metric} | Delta | Speedup | Size Reduction |",
+             "|-----------|--------|-------|---------|----------------|"]
+    for r in report.get("sweep_results", []):
+        val = f"{r.get(metric, 0):.4f}" if isinstance(r.get(metric), (int, float)) else "N/A"
+        delta = f"{r.get('delta', 0):+.4f}" if r.get("delta") is not None else "—"
+        lines.append(f"| {r['precision']} | {val} | {delta} | {r.get('speedup', '?')}x | {r.get('size_reduction_pct', '?')}% |")
+    rec = report.get("recommended")
+    if rec:
+        lines.extend(["", f"**Recommended:** {rec['precision']} ({rec.get('delta', 0):+.4f} accuracy, {rec.get('speedup', '?')}x speedup)"])
+    if report.get("needs_qat"):
+        lines.extend(["", "**Note:** INT8 accuracy loss > 1% — consider quantization-aware training (QAT)"])
+    return "\n".join(lines)
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Post-training quantization")
+    parser.add_argument("exp_id", nargs="?")
+    parser.add_argument("--precision", help="Specific precision level")
+    parser.add_argument("--config", default="config.yaml")
+    parser.add_argument("--log", default=DEFAULT_LOG_PATH)
+    parser.add_argument("--json", action="store_true")
+    args = parser.parse_args()
+    report = analyze_quantization(exp_id=args.exp_id, config_path=args.config, log_path=args.log)
+    if "error" not in report:
+        fp = save_quantization_report(report); print(f"Saved to {fp}", file=sys.stderr)
+    print(json.dumps(report, indent=2, default=str) if args.json else format_quantization_report(report))
+if __name__ == "__main__": main()

package/templates/scripts/scaffold.py CHANGED Viewed

@@ -126,6 +126,17 @@ TEMPLATE_DIRS = {
         "calibration.py",
         "feature_intelligence.py",
         "curriculum_optimizer.py",
+        "model_pruning.py",
+        "model_quantization.py",
+        "model_merger.py",
+        "architecture_surgery.py",
+        "trend_analysis.py",
+        "session_flashback.py",
+        "experiment_archive.py",
+        "experiment_annotations.py",
+        "experiment_search.py",
+        "experiment_templates.py",
+        "experiment_replay.py",
     ],
     "tests": ["__init__.py", "conftest.py"],
 }
@@ -164,6 +175,15 @@ DIRECTORIES_TO_CREATE = [
     "experiments/calibration",
     "experiments/features",
     "experiments/curriculum",
+    "experiments/pruning",
+    "experiments/quantization",
+    "experiments/merges",
+    "experiments/surgery",
+    "experiments/trends",
+    "experiments/flashbacks",
+    "experiments/archive",
+    "experiments/searches",
+    "experiments/replays",
     "experiments/logs",
     "models/best",
     "models/archive",