harness-evolver 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,214 @@
1
+ #!/usr/bin/env python3
2
+ """Evaluation orchestrator for Harness Evolver.
3
+
4
+ Commands:
5
+ validate --harness PATH [--config PATH]
6
+ run --harness PATH --tasks-dir PATH --eval PATH --traces-dir PATH --scores PATH
7
+ [--config PATH] [--timeout SECONDS]
8
+
9
+ Runs harness per task, captures traces (stdout/stderr/timing), then calls user's eval script.
10
+ Stdlib-only. No external dependencies.
11
+ """
12
+
13
+ import argparse
14
+ import json
15
+ import os
16
+ import shutil
17
+ import subprocess
18
+ import sys
19
+ import tempfile
20
+ import time
21
+
22
+
23
+ def _run_harness_on_task(harness, config, task_input_path, output_path, task_traces_dir, timeout, env=None):
24
+ """Run the harness on a single task. Returns (success, elapsed_ms, stdout, stderr)."""
25
+ cmd = ["python3", harness, "--input", task_input_path, "--output", output_path]
26
+ if task_traces_dir:
27
+ extra_dir = os.path.join(task_traces_dir, "extra")
28
+ os.makedirs(extra_dir, exist_ok=True)
29
+ cmd.extend(["--traces-dir", extra_dir])
30
+ if config and os.path.exists(config):
31
+ cmd.extend(["--config", config])
32
+
33
+ start = time.time()
34
+ try:
35
+ result = subprocess.run(
36
+ cmd, capture_output=True, text=True, timeout=timeout, env=env,
37
+ )
38
+ elapsed_ms = (time.time() - start) * 1000
39
+ return result.returncode == 0, elapsed_ms, result.stdout, result.stderr
40
+ except subprocess.TimeoutExpired:
41
+ elapsed_ms = (time.time() - start) * 1000
42
+ return False, elapsed_ms, "", f"TIMEOUT after {timeout}s"
43
+ except Exception as e:
44
+ elapsed_ms = (time.time() - start) * 1000
45
+ return False, elapsed_ms, "", str(e)
46
+
47
+
48
def cmd_validate(args):
    """Smoke-test a harness: run it on a dummy task and check its output contract."""
    harness = args.harness
    config = getattr(args, "config", None)

    if not os.path.exists(harness):
        print(f"FAIL: harness not found: {harness}", file=sys.stderr)
        sys.exit(1)

    with tempfile.TemporaryDirectory() as tmpdir:
        input_path = os.path.join(tmpdir, "input.json")
        output_path = os.path.join(tmpdir, "output.json")
        dummy_task = {"id": "validation", "input": "test input for validation", "metadata": {}}
        with open(input_path, "w") as fh:
            json.dump(dummy_task, fh)

        success, elapsed, stdout, stderr = _run_harness_on_task(
            harness, config, input_path, output_path, None, timeout=30,
        )

        # Staged contract check: clean exit, file produced, valid JSON,
        # required fields present. Each failure exits immediately.
        if not success:
            print(f"FAIL: harness exited with error.\nstderr: {stderr}", file=sys.stderr)
            sys.exit(1)

        if not os.path.exists(output_path):
            print("FAIL: harness did not create output file.", file=sys.stderr)
            sys.exit(1)

        try:
            with open(output_path) as fh:
                output = json.load(fh)
        except (json.JSONDecodeError, ValueError) as e:
            print(f"FAIL: output is not valid JSON: {e}", file=sys.stderr)
            sys.exit(1)

        if "id" not in output or "output" not in output:
            print(f"FAIL: output missing 'id' or 'output' fields. Got: {output}", file=sys.stderr)
            sys.exit(1)

        print(f"OK: harness validated in {elapsed:.0f}ms. Output: {output}")
87
+
88
+
89
def cmd_run(args):
    """Run the harness on every task, capture traces, then invoke the eval script.

    Writes per-task traces (input.json / output.json), aggregate timing.json,
    and combined stdout/stderr logs under --traces-dir. The eval script is
    then expected to produce the --scores file. Exits non-zero when no tasks
    are found or when the eval script fails.
    """
    harness = args.harness
    config = getattr(args, "config", None)
    tasks_dir = args.tasks_dir
    eval_script = getattr(args, "eval")
    traces_dir = args.traces_dir
    scores_path = args.scores
    timeout = args.timeout

    os.makedirs(traces_dir, exist_ok=True)

    task_files = sorted(f for f in os.listdir(tasks_dir) if f.endswith(".json"))
    if not task_files:
        print(f"FAIL: no .json task files in {tasks_dir}", file=sys.stderr)
        sys.exit(1)

    all_stdout = []
    all_stderr = []
    timing = {"per_task": {}}

    # LangSmith: setup auto-tracing env vars if configured. The project
    # config is looked up two levels above traces_dir — assumes the layout
    # <base>/<version>/traces; TODO confirm against the caller.
    langsmith_env = None
    project_config_path = os.path.join(os.path.dirname(os.path.dirname(traces_dir)), "config.json")
    if os.path.exists(project_config_path):
        with open(project_config_path) as f:
            project_config = json.load(f)
        ls = project_config.get("eval", {}).get("langsmith", {})
        if ls.get("enabled"):
            api_key = os.environ.get(ls.get("api_key_env", "LANGSMITH_API_KEY"), "")
            if api_key:
                version = os.path.basename(os.path.dirname(traces_dir))
                langsmith_env = {
                    **os.environ,
                    "LANGCHAIN_TRACING_V2": "true",
                    "LANGCHAIN_API_KEY": api_key,
                    "LANGCHAIN_PROJECT": f"{ls.get('project_prefix', 'harness-evolver')}-{version}",
                }

    # TemporaryDirectory (instead of the original bare mkdtemp, which was
    # never removed) guarantees the scratch results dir is cleaned up even
    # when we sys.exit() on an eval failure.
    with tempfile.TemporaryDirectory() as results_dir:
        for task_file in task_files:
            task_path = os.path.join(tasks_dir, task_file)
            with open(task_path) as f:
                task = json.load(f)
            task_id = task["id"]

            # Never leak the expected answer to the harness under test.
            task_input = {k: v for k, v in task.items() if k != "expected"}

            task_traces_dir = os.path.join(traces_dir, task_id)
            os.makedirs(task_traces_dir, exist_ok=True)

            input_path = os.path.join(task_traces_dir, "input.json")
            with open(input_path, "w") as f:
                json.dump(task_input, f, indent=2)

            output_path = os.path.join(results_dir, task_file)

            success, elapsed_ms, stdout, stderr = _run_harness_on_task(
                harness, config, input_path, output_path, task_traces_dir, timeout,
                env=langsmith_env,
            )

            if os.path.exists(output_path):
                shutil.copy2(output_path, os.path.join(task_traces_dir, "output.json"))
            else:
                # Synthesize an empty output so the eval script sees every task.
                with open(os.path.join(task_traces_dir, "output.json"), "w") as f:
                    json.dump({"id": task_id, "output": "", "error": "harness failed"}, f)

            timing["per_task"][task_id] = round(elapsed_ms, 1)
            all_stdout.append(f"--- {task_id} ---\n{stdout}")
            all_stderr.append(f"--- {task_id} ---\n{stderr}")

        timing["total_ms"] = round(sum(timing["per_task"].values()), 1)
        with open(os.path.join(traces_dir, "timing.json"), "w") as f:
            json.dump(timing, f, indent=2)
        with open(os.path.join(traces_dir, "stdout.log"), "w") as f:
            f.write("\n".join(all_stdout))
        with open(os.path.join(traces_dir, "stderr.log"), "w") as f:
            f.write("\n".join(all_stderr))

        eval_cmd = [
            "python3", eval_script,
            "--results-dir", results_dir,
            "--tasks-dir", tasks_dir,
            "--scores", scores_path,
        ]
        try:
            result = subprocess.run(eval_cmd, capture_output=True, text=True, timeout=120)
        except subprocess.TimeoutExpired:
            # Previously an uncaught traceback; report like other failures.
            print("FAIL: eval script timed out after 120s.", file=sys.stderr)
            sys.exit(1)
        if result.returncode != 0:
            print(f"FAIL: eval script failed.\nstderr: {result.stderr}", file=sys.stderr)
            sys.exit(1)

    if os.path.exists(scores_path):
        # `with` closes the handle deterministically — json.load(open(...))
        # leaked the file descriptor in the original.
        with open(scores_path) as f:
            scores = json.load(f)
        print(f"Evaluation complete. combined_score: {scores.get('combined_score', 'N/A')}")
    else:
        print("WARNING: eval script did not produce scores file.", file=sys.stderr)
184
+
185
+
186
def main():
    """CLI entry point: parse arguments and dispatch to a subcommand."""
    parser = argparse.ArgumentParser(description="Harness Evolver evaluation orchestrator")
    sub = parser.add_subparsers(dest="command")

    p_val = sub.add_parser("validate")
    p_val.add_argument("--harness", required=True)
    p_val.add_argument("--config", default=None)

    # Declarative option table keeps the run-subcommand flags in one place.
    p_run = sub.add_parser("run")
    for flag, opts in (
        ("--harness", {"required": True}),
        ("--config", {"default": None}),
        ("--tasks-dir", {"required": True}),
        ("--eval", {"required": True}),
        ("--traces-dir", {"required": True}),
        ("--scores", {"required": True}),
        ("--timeout", {"type": int, "default": 60}),
    ):
        p_run.add_argument(flag, **opts)

    args = parser.parse_args()
    handlers = {"validate": cmd_validate, "run": cmd_run}
    handler = handlers.get(args.command)
    if handler is None:
        parser.print_help()
        sys.exit(1)
    handler(args)
211
+
212
+
213
+ if __name__ == "__main__":
214
+ main()
package/tools/init.py ADDED
@@ -0,0 +1,231 @@
1
+ #!/usr/bin/env python3
2
+ """Project initializer for Harness Evolver.
3
+
4
+ Usage:
5
+ init.py --harness PATH --eval PATH --tasks PATH --base-dir PATH
6
+ [--harness-config PATH] [--tools-dir PATH]
7
+
8
+ Creates the .harness-evolver/ directory structure, copies baseline files,
9
+ runs validation, evaluates the baseline, and initializes state.
10
+ Stdlib-only. No external dependencies.
11
+ """
12
+
13
+ import argparse
14
+ import json
15
+ import os
16
+ import shutil
17
+ import subprocess
18
+ import sys
19
+ import tempfile
20
+
21
+
22
+ def _detect_langsmith():
23
+ """Auto-detect LangSmith API key and return config section."""
24
+ if os.environ.get("LANGSMITH_API_KEY"):
25
+ return {
26
+ "enabled": True,
27
+ "api_key_env": "LANGSMITH_API_KEY",
28
+ "project_prefix": "harness-evolver",
29
+ }
30
+ return {"enabled": False}
31
+
32
+
33
+ def _check_langsmith_cli():
34
+ """Check if langsmith-cli is installed."""
35
+ try:
36
+ r = subprocess.run(["langsmith-cli", "self", "detect"],
37
+ capture_output=True, text=True, timeout=5)
38
+ return r.returncode == 0
39
+ except FileNotFoundError:
40
+ return False
41
+
42
+
43
+ def _detect_stack(harness_path):
44
+ """Detect technology stack from harness imports."""
45
+ detect_stack_py = os.path.join(os.path.dirname(__file__), "detect_stack.py")
46
+ if not os.path.exists(detect_stack_py):
47
+ return {}
48
+ try:
49
+ r = subprocess.run(
50
+ ["python3", detect_stack_py, harness_path],
51
+ capture_output=True, text=True, timeout=30,
52
+ )
53
+ if r.returncode == 0 and r.stdout.strip():
54
+ return json.loads(r.stdout)
55
+ except Exception:
56
+ pass
57
+ return {}
58
+
59
+
60
+ def _check_context7_available():
61
+ """Check if Context7 MCP is configured in Claude Code."""
62
+ settings_paths = [
63
+ os.path.expanduser("~/.claude/settings.json"),
64
+ os.path.expanduser("~/.claude.json"),
65
+ ]
66
+ for path in settings_paths:
67
+ if os.path.exists(path):
68
+ try:
69
+ with open(path) as f:
70
+ settings = json.load(f)
71
+ mcp = settings.get("mcpServers", {})
72
+ if "context7" in mcp or "Context7" in mcp:
73
+ return True
74
+ except (json.JSONDecodeError, KeyError):
75
+ pass
76
+ return False
77
+
78
+
79
def main():
    """Initialize a .harness-evolver/ project.

    Creates the directory layout, copies the baseline harness / eval script /
    tasks, writes config.json (including LangSmith and stack detection),
    validates and evaluates the baseline via evaluate.py, and seeds the
    state files via state.py. Exits non-zero on validation or state failure.
    """
    parser = argparse.ArgumentParser(description="Initialize Harness Evolver project")
    parser.add_argument("--harness", required=True, help="Path to harness script")
    parser.add_argument("--eval", required=True, help="Path to eval script")
    parser.add_argument("--tasks", required=True, help="Path to tasks directory")
    parser.add_argument("--base-dir", required=True, help="Path for .harness-evolver/")
    parser.add_argument("--harness-config", default=None, help="Path to harness config.json")
    parser.add_argument("--tools-dir", default=None, help="Path to tools directory")
    args = parser.parse_args()

    base = args.base_dir
    tools = args.tools_dir or os.path.dirname(__file__)

    evaluate_py = os.path.join(tools, "evaluate.py")
    state_py = os.path.join(tools, "state.py")

    # 1. Create directory structure
    for d in ["baseline", "eval/tasks", "harnesses"]:
        os.makedirs(os.path.join(base, d), exist_ok=True)

    # 2. Copy baseline harness (and its optional config)
    shutil.copy2(args.harness, os.path.join(base, "baseline", "harness.py"))
    if args.harness_config and os.path.exists(args.harness_config):
        shutil.copy2(args.harness_config, os.path.join(base, "baseline", "config.json"))

    # 3. Copy eval script and tasks (top-level files only)
    shutil.copy2(args.eval, os.path.join(base, "eval", "eval.py"))
    for name in os.listdir(args.tasks):
        src = os.path.join(args.tasks, name)
        if os.path.isfile(src):
            shutil.copy2(src, os.path.join(base, "eval", "tasks", name))

    # 4. Generate config.json. Stack detection runs first so the config is
    # complete before the single write — the original wrote the file twice.
    harness_name = os.path.basename(args.harness)
    eval_name = os.path.basename(args.eval)
    stack = _detect_stack(args.harness)
    config = {
        "version": "0.1.0",
        "harness": {
            "command": f"python3 {harness_name}",
            "args": ["--input", "{input}", "--output", "{output}",
                     "--traces-dir", "{traces_dir}", "--config", "{config}"],
            "timeout_per_task_sec": 60,
        },
        "eval": {
            "command": f"python3 {eval_name}",
            "args": ["--results-dir", "{results_dir}", "--tasks-dir", "{tasks_dir}",
                     "--scores", "{scores}"],
            "langsmith": _detect_langsmith(),
        },
        "evolution": {
            "max_iterations": 10,
            "candidates_per_iter": 1,
            "stagnation_limit": 3,
            "stagnation_threshold": 0.01,
            "target_score": None,
        },
        "paths": {
            "baseline": "baseline/",
            "eval_tasks": "eval/tasks/",
            "eval_script": "eval/eval.py",
            "harnesses": "harnesses/",
        },
        "stack": {
            "detected": stack,
            "documentation_hint": "use context7",
            "auto_detected": True,
        },
    }
    with open(os.path.join(base, "config.json"), "w") as f:
        json.dump(config, f, indent=2)

    ls_config = config["eval"].get("langsmith", {})
    if ls_config.get("enabled"):
        print(" LangSmith tracing enabled (LANGSMITH_API_KEY detected)")
        if _check_langsmith_cli():
            print(" langsmith-cli detected — proposer will use it for trace analysis")
        else:
            print(" Recommendation: install langsmith-cli for rich trace analysis:")
            print(" uv tool install langsmith-cli && langsmith-cli auth login")

    if stack:
        print("Stack detected:")
        for lib_info in stack.values():
            print(f" {lib_info['display']}")
        if not _check_context7_available():
            print("\nRecommendation: install Context7 MCP for up-to-date documentation:")
            print(" claude mcp add context7 -- npx -y @upstash/context7-mcp@latest")

    # 5. Validate baseline harness
    print("Validating baseline harness...")
    val_args = ["python3", evaluate_py, "validate",
                "--harness", os.path.join(base, "baseline", "harness.py")]
    config_path = os.path.join(base, "baseline", "config.json")
    if os.path.exists(config_path):
        val_args.extend(["--config", config_path])
    r = subprocess.run(val_args, capture_output=True, text=True)
    if r.returncode != 0:
        print(f"FAIL: baseline harness validation failed.\n{r.stderr}", file=sys.stderr)
        sys.exit(1)
    print(r.stdout.strip())

    # 6. Evaluate baseline. Traces go to a throwaway dir; only the score is kept.
    print("Evaluating baseline harness...")
    baseline_traces = tempfile.mkdtemp()
    baseline_scores = os.path.join(base, "baseline_scores.json")
    eval_args = [
        "python3", evaluate_py, "run",
        "--harness", os.path.join(base, "baseline", "harness.py"),
        "--tasks-dir", os.path.join(base, "eval", "tasks"),
        "--eval", os.path.join(base, "eval", "eval.py"),
        "--traces-dir", baseline_traces,
        "--scores", baseline_scores,
        "--timeout", "60",
    ]
    if os.path.exists(config_path):
        eval_args.extend(["--config", config_path])
    r = subprocess.run(eval_args, capture_output=True, text=True, timeout=300)
    baseline_score = 0.0
    if r.returncode != 0:
        print(f"WARNING: baseline evaluation failed. Using score 0.0.\n{r.stderr}", file=sys.stderr)
    elif not os.path.exists(baseline_scores):
        # evaluate.py can exit 0 without producing a scores file; the
        # original crashed here with FileNotFoundError in that case.
        print("WARNING: baseline evaluation failed. Using score 0.0.", file=sys.stderr)
    else:
        print(r.stdout.strip())
        with open(baseline_scores) as f:  # `with` — don't leak the handle
            scores = json.load(f)
        baseline_score = scores.get("combined_score", 0.0)

    if os.path.exists(baseline_scores):
        os.remove(baseline_scores)

    # 7. Initialize state with baseline score
    print(f"Baseline score: {baseline_score:.2f}")
    r = subprocess.run(
        ["python3", state_py, "init",
         "--base-dir", base,
         "--baseline-score", str(baseline_score)],
        capture_output=True, text=True,
    )
    if r.returncode != 0:
        print(f"FAIL: state init failed.\n{r.stderr}", file=sys.stderr)
        sys.exit(1)

    print(f"\nInitialized .harness-evolver/ at {base}")
    print(f"Baseline score: {baseline_score:.2f}")
    print("Run /harness-evolve to start the optimization loop.")
228
+
229
+
230
+ if __name__ == "__main__":
231
+ main()
package/tools/state.py ADDED
@@ -0,0 +1,219 @@
1
+ #!/usr/bin/env python3
2
+ """State manager for Harness Evolver.
3
+
4
+ Commands:
5
+ init --base-dir DIR --baseline-score FLOAT
6
+ update --base-dir DIR --version VER --scores PATH --proposal PATH
7
+ show --base-dir DIR
8
+
9
+ Manages: summary.json (source of truth), STATE.md (human view), PROPOSER_HISTORY.md (log).
10
+ Stdlib-only. No external dependencies.
11
+ """
12
+
13
+ import argparse
14
+ import json
15
+ import os
16
+ import re
17
+ import sys
18
+
19
+
20
+ def _read_json(path):
21
+ with open(path) as f:
22
+ return json.load(f)
23
+
24
+
25
+ def _write_json(path, data):
26
+ with open(path, "w") as f:
27
+ json.dump(data, f, indent=2)
28
+
29
+
30
+ def _read_text(path):
31
+ with open(path) as f:
32
+ return f.read()
33
+
34
+
35
+ def _write_text(path, text):
36
+ with open(path, "w") as f:
37
+ f.write(text)
38
+
39
+
40
+ def _summary_path(base_dir):
41
+ return os.path.join(base_dir, "summary.json")
42
+
43
+
44
+ def _state_md_path(base_dir):
45
+ return os.path.join(base_dir, "STATE.md")
46
+
47
+
48
+ def _history_path(base_dir):
49
+ return os.path.join(base_dir, "PROPOSER_HISTORY.md")
50
+
51
+
52
+ def _detect_parent(proposal_text, current_best):
53
+ """Parse 'Based on vXXX' or 'Based on baseline' from proposal text."""
54
+ match = re.search(r"[Bb]ased on (v\d+|baseline)", proposal_text)
55
+ if match:
56
+ return match.group(1)
57
+ return current_best
58
+
59
+
60
+ def _render_state_md(summary):
61
+ """Generate STATE.md from summary.json data."""
62
+ lines = ["# Harness Evolver Status", ""]
63
+ best = summary["best"]
64
+ worst = summary["worst"]
65
+ lines.append(f"**Iterations:** {summary['iterations']}")
66
+ lines.append(f"**Best:** {best['version']} ({best['combined_score']:.2f})")
67
+ lines.append(f"**Worst:** {worst['version']} ({worst['combined_score']:.2f})")
68
+ if summary["history"]:
69
+ last = summary["history"][-1]
70
+ lines.append(f"**Latest:** {last['version']} ({last['combined_score']:.2f})")
71
+ lines.append("")
72
+ lines.append("## History")
73
+ lines.append("")
74
+ lines.append("| Version | Score | Parent | Delta |")
75
+ lines.append("|---------|-------|--------|-------|")
76
+ prev_score = None
77
+ for entry in summary["history"]:
78
+ v = entry["version"]
79
+ s = entry["combined_score"]
80
+ p = entry["parent"] or "-"
81
+ if prev_score is not None and v != "baseline":
82
+ delta = s - prev_score
83
+ if delta < -0.01:
84
+ delta_str = f"{delta:+.2f} REGRESSION"
85
+ elif delta > 0.01:
86
+ delta_str = f"{delta:+.2f}"
87
+ else:
88
+ delta_str = f"{delta:+.2f} (stagnant)"
89
+ else:
90
+ delta_str = "-"
91
+ lines.append(f"| {v} | {s:.2f} | {p} | {delta_str} |")
92
+ prev_score = s
93
+ return "\n".join(lines) + "\n"
94
+
95
+
96
def cmd_init(args):
    """Create fresh state files (summary.json, STATE.md, history) seeded
    with the baseline score."""
    base_dir = args.base_dir
    score = args.baseline_score
    os.makedirs(base_dir, exist_ok=True)

    baseline_entry = {"version": "baseline", "combined_score": score, "parent": None}
    summary = {
        "iterations": 0,
        "best": {"version": "baseline", "combined_score": score},
        "worst": {"version": "baseline", "combined_score": score},
        "history": [baseline_entry],
    }
    _write_json(_summary_path(base_dir), summary)
    _write_text(_state_md_path(base_dir), _render_state_md(summary))
    _write_text(_history_path(base_dir), "# Proposer History\n")
112
+
113
+
114
def cmd_update(args):
    """Record a new harness version's score.

    Appends the version to summary.json's history, recomputes best/worst,
    regenerates STATE.md, and appends a one-line entry to PROPOSER_HISTORY.md.
    Write order matters: summary.json is the source of truth and is written
    before the derived STATE.md; the history log is appended last.
    """
    base_dir = args.base_dir
    version = args.version
    scores = _read_json(args.scores)
    # Proposal text is optional; without it, parent detection falls back
    # to the current best version inside _detect_parent.
    proposal_text = _read_text(args.proposal) if args.proposal else ""

    summary = _read_json(_summary_path(base_dir))
    combined = scores.get("combined_score", 0.0)
    parent = _detect_parent(proposal_text, summary["best"]["version"])

    entry = {
        "version": version,
        "combined_score": combined,
        "parent": parent,
    }
    summary["history"].append(entry)
    # History includes the baseline row, so iterations = entries - 1.
    summary["iterations"] = len(summary["history"]) - 1

    # Best/worst are tracked over evolved versions only; the baseline row
    # is excluded so it never shows up as "best" or "worst".
    non_baseline = [h for h in summary["history"] if h["version"] != "baseline"]
    if non_baseline:
        best_entry = max(non_baseline, key=lambda h: h["combined_score"])
        worst_entry = min(non_baseline, key=lambda h: h["combined_score"])
        summary["best"] = {
            "version": best_entry["version"],
            "combined_score": best_entry["combined_score"],
        }
        summary["worst"] = {
            "version": worst_entry["version"],
            "combined_score": worst_entry["combined_score"],
        }

    _write_json(_summary_path(base_dir), summary)
    _write_text(_state_md_path(base_dir), _render_state_md(summary))

    # Look up the parent's score (first match wins; scans the history that
    # now includes the freshly appended entry).
    parent_score = None
    for h in summary["history"]:
        if h["version"] == parent:
            parent_score = h["combined_score"]
            break

    # Regression = more than 0.01 below the PARENT's score (unlike STATE.md,
    # whose delta column compares against the previous table row).
    is_regression = parent_score is not None and combined < parent_score - 0.01
    regression_tag = " <- REGRESSION" if is_regression else ""

    # First non-empty line of the proposal that is not the "Based on ..."
    # declaration becomes the log's one-line summary.
    proposal_lines = proposal_text.strip().split("\n")
    summary_line = ""
    for line in proposal_lines:
        stripped = line.strip()
        if stripped and not re.match(r"^[Bb]ased on", stripped):
            summary_line = stripped
            break

    history_entry = f"\n## {version} (score: {combined:.2f}){regression_tag}\n{summary_line}\n"
    history_path = _history_path(base_dir)
    with open(history_path, "a") as f:
        f.write(history_entry)
169
+
170
+
171
def cmd_show(args):
    """Print a console summary of evolution progress with a score bar chart."""
    summary = _read_json(_summary_path(args.base_dir))
    best = summary["best"]
    worst = summary["worst"]

    print(f"Harness Evolver — Iteration {summary['iterations']}")
    print(f"Best: {best['version']} score: {best['combined_score']:.2f}")
    print(f"Worst: {worst['version']} score: {worst['combined_score']:.2f}")
    print()
    for entry in summary["history"]:
        version, score = entry["version"], entry["combined_score"]
        # A perfect score (1.0) renders as a 30-character full-block bar.
        bar = "\u2588" * int(score * 30)
        print(f" {version:>10}: {score:.2f} {bar}")
187
+
188
+
189
def main():
    """CLI entry point: parse arguments and dispatch to init/update/show."""
    parser = argparse.ArgumentParser(description="Harness Evolver state manager")
    sub = parser.add_subparsers(dest="command")

    p_init = sub.add_parser("init")
    p_init.add_argument("--base-dir", required=True)
    p_init.add_argument("--baseline-score", type=float, required=True)

    p_update = sub.add_parser("update")
    p_update.add_argument("--base-dir", required=True)
    p_update.add_argument("--version", required=True)
    p_update.add_argument("--scores", required=True)
    p_update.add_argument("--proposal", default=None)

    p_show = sub.add_parser("show")
    p_show.add_argument("--base-dir", required=True)

    args = parser.parse_args()
    dispatch = {"init": cmd_init, "update": cmd_update, "show": cmd_show}
    if args.command not in dispatch:
        parser.print_help()
        sys.exit(1)
    dispatch[args.command](args)
216
+
217
+
218
+ if __name__ == "__main__":
219
+ main()