npm - jumpstart-mode - Versions diffs - 1.1.12 → 1.1.13 - Mend

jumpstart-mode 1.1.12 → 1.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (146)

package/.github/agents/jumpstart-adversary.agent.md +2 -1
package/.github/agents/jumpstart-architect.agent.md +5 -6
package/.github/agents/jumpstart-challenger.agent.md +2 -1
package/.github/agents/jumpstart-devops.agent.md +2 -2
package/.github/agents/jumpstart-diagram-verifier.agent.md +2 -1
package/.github/agents/jumpstart-maintenance.agent.md +1 -0
package/.github/agents/jumpstart-performance.agent.md +1 -0
package/.github/agents/jumpstart-pm.agent.md +1 -1
package/.github/agents/jumpstart-refactor.agent.md +1 -0
package/.github/agents/jumpstart-requirements-extractor.agent.md +1 -0
package/.github/agents/jumpstart-researcher.agent.md +1 -0
package/.github/agents/jumpstart-retrospective.agent.md +1 -0
package/.github/agents/jumpstart-reviewer.agent.md +2 -0
package/.github/agents/jumpstart-scout.agent.md +1 -1
package/.github/agents/jumpstart-scrum-master.agent.md +1 -0
package/.github/agents/jumpstart-security.agent.md +2 -1
package/.github/agents/jumpstart-tech-writer.agent.md +1 -0
package/.github/workflows/quality.yml +19 -2
package/.jumpstart/agents/analyst.md +38 -0
package/.jumpstart/agents/architect.md +38 -0
package/.jumpstart/agents/challenger.md +38 -0
package/.jumpstart/agents/developer.md +41 -0
package/.jumpstart/agents/pm.md +38 -0
package/.jumpstart/agents/scout.md +33 -0
package/.jumpstart/agents/ux-designer.md +4 -0
package/.jumpstart/config.yaml +24 -0
package/.jumpstart/schemas/timeline.schema.json +1 -0
package/.jumpstart/skills/skill-creator/SKILL.md +485 -357
package/.jumpstart/skills/skill-creator/agents/analyzer.md +274 -0
package/.jumpstart/skills/skill-creator/agents/comparator.md +202 -0
package/.jumpstart/skills/skill-creator/agents/grader.md +223 -0
package/.jumpstart/skills/skill-creator/assets/eval_review.html +146 -0
package/.jumpstart/skills/skill-creator/eval-viewer/generate_review.py +471 -0
package/.jumpstart/skills/skill-creator/eval-viewer/viewer.html +1325 -0
package/.jumpstart/skills/skill-creator/references/schemas.md +430 -0
package/.jumpstart/skills/skill-creator/scripts/__init__.py +0 -0
package/.jumpstart/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
package/.jumpstart/skills/skill-creator/scripts/generate_report.py +326 -0
package/.jumpstart/skills/skill-creator/scripts/improve_description.py +247 -0
package/.jumpstart/skills/skill-creator/scripts/package_skill.py +136 -110
package/.jumpstart/skills/skill-creator/scripts/run_eval.py +310 -0
package/.jumpstart/skills/skill-creator/scripts/run_loop.py +328 -0
package/.jumpstart/skills/skill-creator/scripts/utils.py +47 -0
package/.jumpstart/state/timeline.json +659 -0
package/.jumpstart/usage-log.json +74 -3
package/README.md +62 -1
package/bin/cli.js +3217 -1
package/bin/headless-runner.js +62 -2
package/bin/lib/agent-checkpoint.js +168 -0
package/bin/lib/ai-evaluation.js +104 -0
package/bin/lib/ai-intake.js +152 -0
package/bin/lib/ambiguity-heatmap.js +152 -0
package/bin/lib/artifact-comparison.js +104 -0
package/bin/lib/ast-edit-engine.js +157 -0
package/bin/lib/backlog-sync.js +338 -0
package/bin/lib/bcdr-planning.js +158 -0
package/bin/lib/bidirectional-trace.js +199 -0
package/bin/lib/branch-workflow.js +266 -0
package/bin/lib/cab-output.js +119 -0
package/bin/lib/chat-integration.js +122 -0
package/bin/lib/ci-cd-integration.js +208 -0
package/bin/lib/codebase-retrieval.js +125 -0
package/bin/lib/collaboration.js +168 -0
package/bin/lib/compliance-packs.js +213 -0
package/bin/lib/context-chunker.js +128 -0
package/bin/lib/context-onboarding.js +122 -0
package/bin/lib/contract-first.js +124 -0
package/bin/lib/cost-router.js +148 -0
package/bin/lib/credential-boundary.js +155 -0
package/bin/lib/data-classification.js +180 -0
package/bin/lib/data-contracts.js +129 -0
package/bin/lib/db-evolution.js +158 -0
package/bin/lib/decision-conflicts.js +299 -0
package/bin/lib/delivery-confidence.js +361 -0
package/bin/lib/dependency-upgrade.js +153 -0
package/bin/lib/design-system.js +133 -0
package/bin/lib/deterministic-artifacts.js +151 -0
package/bin/lib/diagram-studio.js +115 -0
package/bin/lib/domain-ontology.js +140 -0
package/bin/lib/ea-review-packet.js +151 -0
package/bin/lib/enterprise-search.js +123 -0
package/bin/lib/enterprise-templates.js +140 -0
package/bin/lib/environment-promotion.js +220 -0
package/bin/lib/estimation-studio.js +130 -0
package/bin/lib/event-modeling.js +133 -0
package/bin/lib/evidence-collector.js +179 -0
package/bin/lib/finops-planner.js +182 -0
package/bin/lib/fitness-functions.js +279 -0
package/bin/lib/focus.js +448 -0
package/bin/lib/governance-dashboard.js +165 -0
package/bin/lib/guided-handoff.js +120 -0
package/bin/lib/impact-analysis.js +190 -0
package/bin/lib/incident-feedback.js +157 -0
package/bin/lib/integrate.js +1 -1
package/bin/lib/knowledge-graph.js +122 -0
package/bin/lib/legacy-modernizer.js +160 -0
package/bin/lib/migration-planner.js +144 -0
package/bin/lib/model-governance.js +185 -0
package/bin/lib/model-router.js +144 -0
package/bin/lib/multi-repo.js +272 -0
package/bin/lib/next-phase.js +53 -8
package/bin/lib/ops-ownership.js +152 -0
package/bin/lib/parallel-agents.js +257 -0
package/bin/lib/pattern-library.js +115 -0
package/bin/lib/persona-packs.js +99 -0
package/bin/lib/plan-executor.js +366 -0
package/bin/lib/platform-engineering.js +119 -0
package/bin/lib/playback-summaries.js +126 -0
package/bin/lib/policy-engine.js +240 -0
package/bin/lib/portfolio-reporting.js +357 -0
package/bin/lib/pr-package.js +197 -0
package/bin/lib/project-memory.js +235 -0
package/bin/lib/prompt-governance.js +130 -0
package/bin/lib/promptless-mode.js +128 -0
package/bin/lib/quality-graph.js +193 -0
package/bin/lib/raci-matrix.js +188 -0
package/bin/lib/refactor-planner.js +167 -0
package/bin/lib/reference-architectures.js +304 -0
package/bin/lib/release-readiness.js +171 -0
package/bin/lib/repo-graph.js +262 -0
package/bin/lib/requirements-baseline.js +358 -0
package/bin/lib/risk-register.js +211 -0
package/bin/lib/role-approval.js +249 -0
package/bin/lib/role-views.js +142 -0
package/bin/lib/root-cause-analysis.js +132 -0
package/bin/lib/runtime-debugger.js +154 -0
package/bin/lib/safe-rename.js +135 -0
package/bin/lib/semantic-diff.js +335 -0
package/bin/lib/sla-slo.js +210 -0
package/bin/lib/spec-comments.js +147 -0
package/bin/lib/spec-maturity.js +287 -0
package/bin/lib/sre-integration.js +154 -0
package/bin/lib/structured-elicitation.js +174 -0
package/bin/lib/telemetry-feedback.js +118 -0
package/bin/lib/test-generator.js +146 -0
package/bin/lib/timeline.js +2 -1
package/bin/lib/tool-bridge.js +107 -0
package/bin/lib/tool-guardrails.js +139 -0
package/bin/lib/tool-schemas.js +172 -3
package/bin/lib/transcript-ingestion.js +150 -0
package/bin/lib/vendor-risk.js +173 -0
package/bin/lib/waiver-workflow.js +174 -0
package/bin/lib/web-dashboard.js +126 -0
package/bin/lib/workshop-mode.js +165 -0
package/bin/lib/workstream-ownership.js +104 -0
package/package.json +1 -1

package/.jumpstart/skills/skill-creator/scripts/package_skill.py CHANGED

@@ -1,110 +1,136 @@
-#!/usr/bin/env python3
-"""
-Skill Packager - Creates a distributable .skill file of a skill folder
-Usage:
-    python utils/package_skill.py <path/to/skill-folder> [output-directory]
-Example:
-    python utils/package_skill.py skills/public/my-skill
-    python utils/package_skill.py skills/public/my-skill ./dist
-"""
-import sys
-import zipfile
-from pathlib import Path
-from quick_validate import validate_skill
-def package_skill(skill_path, output_dir=None):
-    """
-    Package a skill folder into a .skill file.
-    Args:
-        skill_path: Path to the skill folder
-        output_dir: Optional output directory for the .skill file (defaults to current directory)
-    Returns:
-        Path to the created .skill file, or None if error
-    """
-    skill_path = Path(skill_path).resolve()
-    # Validate skill folder exists
-    if not skill_path.exists():
-        print(f"❌ Error: Skill folder not found: {skill_path}")
-        return None
-    if not skill_path.is_dir():
-        print(f"❌ Error: Path is not a directory: {skill_path}")
-        return None
-    # Validate SKILL.md exists
-    skill_md = skill_path / "SKILL.md"
-    if not skill_md.exists():
-        print(f"❌ Error: SKILL.md not found in {skill_path}")
-        return None
-    # Run validation before packaging
-    print("🔍 Validating skill...")
-    valid, message = validate_skill(skill_path)
-    if not valid:
-        print(f"❌ Validation failed: {message}")
-        print("   Please fix the validation errors before packaging.")
-        return None
-    print(f"✅ {message}\n")
-    # Determine output location
-    skill_name = skill_path.name
-    if output_dir:
-        output_path = Path(output_dir).resolve()
-        output_path.mkdir(parents=True, exist_ok=True)
-    else:
-        output_path = Path.cwd()
-    skill_filename = output_path / f"{skill_name}.skill"
-    # Create the .skill file (zip format)
-    try:
-        with zipfile.ZipFile(skill_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
-            # Walk through the skill directory
-            for file_path in skill_path.rglob('*'):
-                if file_path.is_file():
-                    # Calculate the relative path within the zip
-                    arcname = file_path.relative_to(skill_path.parent)
-                    zipf.write(file_path, arcname)
-                    print(f"  Added: {arcname}")
-        print(f"\n✅ Successfully packaged skill to: {skill_filename}")
-        return skill_filename
-    except Exception as e:
-        print(f"❌ Error creating .skill file: {e}")
-        return None
-def main():
-    if len(sys.argv) < 2:
-        print("Usage: python utils/package_skill.py <path/to/skill-folder> [output-directory]")
-        print("\nExample:")
-        print("  python utils/package_skill.py skills/public/my-skill")
-        print("  python utils/package_skill.py skills/public/my-skill ./dist")
-        sys.exit(1)
-    skill_path = sys.argv[1]
-    output_dir = sys.argv[2] if len(sys.argv) > 2 else None
-    print(f"📦 Packaging skill: {skill_path}")
-    if output_dir:
-        print(f"   Output directory: {output_dir}")
-    print()
-    result = package_skill(skill_path, output_dir)
-    if result:
-        sys.exit(0)
-    else:
-        sys.exit(1)
-if __name__ == "__main__":
-    main()
+#!/usr/bin/env python3
+"""
+Skill Packager - Creates a distributable .skill file of a skill folder
+Usage:
+    python utils/package_skill.py <path/to/skill-folder> [output-directory]
+Example:
+    python utils/package_skill.py skills/public/my-skill
+    python utils/package_skill.py skills/public/my-skill ./dist
+"""
+import fnmatch
+import sys
+import zipfile
+from pathlib import Path
+from scripts.quick_validate import validate_skill
+# Names that must never end up inside a packaged skill.
+EXCLUDE_DIRS = {"__pycache__", "node_modules"}
+EXCLUDE_GLOBS = {"*.pyc"}
+EXCLUDE_FILES = {".DS_Store"}
+# Directory names skipped only when they sit directly under the skill root;
+# an entry with the same name deeper in the tree is still packaged.
+ROOT_EXCLUDE_DIRS = {"evals"}
+def should_exclude(rel_path: Path) -> bool:
+    """Return True when ``rel_path`` must be left out of the archive.
+    ``rel_path`` is expected to be relative to the *parent* of the skill
+    folder, so its first component is the skill folder name itself.
+    """
+    components = rel_path.parts
+    # Any component matching a globally excluded name rules the path out.
+    if not EXCLUDE_DIRS.isdisjoint(components):
+        return True
+    # components[0] is the skill folder; components[1], when present, is the
+    # entry sitting directly beneath it.
+    if len(components) >= 2 and components[1] in ROOT_EXCLUDE_DIRS:
+        return True
+    filename = rel_path.name
+    if filename in EXCLUDE_FILES:
+        return True
+    for pattern in EXCLUDE_GLOBS:
+        if fnmatch.fnmatch(filename, pattern):
+            return True
+    return False
+def package_skill(skill_path, output_dir=None):
+    """
+    Package a skill folder into a .skill file (a zip archive).
+    The folder must exist, contain a SKILL.md and pass validate_skill().
+    Files matched by should_exclude() are left out, and so is the archive
+    itself when the output location lies inside the skill folder.
+    Args:
+        skill_path: Path to the skill folder
+        output_dir: Optional output directory for the .skill file (defaults to current directory)
+    Returns:
+        Path to the created .skill file, or None if error
+    """
+    skill_path = Path(skill_path).resolve()
+    # Validate skill folder exists
+    if not skill_path.exists():
+        print(f"❌ Error: Skill folder not found: {skill_path}")
+        return None
+    if not skill_path.is_dir():
+        print(f"❌ Error: Path is not a directory: {skill_path}")
+        return None
+    # Validate SKILL.md exists
+    skill_md = skill_path / "SKILL.md"
+    if not skill_md.exists():
+        print(f"❌ Error: SKILL.md not found in {skill_path}")
+        return None
+    # Run validation before packaging
+    print("🔍 Validating skill...")
+    valid, message = validate_skill(skill_path)
+    if not valid:
+        print(f"❌ Validation failed: {message}")
+        print("   Please fix the validation errors before packaging.")
+        return None
+    print(f"✅ {message}\n")
+    # Determine output location
+    skill_name = skill_path.name
+    if output_dir:
+        output_path = Path(output_dir).resolve()
+        output_path.mkdir(parents=True, exist_ok=True)
+    else:
+        output_path = Path.cwd()
+    skill_filename = output_path / f"{skill_name}.skill"
+    # Resolved once so it can be compared against every walked file.
+    archive_path = skill_filename.resolve()
+    # Create the .skill file (zip format)
+    try:
+        with zipfile.ZipFile(skill_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
+            # Walk through the skill directory, excluding build artifacts
+            for file_path in skill_path.rglob('*'):
+                if not file_path.is_file():
+                    continue
+                # The archive already exists on disk at this point; when the
+                # output location is inside the skill folder the walk would
+                # otherwise pick it up and write the zip into itself.
+                if file_path.resolve() == archive_path:
+                    continue
+                arcname = file_path.relative_to(skill_path.parent)
+                if should_exclude(arcname):
+                    print(f"  Skipped: {arcname}")
+                    continue
+                zipf.write(file_path, arcname)
+                print(f"  Added: {arcname}")
+        print(f"\n✅ Successfully packaged skill to: {skill_filename}")
+        return skill_filename
+    except Exception as e:
+        # Best-effort contract: report the failure and signal it with None.
+        print(f"❌ Error creating .skill file: {e}")
+        return None
+def main():
+    """Command-line entry point: package the skill named on argv.
+    Exits with status 0 when an archive was produced and 1 otherwise
+    (including when no skill folder argument was given).
+    """
+    argv = sys.argv
+    if len(argv) < 2:
+        print("Usage: python utils/package_skill.py <path/to/skill-folder> [output-directory]")
+        print("\nExample:")
+        print("  python utils/package_skill.py skills/public/my-skill")
+        print("  python utils/package_skill.py skills/public/my-skill ./dist")
+        sys.exit(1)
+    skill_path = argv[1]
+    # The output directory is optional; None lets package_skill pick its default.
+    output_dir = None
+    if len(argv) > 2:
+        output_dir = argv[2]
+    print(f"📦 Packaging skill: {skill_path}")
+    if output_dir:
+        print(f"   Output directory: {output_dir}")
+    print()
+    archive = package_skill(skill_path, output_dir)
+    sys.exit(0 if archive else 1)
+if __name__ == "__main__":
+    main()

package/.jumpstart/skills/skill-creator/scripts/run_eval.py ADDED

@@ -0,0 +1,310 @@
+#!/usr/bin/env python3
+"""Run trigger evaluation for a skill description.
+Tests whether a skill's description causes Claude to trigger (read the skill)
+for a set of queries. Outputs results as JSON.
+"""
+import argparse
+import json
+import os
+import select
+import subprocess
+import sys
+import time
+import uuid
+from concurrent.futures import ProcessPoolExecutor, as_completed
+from pathlib import Path
+from scripts.utils import parse_skill_md
+def find_project_root() -> Path:
+    """Locate the project root by searching upward from cwd for ``.claude/``.
+    The nearest directory (cwd first, then each ancestor in turn) that
+    contains a ``.claude`` directory is returned; when none does, cwd itself
+    is returned. This mimics how Claude Code discovers its project root, so
+    the command file we create ends up where claude -p will look for it.
+    """
+    start = Path.cwd()
+    candidates = (start, *start.parents)
+    matches = (folder for folder in candidates if (folder / ".claude").is_dir())
+    return next(matches, start)
+def _interpret_stream_event(event: dict, clean_name: str, state: dict) -> bool | None:
+    """Fold one parsed stream-json event into ``state`` and report a verdict.
+    Returns True or False as soon as the event settles whether the temporary
+    command named ``clean_name`` was invoked, or None when more events are
+    needed. ``state`` carries ``pending_tool`` (name of the Skill/Read tool
+    call whose input is still streaming, or None) and ``partial_input`` (the
+    JSON text accumulated for that call so far).
+    """
+    event_type = event.get("type")
+    # Early detection via partial-message stream events
+    if event_type == "stream_event":
+        inner = event.get("event", {})
+        inner_type = inner.get("type", "")
+        if inner_type == "content_block_start":
+            block = inner.get("content_block", {})
+            if block.get("type") == "tool_use":
+                tool_name = block.get("name", "")
+                if tool_name not in ("Skill", "Read"):
+                    # A different tool was called first: not a trigger.
+                    return False
+                state["pending_tool"] = tool_name
+                state["partial_input"] = ""
+        elif inner_type == "content_block_delta" and state["pending_tool"]:
+            delta = inner.get("delta", {})
+            if delta.get("type") == "input_json_delta":
+                state["partial_input"] += delta.get("partial_json", "")
+                if clean_name in state["partial_input"]:
+                    return True
+        elif inner_type in ("content_block_stop", "message_stop"):
+            if state["pending_tool"]:
+                return clean_name in state["partial_input"]
+            if inner_type == "message_stop":
+                return False
+        return None
+    # Fallback: full assistant message; its first tool call decides
+    if event_type == "assistant":
+        for item in event.get("message", {}).get("content", []):
+            if item.get("type") != "tool_use":
+                continue
+            tool_name = item.get("name", "")
+            tool_input = item.get("input", {})
+            if tool_name == "Skill":
+                return clean_name in tool_input.get("skill", "")
+            if tool_name == "Read":
+                return clean_name in tool_input.get("file_path", "")
+            return False
+        return None
+    if event_type == "result":
+        # The run finished without a matching tool call having been seen.
+        return False
+    return None
+def run_single_query(
+    query: str,
+    skill_name: str,
+    skill_description: str,
+    timeout: int,
+    project_root: str,
+    model: str | None = None,
+) -> bool:
+    """Run a single query and return whether the skill was triggered.
+    Creates a command file in .claude/commands/ so it appears in Claude's
+    available_skills list, then runs `claude -p` with the raw query.
+    Uses --include-partial-messages to detect triggering early from
+    stream events (content_block_start) rather than waiting for the
+    full assistant message, which only arrives after tool execution.
+    Output still unread when the process exits or stdout reaches EOF is
+    parsed too, including a final line that has no trailing newline.
+    Args:
+        query: Prompt passed to `claude -p`
+        skill_name: Base name for the temporary command file
+        skill_description: Description written into the command file
+        timeout: Maximum number of seconds to watch the output stream
+        project_root: Directory holding .claude/; also the subprocess cwd
+        model: Optional value for --model
+    Returns:
+        True if a Skill/Read tool call referencing the temporary command
+        was observed, otherwise False (including on timeout)
+    """
+    unique_id = uuid.uuid4().hex[:8]
+    clean_name = f"{skill_name}-skill-{unique_id}"
+    project_commands_dir = Path(project_root) / ".claude" / "commands"
+    command_file = project_commands_dir / f"{clean_name}.md"
+    try:
+        project_commands_dir.mkdir(parents=True, exist_ok=True)
+        # Use YAML block scalar to avoid breaking on quotes in description
+        indented_desc = "\n  ".join(skill_description.split("\n"))
+        command_content = (
+            f"---\n"
+            f"description: |\n"
+            f"  {indented_desc}\n"
+            f"---\n\n"
+            f"# {skill_name}\n\n"
+            f"This skill handles: {skill_description}\n"
+        )
+        # Explicit encoding so non-ASCII descriptions do not depend on locale.
+        command_file.write_text(command_content, encoding="utf-8")
+        cmd = [
+            "claude",
+            "-p", query,
+            "--output-format", "stream-json",
+            "--verbose",
+            "--include-partial-messages",
+        ]
+        if model:
+            cmd.extend(["--model", model])
+        # Remove CLAUDECODE env var to allow nesting claude -p inside a
+        # Claude Code session. The guard is for interactive terminal conflicts;
+        # programmatic subprocess usage is safe.
+        env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"}
+        process = subprocess.Popen(
+            cmd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.DEVNULL,
+            cwd=project_root,
+            env=env,
+        )
+        start_time = time.time()
+        # Kept as bytes and decoded per complete line, so a multi-byte
+        # character split across two reads is never decoded in halves.
+        buffer = b""
+        state = {"pending_tool": None, "partial_input": ""}
+        try:
+            stream_open = True
+            while stream_open and time.time() - start_time < timeout:
+                if process.poll() is not None:
+                    # Process already exited: drain what is left, then fall
+                    # through so those final events still get parsed.
+                    buffer += process.stdout.read() or b""
+                    stream_open = False
+                else:
+                    ready, _, _ = select.select([process.stdout], [], [], 1.0)
+                    if not ready:
+                        continue
+                    chunk = os.read(process.stdout.fileno(), 8192)
+                    if chunk:
+                        buffer += chunk
+                    else:
+                        # EOF on the pipe
+                        stream_open = False
+                if not stream_open and buffer and not buffer.endswith(b"\n"):
+                    # Terminate a trailing partial line so it is parsed below.
+                    buffer += b"\n"
+                while b"\n" in buffer:
+                    raw_line, buffer = buffer.split(b"\n", 1)
+                    line = raw_line.decode("utf-8", errors="replace").strip()
+                    if not line:
+                        continue
+                    try:
+                        event = json.loads(line)
+                    except json.JSONDecodeError:
+                        continue
+                    if not isinstance(event, dict):
+                        # Only JSON objects are events; ignore anything else.
+                        continue
+                    verdict = _interpret_stream_event(event, clean_name, state)
+                    if verdict is not None:
+                        return verdict
+        finally:
+            # Clean up process on any exit path (return, exception, timeout)
+            if process.poll() is None:
+                process.kill()
+                process.wait()
+            process.stdout.close()
+        return False
+    finally:
+        if command_file.exists():
+            command_file.unlink()
+def run_eval(
+    eval_set: list[dict],
+    skill_name: str,
+    description: str,
+    num_workers: int,
+    timeout: int,
+    project_root: Path,
+    runs_per_query: int = 1,
+    trigger_threshold: float = 0.5,
+    model: str | None = None,
+) -> dict:
+    """Run the full eval set and return results.
+    Every item's "query" is submitted ``runs_per_query`` times to a process
+    pool running run_single_query. A run that raises is reported on stderr
+    and counted as not triggered. A query passes when its trigger rate is
+    >= ``trigger_threshold`` if its "should_trigger" is truthy, and
+    < ``trigger_threshold`` otherwise.
+    Returns:
+        Dict with "skill_name", "description", per-query "results" (listed
+        in eval_set order) and a "summary" of total/passed/failed counts
+    """
+    query_triggers: dict[str, list[bool]] = {}
+    query_items: dict[str, dict] = {}
+    with ProcessPoolExecutor(max_workers=num_workers) as executor:
+        future_to_query = {}
+        for item in eval_set:
+            query = item["query"]
+            for _ in range(runs_per_query):
+                future = executor.submit(
+                    run_single_query,
+                    query,
+                    skill_name,
+                    description,
+                    timeout,
+                    str(project_root),
+                    model,
+                )
+                future_to_query[future] = query
+                # Register the bucket at submission time: futures complete in
+                # arbitrary order, and creating buckets on completion would
+                # make the order of the report vary from run to run.
+                query_items[query] = item
+                query_triggers.setdefault(query, [])
+        for future in as_completed(future_to_query):
+            query = future_to_query[future]
+            try:
+                outcome = future.result()
+            except Exception as e:
+                print(f"Warning: query failed: {e}", file=sys.stderr)
+                outcome = False
+            query_triggers[query].append(outcome)
+    results = []
+    for query, triggers in query_triggers.items():
+        item = query_items[query]
+        # Buckets only exist for queries with at least one submitted run,
+        # so the division below never sees an empty list.
+        trigger_rate = sum(triggers) / len(triggers)
+        should_trigger = item["should_trigger"]
+        if should_trigger:
+            did_pass = trigger_rate >= trigger_threshold
+        else:
+            did_pass = trigger_rate < trigger_threshold
+        results.append({
+            "query": query,
+            "should_trigger": should_trigger,
+            "trigger_rate": trigger_rate,
+            "triggers": sum(triggers),
+            "runs": len(triggers),
+            "pass": did_pass,
+        })
+    passed = sum(1 for r in results if r["pass"])
+    total = len(results)
+    return {
+        "skill_name": skill_name,
+        "description": description,
+        "results": results,
+        "summary": {
+            "total": total,
+            "passed": passed,
+            "failed": total - passed,
+        },
+    }
+def main():
+    """Parse CLI options, run the trigger evaluation and print the report.
+    The JSON report goes to stdout; with --verbose a progress line and a
+    per-query summary are also written to stderr. Exits with status 1 when
+    the skill directory has no SKILL.md.
+    """
+    parser = argparse.ArgumentParser(description="Run trigger evaluation for a skill description")
+    # (flag, add_argument keyword options), registered in this order.
+    option_specs = [
+        ("--eval-set", {"required": True, "help": "Path to eval set JSON file"}),
+        ("--skill-path", {"required": True, "help": "Path to skill directory"}),
+        ("--description", {"default": None, "help": "Override description to test"}),
+        ("--num-workers", {"type": int, "default": 10, "help": "Number of parallel workers"}),
+        ("--timeout", {"type": int, "default": 30, "help": "Timeout per query in seconds"}),
+        ("--runs-per-query", {"type": int, "default": 3, "help": "Number of runs per query"}),
+        ("--trigger-threshold", {"type": float, "default": 0.5, "help": "Trigger rate threshold"}),
+        ("--model", {"default": None, "help": "Model to use for claude -p (default: user's configured model)"}),
+        ("--verbose", {"action": "store_true", "help": "Print progress to stderr"}),
+    ]
+    for flag, options in option_specs:
+        parser.add_argument(flag, **options)
+    args = parser.parse_args()
+    eval_set = json.loads(Path(args.eval_set).read_text())
+    skill_path = Path(args.skill_path)
+    skill_md = skill_path / "SKILL.md"
+    if not skill_md.exists():
+        print(f"Error: No SKILL.md found at {skill_path}", file=sys.stderr)
+        sys.exit(1)
+    # The third value returned by parse_skill_md is not needed here.
+    name, original_description, _ = parse_skill_md(skill_path)
+    # An explicit, non-empty --description wins over the one in SKILL.md.
+    description = args.description if args.description else original_description
+    project_root = find_project_root()
+    if args.verbose:
+        print(f"Evaluating: {description}", file=sys.stderr)
+    output = run_eval(
+        eval_set=eval_set,
+        skill_name=name,
+        description=description,
+        num_workers=args.num_workers,
+        timeout=args.timeout,
+        project_root=project_root,
+        runs_per_query=args.runs_per_query,
+        trigger_threshold=args.trigger_threshold,
+        model=args.model,
+    )
+    if args.verbose:
+        summary = output["summary"]
+        print(f"Results: {summary['passed']}/{summary['total']} passed", file=sys.stderr)
+        for r in output["results"]:
+            status = "PASS" if r["pass"] else "FAIL"
+            rate_str = f"{r['triggers']}/{r['runs']}"
+            print(f"  [{status}] rate={rate_str} expected={r['should_trigger']}: {r['query'][:70]}", file=sys.stderr)
+    print(json.dumps(output, indent=2))
+if __name__ == "__main__":
+    main()