npm - ma-agents - Versions diffs - 3.12.2 → 3.13.0 - Mend

ma-agents 3.12.2 → 3.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (255) hide show

package/lib/bmad-cache/bmb/skills/bmad-eval-runner/scripts/run_triggers.py ADDED Viewed

@@ -0,0 +1,366 @@
+#!/usr/bin/env python3
+# /// script
+# requires-python = ">=3.9"
+# ///
+"""Run trigger evals: does the skill's description fire on each query?
+Adapted from Anthropic skill-creator's run_eval.py
+(https://github.com/anthropics/skills/tree/main/skills/skill-creator) with two
+adaptations:
+  1. Isolation. Each query runs in either a fresh Docker container off
+     bmad-eval-runner:latest, or a fresh local tmp dir under ~/bmad-evals/<run-id>/
+     with HOME overridden to a clean directory. This prevents the host's global
+     CLAUDE.md and auto-memory from biasing whether the skill fires.
+  2. Output. Results are written to a run folder alongside the artifact eval
+     run-folder layout (so triggers and artifacts can share a single report).
+Usage:
+  python3 run_triggers.py \\
+    --skill-path PATH \\
+    --triggers-file PATH/triggers.json \\
+    --output-dir PATH \\
+    --isolation docker|local \\
+    [--workers N] [--runs-per-query N] [--timeout SECS] [--threshold 0.5]
+"""
+from __future__ import annotations
+import argparse
+import json
+import os
+import shutil
+import subprocess
+import sys
+import time
+import uuid
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+SCRIPT_DIR = Path(__file__).resolve().parent
+sys.path.insert(0, str(SCRIPT_DIR))
+from utils import (  # noqa: E402
+    new_run_id,
+    parse_skill_md,
+    read_json,
+    read_macos_keychain_credentials,
+    stage_credentials,
+    utc_now_iso,
+    write_json,
+)
+# Image tag passed to `docker run` for every Docker-isolated query.
+DOCKER_IMAGE = "bmad-eval-runner:latest"
+# Read once, at import time, in the parent process. None when no Keychain
+# credential could be read; otherwise the raw JSON that is staged into each
+# isolated workspace before `claude` is launched.
+_KEYCHAIN_CREDS: str | None = read_macos_keychain_credentials()
+def write_synthetic_skill(skills_dir: Path, skill_name: str, description: str, unique_id: str) -> tuple[Path, str]:
+    """Create ``<skills_dir>/<skill_name>-skill-<unique_id>/SKILL.md`` and return it.
+    The file carries frontmatter (``name`` + block-scalar ``description``) so it is
+    laid out like a real skill rather than a slash-command. The unique suffix lets
+    the trigger detector tell this synthetic skill apart from any other skill with
+    the same display name.
+    Returns ``(path_to_SKILL.md, unique_skill_name)``.
+    """
+    clean_name = f"{skill_name}-skill-{unique_id}"
+    skill_file = skills_dir / clean_name / "SKILL.md"
+    skill_file.parent.mkdir(parents=True, exist_ok=True)
+    # Continuation lines of a YAML literal block must share the two-space indent.
+    block_scalar = description.replace("\n", "\n  ")
+    document = "".join(
+        [
+            "---\n",
+            f"name: {clean_name}\n",
+            "description: |\n",
+            f"  {block_scalar}\n",
+            "---\n\n",
+            f"# {skill_name}\n\n",
+            f"This skill handles: {description}\n",
+        ]
+    )
+    skill_file.write_text(document, encoding="utf-8")
+    return skill_file, clean_name
+def parse_stream_for_trigger(buffer: str, clean_name: str) -> tuple[bool | None, str]:
+    """Scan the complete stream-json lines in ``buffer`` for a use of the synthetic skill.
+    A trigger is a ``Skill`` or ``Read`` tool call whose input mentions
+    ``clean_name``. Blank lines, malformed JSON and JSON values that are not
+    objects are skipped. Tool-call state is local to one invocation, so a tool
+    call whose events are split across two calls is only resolved by a later
+    complete ``assistant`` event.
+    Returns ``(decision, leftover)``. ``decision`` is True/False once the stream
+    is conclusive (``leftover`` is then ``""``), or None when more data is
+    needed, in which case ``leftover`` is the text after the last newline.
+    """
+    pending_tool: str | None = None
+    accumulated_json = ""
+    while "\n" in buffer:
+        line, buffer = buffer.split("\n", 1)
+        line = line.strip()
+        if not line:
+            continue
+        try:
+            evt = json.loads(line)
+        except json.JSONDecodeError:
+            continue
+        # "null", numbers and lists are valid JSON but have no .get(); skip
+        # them instead of failing the whole query with AttributeError.
+        if not isinstance(evt, dict):
+            continue
+        evt_type = evt.get("type")
+        if evt_type == "stream_event":
+            se = evt.get("event", {})
+            t = se.get("type", "")
+            if t == "content_block_start":
+                cb = se.get("content_block", {})
+                if cb.get("type") == "tool_use":
+                    name = cb.get("name", "")
+                    if name in ("Skill", "Read"):
+                        pending_tool = name
+                        accumulated_json = ""
+                    else:
+                        # Any other tool being called first counts as "not triggered".
+                        return False, ""
+            elif t == "content_block_delta" and pending_tool:
+                delta = se.get("delta", {})
+                if delta.get("type") == "input_json_delta":
+                    accumulated_json += delta.get("partial_json", "")
+                    # Decide as soon as the partial tool input names the skill.
+                    if clean_name in accumulated_json:
+                        return True, ""
+            elif t in ("content_block_stop", "message_stop"):
+                if pending_tool:
+                    return clean_name in accumulated_json, ""
+                if t == "message_stop":
+                    return False, ""
+        elif evt_type == "assistant":
+            for item in evt.get("message", {}).get("content", []):
+                if item.get("type") != "tool_use":
+                    continue
+                tname = item.get("name", "")
+                tinput = item.get("input", {})
+                # str() guards against null / non-string input values, which
+                # would make the "in" test raise TypeError.
+                if tname == "Skill" and clean_name in str(tinput.get("skill", "")):
+                    return True, ""
+                if tname == "Read" and clean_name in str(tinput.get("file_path", "")):
+                    return True, ""
+            return False, ""
+        elif evt_type == "result":
+            return False, ""
+    return None, buffer
+def run_query_local(query: str, skill_name: str, description: str,
+                    workspace_root: Path, timeout: int) -> bool:
+    """Run one query through ``claude -p`` in an isolated local workspace.
+    The workspace gets its own HOME / CLAUDE_CONFIG_DIR (``<workspace>/.home``)
+    and a ``project`` directory holding only the synthetic skill. Output is
+    parsed as it streams so the process can be stopped as soon as the trigger
+    decision is known.
+    Returns True when the synthetic skill was invoked, False otherwise
+    (including when ``timeout`` seconds pass without a decision).
+    """
+    # Local import: the module-level import block is shared with other code.
+    import threading
+    workspace_root.mkdir(parents=True, exist_ok=True)
+    home_dir = workspace_root / ".home"
+    claude_dir = home_dir / ".claude"
+    claude_dir.mkdir(parents=True, exist_ok=True)
+    stage_credentials(claude_dir, _KEYCHAIN_CREDS)
+    project_dir = workspace_root / "project"
+    skills_dir = project_dir / ".claude" / "skills"
+    project_dir.mkdir(parents=True, exist_ok=True)
+    unique = uuid.uuid4().hex[:8]
+    cmd_file, clean_name = write_synthetic_skill(skills_dir, skill_name, description, unique)
+    # Minimal environment so nothing from the host's Claude config leaks in.
+    env = {
+        "HOME": str(home_dir),
+        "CLAUDE_CONFIG_DIR": str(claude_dir),
+        "PATH": os.environ.get("PATH", ""),
+        "ANTHROPIC_API_KEY": os.environ.get("ANTHROPIC_API_KEY", ""),
+    }
+    cmd = [
+        "claude", "-p", query,
+        "--output-format", "stream-json",
+        "--verbose",
+        "--include-partial-messages",
+        "--dangerously-skip-permissions",
+    ]
+    try:
+        proc = subprocess.Popen(
+            cmd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.DEVNULL,
+            cwd=str(project_dir),
+            env=env,
+        )
+        # The pipe read below blocks, so a wall-clock check inside the loop
+        # cannot fire while the child is silent. A timer thread enforces the
+        # deadline by killing the child, which ends the read with EOF.
+        watchdog = threading.Timer(timeout, proc.kill)
+        watchdog.daemon = True
+        watchdog.start()
+        buffer = ""
+        triggered: bool | None = None
+        try:
+            while True:
+                chunk = proc.stdout.read1(8192)
+                if not chunk:
+                    # EOF: the process exited on its own or was killed at the deadline.
+                    break
+                buffer += chunk.decode("utf-8", errors="replace")
+                decided, buffer = parse_stream_for_trigger(buffer, clean_name)
+                if decided is not None:
+                    triggered = decided
+                    break
+        finally:
+            watchdog.cancel()
+            if proc.poll() is None:
+                proc.kill()
+            proc.wait()
+            proc.stdout.close()
+        if triggered is None:
+            # Give a final unterminated line a chance to be parsed.
+            decided, _ = parse_stream_for_trigger(buffer + "\n", clean_name)
+            triggered = bool(decided)
+        return bool(triggered)
+    finally:
+        shutil.rmtree(cmd_file.parent, ignore_errors=True)
+        # Do not leave the staged OAuth credentials behind in the run folder.
+        try:
+            (claude_dir / ".credentials.json").unlink()
+        except OSError:
+            pass
+def run_query_docker(query: str, skill_name: str, description: str,
+                     workspace_root: Path, timeout: int) -> bool:
+    """Run one query through ``claude -p`` inside a fresh Docker container.
+    The synthetic skill (and, when available, the Keychain credentials) are
+    mounted read-only; the container writes its stream to
+    ``<workspace_root>/output/stream.jsonl``, which is parsed afterwards.
+    Returns True when the synthetic skill was invoked, False otherwise
+    (including when no stream file was produced).
+    """
+    workspace_root.mkdir(parents=True, exist_ok=True)
+    unique = uuid.uuid4().hex[:8]
+    # Named so the container can be stopped explicitly if the run times out.
+    container_name = f"bmad-trigger-{unique}"
+    skills_in = workspace_root / "skills_in"
+    skills_in.mkdir(parents=True, exist_ok=True)
+    _, clean_name = write_synthetic_skill(skills_in, skill_name, description, unique)
+    creds_dir: Path | None = None
+    if _KEYCHAIN_CREDS:
+        creds_dir = workspace_root / "creds_in"
+        creds_dir.mkdir(parents=True, exist_ok=True)
+        (creds_dir / ".credentials.json").write_text(_KEYCHAIN_CREDS, encoding="utf-8")
+    container_script = f"""
+set -e
+mkdir -p /workspace/.claude/skills
+cp -R /skills/. /workspace/.claude/skills/ 2>/dev/null || true
+if [ -f /creds/.credentials.json ]; then
+  mkdir -p /home/evaluator/.claude
+  cp /creds/.credentials.json /home/evaluator/.claude/.credentials.json
+fi
+cd /workspace
+claude -p "$EVAL_QUERY" \\
+  --output-format stream-json --verbose --include-partial-messages \\
+  --dangerously-skip-permissions \\
+  > /output/stream.jsonl 2>/dev/null || true
+"""
+    output_dir = workspace_root / "output"
+    output_dir.mkdir(parents=True, exist_ok=True)
+    cmd = [
+        "docker", "run", "--rm",
+        "--name", container_name,
+        "-v", f"{skills_in}:/skills:ro",
+        "-v", f"{output_dir}:/output",
+        "-e", "ANTHROPIC_API_KEY",
+        # The query travels as an env var so it is never interpolated into the script.
+        "-e", f"EVAL_QUERY={query}",
+    ]
+    if creds_dir:
+        cmd += ["-v", f"{creds_dir}:/creds:ro"]
+    cmd += [DOCKER_IMAGE, "bash", "-c", container_script]
+    try:
+        try:
+            subprocess.run(cmd, capture_output=True, timeout=timeout + 30)
+        except subprocess.TimeoutExpired:
+            # subprocess.run only kills the docker CLI on timeout; the container
+            # itself would keep running, so stop it by name (best effort).
+            try:
+                subprocess.run(["docker", "kill", container_name], capture_output=True, timeout=30)
+            except (OSError, subprocess.SubprocessError):
+                pass
+    finally:
+        # Do not leave the Keychain credentials copy behind in the run folder.
+        if creds_dir is not None:
+            shutil.rmtree(creds_dir, ignore_errors=True)
+    stream_file = output_dir / "stream.jsonl"
+    if not stream_file.is_file():
+        return False
+    decided, _ = parse_stream_for_trigger(stream_file.read_text(encoding="utf-8", errors="replace") + "\n", clean_name)
+    return bool(decided)
+def main() -> int:
+    """CLI entry point: run every trigger query and write the run folder.
+    Creates ``<output-dir>/<run-id>/`` containing ``run.json`` (written before
+    any query runs) and ``triggers-result.json`` (per-query trigger rates plus
+    a pass/fail summary), and prints the result document to stdout.
+    Returns 2 when the triggers file does not exist, otherwise 0.
+    """
+    parser = argparse.ArgumentParser(description="Run trigger evals in isolation")
+    parser.add_argument("--skill-path", required=True, type=Path)
+    parser.add_argument("--triggers-file", required=True, type=Path)
+    parser.add_argument("--output-dir", required=True, type=Path)
+    parser.add_argument("--isolation", choices=("docker", "local"), required=True)
+    parser.add_argument("--workers", type=int, default=8)
+    parser.add_argument("--runs-per-query", type=int, default=3)
+    parser.add_argument("--timeout", type=int, default=45)
+    parser.add_argument("--threshold", type=float, default=0.5)
+    parser.add_argument("--quiet", action="store_true")
+    args = parser.parse_args()
+    skill_path = args.skill_path.resolve()
+    triggers_file = args.triggers_file.resolve()
+    if not triggers_file.is_file():
+        print(f"triggers file not found: {triggers_file}", file=sys.stderr)
+        return 2
+    skill_name, description, _ = parse_skill_md(skill_path)
+    queries = read_json(triggers_file)
+    run_id = new_run_id(f"{skill_name}-triggers")
+    run_dir = (args.output_dir / run_id).resolve()
+    (run_dir / "queries").mkdir(parents=True, exist_ok=True)
+    write_json(run_dir / "run.json", {
+        "run_id": run_id,
+        "skill_name": skill_name,
+        "description": description,
+        "isolation": args.isolation,
+        "started_at": utc_now_iso(),
+        "query_count": len(queries),
+        "runs_per_query": args.runs_per_query,
+        "threshold": args.threshold,
+    })
+    runner = run_query_docker if args.isolation == "docker" else run_query_local
+    def run_one(idx: int, q: dict, run_idx: int) -> tuple[int, bool]:
+        # Each (query, repetition) pair gets its own workspace directory.
+        ws = run_dir / "queries" / f"q{idx:03d}-r{run_idx}"
+        triggered = runner(q["query"], skill_name, description, ws, args.timeout)
+        return idx, triggered
+    per_query: dict[int, list[bool]] = {}
+    if not args.quiet:
+        print(f"[run_triggers] {len(queries)} queries × {args.runs_per_query} runs, isolation={args.isolation}", file=sys.stderr)
+    with ThreadPoolExecutor(max_workers=args.workers) as pool:
+        futures = []
+        for idx, q in enumerate(queries):
+            for run_idx in range(args.runs_per_query):
+                futures.append(pool.submit(run_one, idx, q, run_idx))
+        for fut in as_completed(futures):
+            try:
+                idx, triggered = fut.result()
+            except Exception as e:
+                # A failed run is reported and left out of that query's tally.
+                print(f"Warning: query failed: {e}", file=sys.stderr)
+                continue
+            per_query.setdefault(idx, []).append(triggered)
+    results = []
+    for idx, q in enumerate(queries):
+        triggers = per_query.get(idx, [])
+        rate = (sum(triggers) / len(triggers)) if triggers else 0.0
+        should = bool(q["should_trigger"])
+        if not triggers:
+            # Every run failed: there is no evidence either way. Without this
+            # guard a should-not-trigger query would pass on the default 0.0 rate.
+            passed = False
+        elif should:
+            passed = rate >= args.threshold
+        else:
+            passed = rate < args.threshold
+        results.append({
+            "query": q["query"],
+            "should_trigger": should,
+            "trigger_rate": rate,
+            "triggers": int(sum(triggers)),
+            "runs": len(triggers),
+            "pass": passed,
+        })
+    output = {
+        "run_id": run_id,
+        "completed_at": utc_now_iso(),
+        "skill_name": skill_name,
+        "description": description,
+        "isolation": args.isolation,
+        "results": results,
+        "summary": {
+            "total": len(results),
+            "passed": sum(1 for r in results if r["pass"]),
+            "failed": sum(1 for r in results if not r["pass"]),
+        },
+    }
+    write_json(run_dir / "triggers-result.json", output)
+    print(json.dumps(output, indent=2))
+    return 0
+if __name__ == "__main__":
+    sys.exit(main())

package/lib/bmad-cache/bmb/skills/bmad-eval-runner/scripts/utils.py ADDED Viewed

@@ -0,0 +1,260 @@
+#!/usr/bin/env python3
+# /// script
+# requires-python = ">=3.9"
+# ///
+"""Shared helpers for the eval runner."""
+from __future__ import annotations
+import json
+import re
+import shutil
+import subprocess
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+def parse_skill_md(skill_path: Path) -> tuple[str, str, str]:
+    """Return ``(name, description, body)`` parsed from ``<skill_path>/SKILL.md``.
+    Only the ``name`` and ``description`` frontmatter keys are read, with a
+    small line-based parser (not a YAML library). ``description`` may be a
+    single-line scalar or a block scalar (``|`` / ``>`` with optional chomping
+    or indentation indicators such as ``>-``); block lines are joined with
+    single spaces. Matching surrounding quotes on single-line values are removed.
+    Raises ValueError when the frontmatter or the name is missing.
+    """
+    text = (skill_path / "SKILL.md").read_text(encoding="utf-8")
+    # The text after the closing '---' is optional so a file that ends right
+    # at the fence (no trailing newline) still parses.
+    fm_match = re.match(r"^---\s*\n(.*?)\n---\s*(?:\n(.*))?$", text, re.DOTALL)
+    if not fm_match:
+        raise ValueError(f"SKILL.md at {skill_path} is missing frontmatter")
+    frontmatter, body = fm_match.group(1), fm_match.group(2) or ""
+    def unquote(value: str) -> str:
+        # Drop one pair of matching surrounding quotes: "x" or 'x' -> x.
+        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
+            return value[1:-1]
+        return value
+    name = None
+    description_lines: list[str] = []
+    in_description = False
+    for line in frontmatter.splitlines():
+        if line.startswith("name:"):
+            name = unquote(line.split(":", 1)[1].strip())
+            in_description = False
+        elif line.startswith("description:"):
+            value = line.split(":", 1)[1].strip()
+            # Block scalar header: '|' or '>' plus optional chomping (+/-)
+            # and/or indentation-digit indicators, e.g. '>-' or '|2'.
+            if re.fullmatch(r"[|>](?:[+-]\d?|\d[+-]?)?", value):
+                description_lines = []
+                in_description = True
+            else:
+                description_lines = [unquote(value)]
+                in_description = False
+        elif in_description and not line.strip():
+            # Blank lines are part of a block scalar; they must not end it.
+            continue
+        elif in_description and line.startswith(("  ", "\t")):
+            description_lines.append(line.strip())
+        elif in_description:
+            in_description = False
+    if not name:
+        raise ValueError(f"SKILL.md at {skill_path} is missing a name")
+    return name, " ".join(description_lines).strip(), body
+def discover_project_root(skill_path: Path) -> Path:
+    """Return the nearest directory, starting at the skill itself, that holds ``_bmad/`` or ``.git``.
+    Falls back to the skill's grandparent directory when no ancestor qualifies.
+    """
+    def is_root(directory: Path) -> bool:
+        return (directory / "_bmad").is_dir() or (directory / ".git").exists()
+    lineage = (skill_path, *skill_path.parents)
+    return next((d for d in lineage if is_root(d)), skill_path.parent.parent)
+def discover_evals(
+    skill_path: Path,
+    project_root: Path,
+    explicit: Path | None,
+) -> dict[str, Path]:
+    """Find ``evals.json`` / ``triggers.json`` for a skill.
+    With ``explicit`` set, only that file or directory is considered. Otherwise
+    the search order is ``<skill>/evals``, ``<skill>/../../evals/<skill-name>``,
+    ``<project_root>/evals/<skill-name>``, then any directory named after the
+    skill anywhere under ``<project_root>/evals``; the first location that
+    yields a file wins.
+    Returns a dict with the keys ``"evals"`` and/or ``"triggers"`` (possibly empty).
+    """
+    filenames = {"evals": "evals.json", "triggers": "triggers.json"}
+    found: dict[str, Path] = {}
+    def absorb(directory: Path) -> None:
+        # First hit per key wins; later directories never overwrite it.
+        if directory.is_dir():
+            for key, fname in filenames.items():
+                path = directory / fname
+                if key not in found and path.is_file():
+                    found[key] = path
+    if explicit is not None:
+        target = explicit.resolve()
+        if target.is_file():
+            for key, fname in filenames.items():
+                if target.name == fname:
+                    found[key] = target
+        elif target.is_dir():
+            absorb(target)
+        return found
+    skill_name = skill_path.name
+    for directory in (
+        skill_path / "evals",
+        skill_path.parent.parent / "evals" / skill_name,
+        project_root / "evals" / skill_name,
+    ):
+        absorb(directory)
+        if found:
+            return found
+    # Last resort: a directory named after the skill anywhere below evals/.
+    evals_root = project_root / "evals"
+    if evals_root.is_dir():
+        for sub in evals_root.rglob(skill_name):
+            if sub.is_dir():
+                absorb(sub)
+                if found:
+                    break
+    return found
+def utc_now_iso() -> str:
+    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ``."""
+    now = datetime.now(tz=timezone.utc)
+    return now.strftime("%Y-%m-%dT%H:%M:%SZ")
+def new_run_id(skill_name: str) -> str:
+    """Return ``<YYYYMMDD-HHMMSS>-<skill_name>``, stamped with the local (naive) clock."""
+    stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+    return f"{stamp}-{skill_name}"
+def have_docker() -> bool:
+    """Return True when ``docker`` is on PATH and ``docker info`` exits 0 within 5 seconds."""
+    if not shutil.which("docker"):
+        return False
+    try:
+        probe = subprocess.run(
+            ["docker", "info"],
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+            timeout=5,
+        )
+    except Exception:
+        # Any failure to run the probe (timeout, exec error) means "no Docker".
+        return False
+    return probe.returncode == 0
+def docker_image_present(image: str = "bmad-eval-runner:latest") -> bool:
+    """Return True when Docker is usable and ``docker image inspect <image>`` exits 0 within 10 seconds."""
+    if not have_docker():
+        return False
+    try:
+        inspection = subprocess.run(
+            ["docker", "image", "inspect", image],
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+            timeout=10,
+        )
+    except Exception:
+        # Any failure to run the inspection is treated as "image not present".
+        return False
+    return inspection.returncode == 0
+def read_macos_keychain_credentials() -> str | None:
+    """Fetch the Claude Code OAuth credentials JSON from the macOS Keychain.
+    Looks up the generic password stored under the service
+    "Claude Code-credentials" via the ``security`` CLI (5 second limit) and
+    returns the raw string. Returns None on any other platform, when the lookup
+    exits non-zero, when the stored value is empty, or when the lookup fails.
+    Intended to be called in the parent process so the credential can be staged
+    into each isolated workspace's `.claude/.credentials.json` before `claude -p`
+    is launched; an isolated subprocess whose HOME is an empty dir otherwise
+    has no auth.
+    """
+    if sys.platform != "darwin":
+        return None
+    lookup = ["security", "find-generic-password", "-s", "Claude Code-credentials", "-w"]
+    try:
+        completed = subprocess.run(lookup, capture_output=True, timeout=5)
+        if completed.returncode != 0:
+            return None
+        secret = completed.stdout.decode("utf-8", errors="replace").strip()
+    except Exception:
+        return None
+    return secret or None
+def stage_credentials(claude_dir: Path, credentials_json: str | None) -> None:
+    """Write ``credentials_json`` to ``<claude_dir>/.credentials.json``.
+    No-op when ``credentials_json`` is None or empty. The file holds an OAuth
+    secret, so it is created (or re-moded) as owner read/write only before the
+    content is written.
+    """
+    if not credentials_json:
+        return
+    claude_dir.mkdir(parents=True, exist_ok=True)
+    target = claude_dir / ".credentials.json"
+    # Create the file with 0600 first so the secret never sits in a
+    # world-readable file; chmod covers a pre-existing file with looser bits.
+    target.touch(mode=0o600, exist_ok=True)
+    target.chmod(0o600)
+    target.write_text(credentials_json, encoding="utf-8")
+def write_json(path: Path, data: object) -> None:
+    """Serialize ``data`` as 2-space-indented JSON plus a trailing newline to ``path``, creating parent directories."""
+    path.parent.mkdir(parents=True, exist_ok=True)
+    serialized = f"{json.dumps(data, indent=2)}\n"
+    path.write_text(serialized, encoding="utf-8")
+def read_json(path: Path) -> object:
+    """Parse and return the UTF-8 JSON document stored at ``path``."""
+    with path.open(encoding="utf-8") as handle:
+        return json.load(handle)
+def parse_skill_dependencies(skill_path: Path) -> list[str]:
+    """List the entries of the ``dependencies:`` sequence in SKILL.md frontmatter.
+    Returns an empty list when the file cannot be read, has no frontmatter, or
+    declares no dependencies. Only block-style items (indented ``- name``
+    lines) are recognised; the first token of each item is taken as the name.
+    """
+    try:
+        text = (skill_path / "SKILL.md").read_text(encoding="utf-8")
+    except OSError:
+        return []
+    header = re.match(r"^---\s*\n(.*?)\n---", text, re.DOTALL)
+    if header is None:
+        return []
+    names: list[str] = []
+    collecting = False
+    for raw in header.group(1).splitlines():
+        if re.match(r"^dependencies\s*:", raw):
+            collecting = True
+            continue
+        if not collecting:
+            continue
+        item = re.match(r"^\s+-\s+(\S+)", raw)
+        if item:
+            names.append(item.group(1))
+        elif not raw.startswith((" ", "\t")):
+            # An unindented line starts the next key, so the sequence is over.
+            break
+    return names
+def discover_setup_dirs(evals_file: Path, eval_id: str | None = None) -> list[Path]:
+    """List the setup overlay directories that exist next to ``evals_file``.
+    Candidates, in order: ``<evals_dir>/setup/`` (base) and, when ``eval_id``
+    is given, ``<evals_dir>/<eval_id>/setup/`` (per-eval). Base comes first so
+    a per-eval overlay applied afterwards wins on conflict.
+    """
+    evals_dir = evals_file.parent
+    candidates = [evals_dir / "setup"]
+    if eval_id:
+        candidates.append(evals_dir / eval_id / "setup")
+    return [candidate for candidate in candidates if candidate.is_dir()]
+def apply_setup_overlay(setup_dirs: list[Path], dest: Path) -> None:
+    """Copy each setup dir onto ``dest`` in order (base first, per-eval last).
+    Uses ``rsync -a`` when it is installed; when it is missing or exits
+    non-zero, falls back to ``shutil.copytree`` merging into the existing
+    tree. Entries that are not directories are skipped. A failed copy is
+    reported on stderr and does not raise.
+    """
+    dest.mkdir(parents=True, exist_ok=True)
+    rsync = shutil.which("rsync")
+    for src in setup_dirs:
+        if not src.is_dir():
+            continue
+        if rsync is not None:
+            # Trailing slashes: copy the directory's contents, not the directory.
+            result = subprocess.run(
+                [rsync, "-a", f"{src}/", f"{dest}/"],
+                check=False,
+            )
+            if result.returncode == 0:
+                continue
+        try:
+            shutil.copytree(src, dest, symlinks=True, dirs_exist_ok=True)
+        except OSError as exc:
+            # shutil.Error is an OSError subclass, so partial-copy errors land here too.
+            print(f"Warning: failed to apply setup overlay {src} -> {dest}: {exc}", file=sys.stderr)
+# Public helper API of this module; also the set of names exported by
+# `from utils import *`.
+__all__ = [
+    "parse_skill_md",
+    "discover_project_root",
+    "discover_evals",
+    "utc_now_iso",
+    "new_run_id",
+    "have_docker",
+    "docker_image_present",
+    "read_macos_keychain_credentials",
+    "stage_credentials",
+    "write_json",
+    "read_json",
+    "parse_skill_dependencies",
+    "discover_setup_dirs",
+    "apply_setup_overlay",
+]

package/lib/bmad-cache/bmb/skills/bmad-module-builder/assets/setup-skill-template/assets/module-help.csv CHANGED Viewed

	@@ -1 +1 @@
1	- module,skill,display-name,menu-code,description,action,args,phase,~~after~~,~~before~~,required,output-location,outputs
1	+ module,skill,display-name,menu-code,description,action,args,phase,preceded-by,followed-by,required,output-location,outputs

package/lib/bmad-cache/bmb/skills/bmad-module-builder/assets/setup-skill-template/scripts/cleanup-legacy.py CHANGED Viewed

@@ -197,9 +197,37 @@ def cleanup_directories(
     return removed, not_found, total_files
+def reject_unresolved_paths(named_paths: list[tuple[str, str]]) -> None:
+    """Abort if any path argument still contains the literal ``{project-root}`` token.
+    ``named_paths`` holds ``(option_name, value)`` pairs; empty or missing
+    values are ignored. On the first offending value a JSON error object is
+    printed to stdout and the process exits with status 1. The token only has
+    meaning inside config values, so a filesystem path that still contains it
+    would make the script operate on a junk ``{project-root}/`` directory.
+    """
+    token = "{project-root}"
+    for name, value in named_paths:
+        if not value or token not in value:
+            continue
+        message = (
+            f"Unresolved '{{project-root}}' token in {name} path: {value!r}. "
+            "Resolve '{project-root}' to the actual project root before running "
+            "this script — it is a filesystem path, not a config value."
+        )
+        print(json.dumps({"status": "error", "error": message}, indent=2))
+        sys.exit(1)
 def main():
     args = parse_args()
+    reject_unresolved_paths(
+        [("--bmad-dir", args.bmad_dir), ("--skills-dir", args.skills_dir)]
+    )
     bmad_dir = args.bmad_dir
     module_code = args.module_code