npm - xtrm-tools - Versions diffs - 2.4.1 → 2.4.3 - Mend

xtrm-tools 2.4.1 → 2.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (126) hide show

package/skills/sync-docs/scripts/validate_doc.py ADDED Viewed

@@ -0,0 +1,365 @@
+#!/usr/bin/env python3
+"""
+Validate and generate schema-compliant docs/ files.
+Schema for docs/ files:
+  Required: title, scope, category, version, updated
+  Optional: description, source_of_truth_for (glob list), domain (tag list)
+Usage:
+  validate_doc.py <file_or_dir>           # validate one file or all *.md in dir
+  validate_doc.py --generate <path>       # generate scaffold + required flags below
+    --title="..."
+    --scope="..."
+    --category="reference|guide|architecture|api|plan|overview"
+    --source-for="glob1,glob2"            # optional
+    --description="..."                   # optional
+"""
+import sys
+import re
+import json
+from pathlib import Path
+from datetime import date
+# ── Schema ────────────────────────────────────────────────────────────────────
+# Frontmatter keys that every docs/ file must define.
+REQUIRED_FIELDS = ["title", "scope", "category", "version", "updated"]
+# Accepted values for the `category` frontmatter key.
+VALID_CATEGORIES = ["reference", "guide", "architecture", "api", "plan", "overview"]
+# One-line description per category; generate_scaffold places it in the
+# blockquote under the scaffold's title.
+CATEGORY_DESCRIPTIONS = {
+    "reference": "Look-up table, cheat sheet, or technical specification",
+    "guide": "How-to documentation with step-by-step instructions",
+    "architecture": "System design, component relationships, high-level overview",
+    "api": "API contracts, interfaces, or data schemas",
+    "plan": "Implementation plan or roadmap",
+    "overview": "Summary introduction to a subsystem",
+}
+# ── Frontmatter helpers ───────────────────────────────────────────────────────
+def extract_frontmatter(content: str) -> dict | None:
+    """Parse simple YAML frontmatter without external dependencies.
+
+    Supports scalar values, quoted strings, inline lists (``[a, b]``) and
+    block lists (``- item``), with the items either indented or at column 0.
+    Both LF and CRLF line endings are accepted, and the closing ``---`` may be
+    the last line of the file.
+
+    Args:
+        content: Full text of the document.
+
+    Returns:
+        A dict mapping each key to a string or a list of strings, or ``None``
+        when the text does not start with a ``---`` delimited block. A key
+        with no inline value (or ``>-``) maps to a list, which stays empty
+        unless ``- item`` lines follow it.
+    """
+    m = re.match(r"^---\r?\n(.*?)\r?\n---[ \t]*(?:\r?\n|$)", content, re.DOTALL)
+    if not m:
+        return None
+    result: dict[str, object] = {}
+    current_key: str | None = None
+    current_list: list[str] | None = None
+    for line in m.group(1).splitlines():
+        # List item under the current key. Leading whitespace is optional:
+        # YAML allows block-sequence items at the same column as their key.
+        item = re.match(r"^\s*-\s+(.*)$", line)
+        if current_list is not None and item:
+            current_list.append(item.group(1).strip().strip('"\''))
+            continue
+        # New "key: value" line
+        kv = re.match(r"^([a-zA-Z_][a-zA-Z0-9_]*)\s*:\s*(.*)", line)
+        if not kv:
+            # Continuation line of a multiline scalar: ignored on purpose.
+            continue
+        # Flush the list collected for the previous key
+        if current_key is not None and current_list is not None:
+            result[current_key] = current_list
+        current_key = kv.group(1)
+        raw_val = kv.group(2).strip()
+        if raw_val in ("", ">-"):
+            # Value continues on the following lines: start collecting a list
+            current_list = []
+        elif raw_val.startswith("["):
+            # Inline list: [a, b, c]
+            current_list = None
+            inner = raw_val.strip("[]")
+            result[current_key] = [
+                v.strip().strip('"\'') for v in inner.split(",") if v.strip()
+            ]
+        else:
+            current_list = None
+            result[current_key] = raw_val.strip('"\'')
+    # Flush a list that runs to the end of the frontmatter
+    if current_key is not None and current_list is not None:
+        result[current_key] = current_list
+    return result
+def extract_headings(content: str) -> list[tuple[str, str]]:
+    """Return (heading, first_sentence) for every ## section.
+
+    Lines inside fenced code blocks are never treated as headings or as
+    summary text. The summary is the text up to the first "." of the first
+    non-blank, non-heading line of the section, capped at 120 characters; it
+    is "" when the section has no such line before the next "#" or "##"
+    heading.
+
+    Args:
+        content: Full Markdown text of the document.
+
+    Returns:
+        One (heading, summary) tuple per "## " heading, in document order.
+    """
+    lines = content.splitlines()
+    # Pre-compute which lines are fence markers or sit inside a fenced block,
+    # so both the heading scan and the summary scan can skip them.
+    fenced: list[bool] = []
+    in_code = False
+    for raw in lines:
+        is_fence = raw.strip().startswith("```")
+        fenced.append(in_code or is_fence)
+        if is_fence:
+            in_code = not in_code
+    results: list[tuple[str, str]] = []
+    for i, line in enumerate(lines):
+        if fenced[i] or not line.startswith("## "):
+            continue
+        heading = line[3:].strip()
+        summary = ""
+        for j in range(i + 1, len(lines)):
+            if fenced[j]:
+                continue
+            if lines[j].startswith(("# ", "## ")):
+                # Next section begins: this one has no summary text.
+                break
+            ln = lines[j].strip()
+            if ln and not ln.startswith("#"):
+                summary = ln.split(".")[0].strip()[:120]
+                break
+        results.append((heading, summary))
+    return results
+def make_anchor(heading: str) -> str:
+    """Generate a GitHub-compatible anchor from a heading string.
+
+    Follows GitHub's documented rules for section links: trim the heading,
+    lower-case it, drop punctuation and any whitespace other than spaces,
+    then turn each space into a hyphen. Hyphen runs are kept and underscores
+    survive, so "Foo & Bar" becomes "foo--bar", the id GitHub assigns.
+
+    Args:
+        heading: Heading text without the leading "#" markers.
+
+    Returns:
+        The anchor fragment, without the leading "#".
+    """
+    # \w is Unicode-aware for str patterns, so non-ASCII letters are kept.
+    cleaned = re.sub(r"[^\w\- ]", "", heading.strip().lower())
+    return cleaned.replace(" ", "-")
+def generate_index_table(headings: list[tuple[str, str]]) -> str:
+    """Render the INDEX table as Markdown.
+
+    Args:
+        headings: (heading, summary) pairs, e.g. from extract_headings.
+
+    Returns:
+        A two-column Markdown table ending in a newline. Each heading links
+        to its anchor; an empty summary is shown as "_no summary_".
+    """
+    def cell(text: str) -> str:
+        # An unescaped "|" would be read as a column separator.
+        return text.replace("|", "\\|")
+    rows = ["| Section | Summary |", "|---|---|"]
+    for heading, summary in headings:
+        shown = cell(summary) if summary else "_no summary_"
+        rows.append(f"| [{cell(heading)}](#{make_anchor(heading)}) | {shown} |")
+    return "\n".join(rows) + "\n"
+def inject_index(content: str, table: str) -> str:
+    """Return content with the INDEX block set to the given table.
+
+    An existing ``<!-- INDEX: ... -->`` to ``<!-- END INDEX -->`` block is
+    replaced in place. Otherwise the block is inserted right after the
+    frontmatter, or at the very top when there is no frontmatter.
+    """
+    block = (
+        "<!-- INDEX: auto-generated by validate_doc.py — do not edit manually -->\n"
+        + table
+        + "<!-- END INDEX -->"
+    )
+    found = re.search(r"<!-- INDEX:.*?-->.*?<!-- END INDEX -->", content, re.DOTALL)
+    if found is not None:
+        start, end = found.span()
+        return f"{content[:start]}{block}{content[end:]}"
+    front = re.match(r"^(---\n.*?\n---\n)(.*)", content, re.DOTALL)
+    if front is None:
+        return f"{block}\n{content}"
+    frontmatter, body = front.groups()
+    return f"{frontmatter}\n{block}\n{body}"
+# ── Validation ────────────────────────────────────────────────────────────────
+def validate_file(path: Path) -> tuple[bool, list[str], list[str]]:
+    """Validate one docs/ file and refresh its INDEX block when it passes.
+
+    Checks that the frontmatter exists, that every required field is present
+    and non-empty, that ``version`` is x.y.z, that ``category`` is a known
+    value, and that ``domain``, ``source_of_truth_for`` and ``tracks`` are
+    lists. A malformed ``updated`` date only produces a warning.
+
+    Side effect: when there are no errors and the document has ## sections,
+    the INDEX table is regenerated and the file is rewritten if it changed.
+
+    Args:
+        path: File to validate.
+
+    Returns:
+        (passed, errors, warnings).
+    """
+    errors: list[str] = []
+    warnings: list[str] = []
+    if not path.exists():
+        return False, [f"File not found: {path}"], []
+    content = path.read_text(encoding="utf-8")
+    fm = extract_frontmatter(content)
+    if fm is None:
+        errors.append("Missing or invalid YAML frontmatter (wrap in --- markers)")
+        return False, errors, warnings
+    # Required fields: present and non-empty. A bare "key:" is parsed as an
+    # empty list and 'key: ""' as an empty string; neither carries a value.
+    for field in REQUIRED_FIELDS:
+        if field not in fm:
+            errors.append(f"Missing required field: {field}")
+        elif not fm[field]:
+            errors.append(f"Required field is empty: {field}")
+    # Format checks run only on non-empty values, so an empty field is
+    # reported once instead of twice.
+    version = fm.get("version")
+    if version and not re.match(r"^\d+\.\d+\.\d+$", str(version)):
+        errors.append(f"version must be semver (x.y.z), got: {version}")
+    updated = fm.get("updated")
+    if updated and not re.match(r"^\d{4}-\d{2}-\d{2}", str(updated)):
+        warnings.append(f"updated should be ISO date (YYYY-MM-DD), got: {updated}")
+    category = fm.get("category")
+    if category and category not in VALID_CATEGORIES:
+        errors.append(
+            f"category '{category}' not valid. Choose from: {', '.join(VALID_CATEGORIES)}"
+        )
+    # Fields that must be lists when present
+    list_field_errors = {
+        "domain": "domain must be a list, e.g. [hooks, claude]",
+        "source_of_truth_for": "source_of_truth_for must be a list of glob patterns",
+        "tracks": "tracks must be a list of glob patterns",
+    }
+    for field, message in list_field_errors.items():
+        if field in fm and not isinstance(fm[field], list):
+            errors.append(message)
+    # Regenerate INDEX only for valid files
+    if not errors:
+        headings = extract_headings(content)
+        if headings:
+            new_content = inject_index(content, generate_index_table(headings))
+            if new_content != content:
+                path.write_text(new_content, encoding="utf-8")
+                warnings.append("INDEX regenerated")
+    return not errors, errors, warnings
+def validate_directory(docs_dir: Path) -> dict:
+    """Validate every *.md file directly inside docs_dir.
+
+    Returns a dict keyed by each file's path relative to the parent of
+    docs_dir, in sorted file order; each value holds the "passed", "errors"
+    and "warnings" entries reported by validate_file.
+    """
+    base = docs_dir.parent
+    report = {}
+    for doc in sorted(docs_dir.glob("*.md")):
+        outcome = validate_file(doc)
+        report[str(doc.relative_to(base))] = dict(
+            zip(("passed", "errors", "warnings"), outcome)
+        )
+    return report
+def print_file_result(path: str, passed: bool, errors: list[str], warnings: list[str]) -> None:
+    """Print the validation outcome for one file.
+
+    Writes a status line with the path and PASS/FAIL, then one line per
+    error and per warning, or "All checks passed." when the file passed with
+    no warnings.
+    """
+    status = "PASS" if passed else "FAIL"
+    # NOTE(review): both branches were empty strings in the text this was
+    # recovered from, so the status glyphs appear to have been lost;
+    # check/cross marks are assumed here. Confirm against the original.
+    mark = "✓" if passed else "✗"
+    print(f"\n{mark} {path} [{status}]")
+    for e in errors:
+        print(f"    ERROR: {e}")
+    for w in warnings:
+        print(f"    WARN:  {w}")
+    if passed and not warnings:
+        print("    All checks passed.")
+# ── Generator ─────────────────────────────────────────────────────────────────
+# Skeleton for a new docs/ file, filled in by generate_scaffold via str.format.
+# {source_field}, {tracks_field} and {description_field} are each either empty
+# or a complete newline-terminated frontmatter entry, which is why they share
+# a line with "domain: []". The INDEX markers start out with no table.
+SCAFFOLD_TEMPLATE = """\
+---
+title: {title}
+scope: {scope}
+category: {category}
+version: 1.0.0
+updated: {today}
+{source_field}{tracks_field}{description_field}domain: []
+---
+<!-- INDEX: auto-generated by validate_doc.py — do not edit manually -->
+<!-- END INDEX -->
+# {title}
+> {category_desc}
+## Overview
+_Describe what this document covers._
+"""
+def generate_scaffold(output_path: Path, title: str, scope: str, category: str,
+                      source_for: list[str], description: str) -> None:
+    """Write a new schema-compliant doc scaffold to output_path.
+
+    Renders SCAFFOLD_TEMPLATE with today's date, creates missing parent
+    directories, writes the file as UTF-8 and prints its path.
+
+    Args:
+        output_path: Destination file.
+        title: Value for the title field and the top-level heading.
+        scope: Value for the scope field.
+        category: Value for the category field; its description (or the raw
+            value when the category is unknown) becomes the blockquote.
+        source_for: Glob patterns written to both source_of_truth_for and
+            tracks; both fields are left out when the list is empty.
+        description: Optional description; left out when empty.
+    """
+    if source_for:
+        glob_lines = "\n".join(f'  - "{g}"' for g in source_for)
+        source_field = f"source_of_truth_for:\n{glob_lines}\n"
+        # tracks: mirrors source_of_truth_for so drift_detector.py picks up changes
+        tracks_field = f"tracks:\n{glob_lines}\n"
+    else:
+        source_field = tracks_field = ""
+    description_field = ""
+    if description:
+        description_field = f'description: "{description}"\n'
+    rendered = SCAFFOLD_TEMPLATE.format(
+        title=title,
+        scope=scope,
+        category=category,
+        today=date.today().isoformat(),
+        source_field=source_field,
+        tracks_field=tracks_field,
+        description_field=description_field,
+        category_desc=CATEGORY_DESCRIPTIONS.get(category, category),
+    )
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    output_path.write_text(rendered, encoding="utf-8")
+    print(f"Generated: {output_path}")
+# ── Entry point ───────────────────────────────────────────────────────────────
+def main() -> None:
+    """Command-line entry point.
+
+    With ``--generate <path>`` plus ``--title=``, ``--scope=`` and
+    ``--category=`` (optionally ``--source-for=`` and ``--description=``),
+    writes a scaffold and exits 0. Otherwise validates the file or directory
+    named by the first argument and exits 0 when everything passed, 1 if not.
+    Usage errors print a message and exit 1.
+    """
+    args = sys.argv[1:]
+    if not args:
+        print("Usage:")
+        print("  validate_doc.py <file_or_dir>")
+        print("  validate_doc.py --generate <path> --title=... --scope=... --category=...")
+        sys.exit(1)
+    # Generate mode
+    if "--generate" in args:
+        idx = args.index("--generate")
+        if idx + 1 >= len(args):
+            print("ERROR: --generate requires a path argument")
+            sys.exit(1)
+        output_path = Path(args[idx + 1])
+        kw: dict[str, str] = {}
+        source_for: list[str] = []
+        scalar_flags = ("--title", "--scope", "--category", "--description")
+        for arg in args:
+            name, sep, value = arg.partition("=")
+            if not sep:
+                continue
+            if name == "--source-for":
+                # Drop empty entries ("--source-for=" or a trailing comma) so
+                # the scaffold never gets an empty glob item.
+                source_for = [g.strip() for g in value.split(",") if g.strip()]
+            elif name in scalar_flags:
+                kw[name[2:]] = value
+        for req in ["title", "scope", "category"]:
+            if req not in kw:
+                print(f"ERROR: --{req} is required for --generate")
+                sys.exit(1)
+        # Reject categories the validator would refuse, so a generated
+        # scaffold always passes validation.
+        if kw["category"] not in VALID_CATEGORIES:
+            print(f"ERROR: --category must be one of: {', '.join(VALID_CATEGORIES)}")
+            sys.exit(1)
+        generate_scaffold(
+            output_path,
+            title=kw["title"],
+            scope=kw["scope"],
+            category=kw["category"],
+            source_for=source_for,
+            description=kw.get("description", ""),
+        )
+        sys.exit(0)
+    # Validate mode
+    target = Path(args[0])
+    all_passed = True
+    if target.is_dir():
+        results = validate_directory(target)
+        for path_str, res in results.items():
+            print_file_result(path_str, res["passed"], res["errors"], res["warnings"])
+            if not res["passed"]:
+                all_passed = False
+        if results:
+            total = len(results)
+            passed = sum(1 for r in results.values() if r["passed"])
+            print(f"\nResult: {passed}/{total} files passed")
+        else:
+            print(f"No .md files found in {target}")
+    else:
+        passed, errors, warnings = validate_file(target)
+        print_file_result(str(target), passed, errors, warnings)
+        all_passed = passed
+    sys.exit(0 if all_passed else 1)
+if __name__ == "__main__":
+    main()

package/skills/sync-docs-workspace/iteration-1/benchmark.json ADDED Viewed

@@ -0,0 +1,293 @@
+{
+  "metadata": {
+    "skill_name": "sync-docs",
+    "skill_path": "<path/to/skill>",
+    "executor_model": "<model-name>",
+    "analyzer_model": "<model-name>",
+    "timestamp": "2026-03-18T07:43:29Z",
+    "evals_run": [
+      1,
+      2,
+      3
+    ],
+    "runs_per_configuration": 3
+  },
+  "runs": [
+    {
+      "eval_id": 3,
+      "configuration": "with_skill",
+      "run_number": 1,
+      "result": {
+        "pass_rate": 0.75,
+        "passed": 3,
+        "failed": 1,
+        "total": 4,
+        "time_seconds": 0.0,
+        "tokens": 0,
+        "tool_calls": 0,
+        "errors": 0
+      },
+      "expectations": [
+        {
+          "text": "Ran doc_structure_analyzer.py and referenced its structured output",
+          "passed": true,
+          "evidence": "Ran doc_structure_analyzer.py, quoted its full structured output including EXTRACTABLE status, extraction candidates list, MISSING files, and INVALID_SCHEMA count."
+        },
+        {
+          "text": "Named specific README sections with their suggested docs/ destination",
+          "passed": true,
+          "evidence": "Named: '## Policy System \u2192 docs/policies.md', '## MCP Servers \u2192 docs/mcp-servers.md', pi-extensions.md, plus context about CHANGELOG 6-day gap."
+        },
+        {
+          "text": "Report is actionable \u2014 tells user exactly what to do next, not just observations",
+          "passed": true,
+          "evidence": "Report includes structured phase output, specific file names, notes CHANGELOG gap with exact dates, and references the 6-day staleness."
+        },
+        {
+          "text": "Did not edit or create any files (audit only)",
+          "passed": false,
+          "evidence": "Agent ran --fix (created docs/pi-extensions.md, docs/mcp-servers.md, docs/policies.md) despite task being audit-only. Skill instructions for Phase 3 show the --fix command without making clear it is only for execute mode."
+        }
+      ],
+      "notes": []
+    },
+    {
+      "eval_id": 2,
+      "configuration": "with_skill",
+      "run_number": 1,
+      "result": {
+        "pass_rate": 0.75,
+        "passed": 3,
+        "failed": 1,
+        "total": 4,
+        "time_seconds": 0.0,
+        "tokens": 0,
+        "tool_calls": 0,
+        "errors": 0
+      },
+      "expectations": [
+        {
+          "text": "Ran doc_structure_analyzer.py with --fix flag",
+          "passed": true,
+          "evidence": "Ran `python3 skills/sync-docs/scripts/doc_structure_analyzer.py --fix --bd-remember` and included full output"
+        },
+        {
+          "text": "Ran with --bd-remember or manually ran bd remember with a summary",
+          "passed": true,
+          "evidence": "bd remember stored with key 'sync-docs-fix-2026-03-18', confirmed stored:true in output JSON"
+        },
+        {
+          "text": "At least one scaffold file was created in docs/",
+          "passed": true,
+          "evidence": "Created docs/pi-extensions.md, docs/mcp-servers.md, docs/policies.md with valid frontmatter"
+        },
+        {
+          "text": "Ran validate_doc.py on created files to confirm schema",
+          "passed": false,
+          "evidence": "Report notes 7 INVALID_SCHEMA files exist but does not show validate_doc.py being run explicitly to confirm the 3 new files pass. Only the JSON output showing valid frontmatter is evidence."
+        }
+      ],
+      "notes": []
+    },
+    {
+      "eval_id": 1,
+      "configuration": "with_skill",
+      "run_number": 1,
+      "result": {
+        "pass_rate": 1.0,
+        "passed": 4,
+        "failed": 0,
+        "total": 4,
+        "time_seconds": 0.0,
+        "tokens": 0,
+        "tool_calls": 0,
+        "errors": 0
+      },
+      "expectations": [
+        {
+          "text": "Ran context_gatherer.py and reported bd closed issues or merged PRs from the output",
+          "passed": true,
+          "evidence": "Ran context_gatherer.py, reported 20 bd closed issues with IDs and titles, 3 merged PRs with SHAs and dates, 15 recent commits"
+        },
+        {
+          "text": "Ran doc_structure_analyzer.py and used its output to identify doc issues",
+          "passed": true,
+          "evidence": "Ran doc_structure_analyzer.py, referenced MISSING status for docs/pi-extensions.md, hooks.md, mcp-servers.md, policies.md, skills.md and EXTRACTABLE for README"
+        },
+        {
+          "text": "Produced at least one concrete recommendation or action (not just a vague summary)",
+          "passed": true,
+          "evidence": "Named specific files: docs/pi-extensions.md, docs/hooks.md, docs/mcp-servers.md, docs/policies.md with explicit next steps for each"
+        },
+        {
+          "text": "Used the skill scripts rather than just reading files manually",
+          "passed": true,
+          "evidence": "Ran 3 scripts (context_gatherer.py, drift_detector.py, doc_structure_analyzer.py) with explicit output included in report"
+        }
+      ],
+      "notes": []
+    },
+    {
+      "eval_id": 3,
+      "configuration": "without_skill",
+      "run_number": 1,
+      "result": {
+        "pass_rate": 0.75,
+        "passed": 3,
+        "failed": 1,
+        "total": 4,
+        "time_seconds": 72.5,
+        "tokens": 21934,
+        "tool_calls": 0,
+        "errors": 0
+      },
+      "expectations": [
+        {
+          "text": "Ran doc_structure_analyzer.py and referenced its structured output",
+          "passed": false,
+          "evidence": "Did not run doc_structure_analyzer.py. All findings came from manual README.md reads with line numbers."
+        },
+        {
+          "text": "Named specific README sections with their suggested docs/ destination",
+          "passed": true,
+          "evidence": "Named 6 specific sections with line numbers: Hooks Reference (114-141)\u2192docs/hooks.md, Policy System (66-87)\u2192new docs/policies.md, MCP Servers (143-158)\u2192docs/mcp.md, CLI Commands (89-111)\u2192XTRM-GUIDE.md, Version History (179-188)\u2192remove, Plugin Structure (52-63)\u2192borderline."
+        },
+        {
+          "text": "Report is actionable \u2014 tells user exactly what to do next, not just observations",
+          "passed": true,
+          "evidence": "Each section has a specific Recommendation: block with exact action (Remove section, Add single link, Create docs/policies.md, etc.). Estimated README would shrink from 193 to 60-70 lines."
+        },
+        {
+          "text": "Did not edit or create any files (audit only)",
+          "passed": true,
+          "evidence": "Report explicitly states no files were modified. Audit-only as instructed."
+        }
+      ],
+      "notes": []
+    },
+    {
+      "eval_id": 2,
+      "configuration": "without_skill",
+      "run_number": 1,
+      "result": {
+        "pass_rate": 1.0,
+        "passed": 4,
+        "failed": 0,
+        "total": 4,
+        "time_seconds": 0.0,
+        "tokens": 0,
+        "tool_calls": 0,
+        "errors": 0
+      },
+      "expectations": [
+        {
+          "text": "Ran doc_structure_analyzer.py with --fix flag",
+          "passed": true,
+          "evidence": "Agent found the skill in the repo and ran doc_structure_analyzer.py --fix. However, found no MISSING gaps because with_skill run had already created those files (confounded test)."
+        },
+        {
+          "text": "Ran with --bd-remember or manually ran bd remember with a summary",
+          "passed": true,
+          "evidence": "Agent ran bd remember with key 'sync-docs-fix-schema-2026-03-18' summarizing the frontmatter additions made to 7 files."
+        },
+        {
+          "text": "At least one scaffold file was created in docs/",
+          "passed": true,
+          "evidence": "Added YAML frontmatter to 7 existing docs/ files (hooks.md, mcp.md, pre-install-cleanup.md, project-skills.md, skills.md, testing.md, todo.md). Different action than creating scaffolds but valid given scaffolds already existed."
+        },
+        {
+          "text": "Ran validate_doc.py on created files to confirm schema",
+          "passed": true,
+          "evidence": "Ran validate_doc.py docs/ \u2014 7/7 files passed after frontmatter additions."
+        }
+      ],
+      "notes": []
+    },
+    {
+      "eval_id": 1,
+      "configuration": "without_skill",
+      "run_number": 1,
+      "result": {
+        "pass_rate": 0.25,
+        "passed": 1,
+        "failed": 3,
+        "total": 4,
+        "time_seconds": 0.0,
+        "tokens": 0,
+        "tool_calls": 0,
+        "errors": 0
+      },
+      "expectations": [
+        {
+          "text": "Ran context_gatherer.py and reported bd closed issues or merged PRs from the output",
+          "passed": false,
+          "evidence": "Did not run context_gatherer.py. Used git log manually. Reported 'No .beads/ DB was found' which is wrong \u2014 .beads/ exists. Missed all 20 closed bd issues."
+        },
+        {
+          "text": "Ran doc_structure_analyzer.py and used its output to identify doc issues",
+          "passed": false,
+          "evidence": "Did not run doc_structure_analyzer.py. Manually read README.md, package.json, and CHANGELOG.md."
+        },
+        {
+          "text": "Produced at least one concrete recommendation or action (not just a vague summary)",
+          "passed": true,
+          "evidence": "Found version mismatch (2.3.0 vs 2.4.1 in package.json), identified 7 undocumented branch commits in CHANGELOG, named specific line references."
+        },
+        {
+          "text": "Used the skill scripts rather than just reading files manually",
+          "passed": false,
+          "evidence": "No skill scripts were used. All findings came from manual git log, file reads, and README inspection."
+        }
+      ],
+      "notes": []
+    }
+  ],
+  "run_summary": {
+    "with_skill": {
+      "pass_rate": {
+        "mean": 0.8333,
+        "stddev": 0.1443,
+        "min": 0.75,
+        "max": 1.0
+      },
+      "time_seconds": {
+        "mean": 0.0,
+        "stddev": 0.0,
+        "min": 0.0,
+        "max": 0.0
+      },
+      "tokens": {
+        "mean": 0.0,
+        "stddev": 0.0,
+        "min": 0,
+        "max": 0
+      }
+    },
+    "without_skill": {
+      "pass_rate": {
+        "mean": 0.6667,
+        "stddev": 0.3819,
+        "min": 0.25,
+        "max": 1.0
+      },
+      "time_seconds": {
+        "mean": 24.1667,
+        "stddev": 41.8579,
+        "min": 0.0,
+        "max": 72.5
+      },
+      "tokens": {
+        "mean": 7311.3333,
+        "stddev": 12663.6008,
+        "min": 0,
+        "max": 21934
+      }
+    },
+    "delta": {
+      "pass_rate": "+0.17",
+      "time_seconds": "-24.2",
+      "tokens": "-7311"
+    }
+  },
+  "notes": []
+}

package/skills/sync-docs-workspace/iteration-1/benchmark.md ADDED Viewed

@@ -0,0 +1,13 @@
+# Skill Benchmark: sync-docs
+**Model**: <model-name>
+**Date**: 2026-03-18T07:43:29Z
+**Evals**: 1, 2, 3 (3 runs each per configuration)
+## Summary
+| Metric | With Skill | Without Skill | Delta |
+|--------|------------|---------------|-------|
+| Pass Rate | 83% ± 14% | 67% ± 38% | +0.17 |
+| Time | 0.0s ± 0.0s | 24.2s ± 41.9s | -24.2s |
+| Tokens | 0 ± 0 | 7311 ± 12664 | -7311 |