npm - @event4u/agent-config - Versions diffs - 1.33.0 → 1.35.0 - Mend

@event4u/agent-config 1.33.0 → 1.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (218) hide show

package/scripts/skill_tools/audit_persona_coverage.py ADDED Viewed

@@ -0,0 +1,147 @@
+#!/usr/bin/env python3
+"""Block D · D3 — audit_persona_coverage.
+Build a citation matrix of personas across the SKILL.md corpus and flag
+under-cited personas using **tier-aware thresholds** (council iter-1
+D-OQ4 verdict):
+  - **specialist** persona < 3 citations  → under-cited
+  - **core**       persona < 5 citations  → under-cited
+Inputs:
+  --skills-dir DIR   — directory holding SKILL.md files
+  --personas-dir DIR — directory holding persona Markdown files
+  --json             — machine-readable output
+Output: per-persona citation count + tier + status (ok / under-cited / orphan).
+Exit code: 0 always (this is an advisory tool, not a CI gate).
+Stdlib-only. ≤ 120 LOC. Embedded `_SAMPLE` for self-demo.
+"""
+from __future__ import annotations
+import argparse
+import json
+import re
+import sys
+from pathlib import Path
+from typing import Dict, List
+# Directory two levels above the folder that contains this script.
+ROOT = Path(__file__).resolve().parents[2]
+# Default corpus locations, used when the matching CLI flag is omitted.
+DEFAULT_SKILLS = ROOT / ".agent-src.uncompressed" / "skills"
+DEFAULT_PERSONAS = ROOT / ".agent-src.uncompressed" / "personas"
+# Minimum citation count per persona tier; a lower count is reported as "under-cited".
+THRESHOLDS = {"core": 5, "specialist": 3}
+def _read_block(path: Path) -> str:
+    text = path.read_text(encoding="utf-8", errors="replace")
+    if not text.startswith("---"):
+        return ""
+    end = text.find("\n---", 3)
+    return text[3:end] if end != -1 else ""
+def _frontmatter_value(block: str, key: str) -> str | None:
+    m = re.search(rf"^{re.escape(key)}\s*:\s*(.+)$", block, re.MULTILINE)
+    if not m:
+        return None
+    val = m.group(1).strip()
+    if val.startswith('"') and val.endswith('"'):
+        val = val[1:-1]
+    return val
+def _frontmatter_list(block: str, key: str) -> List[str]:
+    m = re.search(rf"^{re.escape(key)}\s*:\s*$", block, re.MULTILINE)
+    if not m:
+        return []
+    items: List[str] = []
+    for line in block[m.end():].splitlines():
+        if line.startswith("  - "):
+            items.append(line[4:].strip())
+        elif line and not line.startswith(" "):
+            break
+    return items
+def _load_personas(personas_dir: Path) -> Dict[str, str]:
+    """slug → tier (core | specialist | unknown)."""
+    personas: Dict[str, str] = {}
+    if not personas_dir.is_dir():
+        return personas
+    for md in sorted(personas_dir.glob("*.md")):
+        if md.name.lower() == "readme.md":
+            continue
+        block = _read_block(md)
+        slug = _frontmatter_value(block, "id") or md.stem
+        tier = _frontmatter_value(block, "tier") or "unknown"
+        personas[slug] = tier
+    return personas
+def _count_citations(skills_dir: Path) -> Dict[str, int]:
+    counts: Dict[str, int] = {}
+    if not skills_dir.is_dir():
+        return counts
+    for skill_md in skills_dir.glob("*/SKILL.md"):
+        block = _read_block(skill_md)
+        for slug in _frontmatter_list(block, "personas"):
+            counts[slug] = counts.get(slug, 0) + 1
+    return counts
+def audit(skills_dir: Path, personas_dir: Path) -> List[Dict[str, object]]:
+    personas = _load_personas(personas_dir)
+    citations = _count_citations(skills_dir)
+    rows: List[Dict[str, object]] = []
+    for slug, tier in sorted(personas.items()):
+        count = citations.get(slug, 0)
+        threshold = THRESHOLDS.get(tier, 3)
+        status = "under-cited" if count < threshold else "ok"
+        rows.append({"persona": slug, "tier": tier, "citations": count,
+                     "threshold": threshold, "status": status})
+    # Surface citations that point at unknown personas (typos, deletions).
+    for slug in sorted(citations.keys()):
+        if slug not in personas:
+            rows.append({"persona": slug, "tier": "unknown",
+                         "citations": citations[slug], "threshold": 0,
+                         "status": "orphan"})
+    return rows
+def _print_human(rows: List[Dict[str, object]]) -> None:
+    if not rows:
+        print("(no personas found)")
+        return
+    width = max(len(str(r["persona"])) for r in rows)
+    print(f"  {'persona':<{width}}  tier        cites  status")
+    print(f"  {'-' * width}  ----------  -----  -----------")
+    for r in rows:
+        print(f"  {str(r['persona']):<{width}}  {str(r['tier']):<10}  "
+              f"{int(r['citations']):>5}  {r['status']}")
+    flagged = [r for r in rows if r["status"] != "ok"]
+    if flagged:
+        print(f"\n  {len(flagged)} persona(s) flagged "
+              f"(under-cited or orphan).")
+def main(argv: List[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(description=__doc__.split("\n", 1)[0])
+    parser.add_argument("--skills-dir", default=str(DEFAULT_SKILLS))
+    parser.add_argument("--personas-dir", default=str(DEFAULT_PERSONAS))
+    parser.add_argument("--json", action="store_true",
+                        help="emit JSON instead of text")
+    args = parser.parse_args(argv)
+    rows = audit(Path(args.skills_dir), Path(args.personas_dir))
+    if args.json:
+        json.dump({"rows": rows}, sys.stdout, indent=2)
+        sys.stdout.write("\n")
+    else:
+        _print_human(rows)
+    return 0
+# Embedded sample payload; not read by any of the code shown in this script.
+_SAMPLE = {"thresholds": THRESHOLDS}
+# Script entry point: the process exit status is main()'s return value.
+if __name__ == "__main__":
+    raise SystemExit(main())

package/scripts/skill_tools/run_block_d_eval.py ADDED Viewed

@@ -0,0 +1,129 @@
+#!/usr/bin/env python3
+"""Block D · D5 — eval gate runner.
+Runs D2 (`score_skill_relevance`), D3 (`audit_persona_coverage`), and
+D4 (`suggest_skill_for_task`) against the corpora in
+`agents/eval-corpora/block-d/` and emits a pass/fail summary per the
+council verdict targets:
+  - **D2**: ≥ 85 % of corpus tasks have an `expected_top3` skill in
+    the actual top-3 ranking.
+  - **D3**: ≥ 2 personas flagged as `under-cited`.
+  - **D4**: ≥ 3 / 5 blind tasks where suggestion #1 matches the
+    human-curated top-1.
+Pilot pass = ≥ 2 / 3 tools pass. Anything less → kill switch.
+Stdlib-only. Embedded `_SAMPLE` for self-demo.
+"""
+from __future__ import annotations
+import argparse
+import json
+import sys
+from pathlib import Path
+from typing import Dict, List
+from skill_tools.audit_persona_coverage import audit  # type: ignore
+from skill_tools.score_skill_relevance import (  # type: ignore
+    DEFAULT_SKILLS_DIR,
+    rank,
+)
+from skill_tools.suggest_skill_for_task import suggest  # type: ignore
+# Directory two levels above the folder that contains this script.
+ROOT = Path(__file__).resolve().parents[2]
+# Default directory holding the eval corpora (d2-tasks.json / d4-tasks.json).
+CORPUS_DIR = ROOT / "agents" / "eval-corpora" / "block-d"
+# Default persona directory handed to the D3 and D4 evaluations.
+PERSONAS_DIR = ROOT / ".agent-src.uncompressed" / "personas"
+def _eval_d2(corpus: Path, skills_dir: Path) -> Dict[str, object]:
+    data = json.loads(corpus.read_text(encoding="utf-8"))
+    tasks = data["tasks"]
+    hits, misses = 0, []
+    for t in tasks:
+        ranked = rank(t["task"], skills_dir)[:3]
+        names = [n for n, _, _ in ranked]
+        if any(e in names for e in t["expected_top3"]):
+            hits += 1
+        else:
+            misses.append({"id": t["id"], "expected": t["expected_top3"],
+                           "got": names})
+    pct = hits / len(tasks) if tasks else 0.0
+    return {"hits": hits, "total": len(tasks), "pct": round(pct, 3),
+            "passed": pct >= 0.85, "misses": misses}
+def _eval_d3(skills_dir: Path, personas_dir: Path) -> Dict[str, object]:
+    rows = audit(skills_dir, personas_dir)
+    flagged = [r["persona"] for r in rows if r["status"] == "under-cited"]
+    return {"flagged": flagged, "count": len(flagged),
+            "passed": len(flagged) >= 2}
+def _eval_d4(corpus: Path, skills_dir: Path,
+             personas_dir: Path) -> Dict[str, object]:
+    data = json.loads(corpus.read_text(encoding="utf-8"))
+    tasks = data["tasks"]
+    hits, misses = 0, []
+    for t in tasks:
+        out = suggest(t["task"], skills_dir, personas_dir, top=1)
+        got = out[0]["skill"] if out else None
+        if got == t["expected_top1"]:
+            hits += 1
+        else:
+            misses.append({"id": t["id"], "expected": t["expected_top1"],
+                           "got": got})
+    return {"hits": hits, "total": len(tasks),
+            "passed": hits >= 3, "misses": misses}
+def run_all(skills_dir: Path, personas_dir: Path,
+            corpus_dir: Path) -> Dict[str, object]:
+    d2 = _eval_d2(corpus_dir / "d2-tasks.json", skills_dir)
+    d3 = _eval_d3(skills_dir, personas_dir)
+    d4 = _eval_d4(corpus_dir / "d4-tasks.json", skills_dir, personas_dir)
+    passes = sum(1 for r in (d2, d3, d4) if r["passed"])
+    return {"D2": d2, "D3": d3, "D4": d4,
+            "tools_passed": passes,
+            "pilot_passed": passes >= 2}
+def _print_human(report: Dict[str, object]) -> None:
+    icons = {True: "✅", False: "❌"}
+    for key in ("D2", "D3", "D4"):
+        r: Dict[str, object] = report[key]  # type: ignore[assignment]
+        print(f"  {icons[bool(r['passed'])]}  {key}: {_summary(key, r)}")
+    overall = bool(report["pilot_passed"])
+    print(f"\n  pilot: {report['tools_passed']}/3 tools passed → "
+          f"{'PASS' if overall else 'FAIL'}")
+def _summary(key: str, r: Dict[str, object]) -> str:
+    if key == "D2":
+        return f"{r['hits']}/{r['total']} ({float(r['pct']) * 100:.0f}%) ≥ 85% target"
+    if key == "D3":
+        return f"{r['count']} under-cited personas (≥ 2 target)"
+    return f"{r['hits']}/{r['total']} top-1 hits (≥ 3/5 target)"
+def main(argv: List[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(description=__doc__.split("\n", 1)[0])
+    parser.add_argument("--skills-dir", default=str(DEFAULT_SKILLS_DIR))
+    parser.add_argument("--personas-dir", default=str(PERSONAS_DIR))
+    parser.add_argument("--corpus-dir", default=str(CORPUS_DIR))
+    parser.add_argument("--json", action="store_true")
+    args = parser.parse_args(argv)
+    report = run_all(Path(args.skills_dir), Path(args.personas_dir),
+                     Path(args.corpus_dir))
+    if args.json:
+        json.dump(report, sys.stdout, indent=2)
+        sys.stdout.write("\n")
+    else:
+        _print_human(report)
+    return 0 if report["pilot_passed"] else 1
+# Embedded sample payload; not read by any of the code shown in this script.
+_SAMPLE = {"corpus_dir": str(CORPUS_DIR)}
+# Script entry point: the process exit status is main()'s return value
+# (0 when the pilot passed, 1 otherwise).
+if __name__ == "__main__":
+    raise SystemExit(main())

package/scripts/skill_tools/score_skill_relevance.py ADDED Viewed

@@ -0,0 +1,169 @@
+#!/usr/bin/env python3
+"""Block D · D2 — score_skill_relevance.
+Rank skills by relevance to a free-form task description.
+Heuristic (council iter-1 D-OQ1 verdict (b) — discovery-story tool 1):
+  score = keyword_overlap * 70 + persona_match * 30
+where:
+  - keyword_overlap = |task_terms ∩ skill_terms| / |task_terms|
+    (skill_terms = tokens from `name` + `description`)
+  - persona_match  = 1.0 if any persona on the skill is named or
+    role-mentioned in the task, else 0.0
+Inputs:
+  --task TEXT      — task description (required unless --sample is passed)
+  --skills-dir DIR — directory holding SKILL.md files (default: package skills)
+  --top N          — emit only top-N ranked skills (default: all non-zero)
+  --json           — machine-readable ranked output
+Output: ranked list with integer scores 0–100, descending. Ties break on name.
+Stdlib-only. ≤ 180 LOC. Embedded `_SAMPLE` for self-demo (`python3 -m … --json`).
+"""
+from __future__ import annotations
+import argparse
+import json
+import re
+import sys
+from pathlib import Path
+from typing import Dict, Iterable, List, Tuple
+# Directory two levels above the folder that contains this script.
+ROOT = Path(__file__).resolve().parents[2]
+# Default skills directory, used when --skills-dir is omitted.
+DEFAULT_SKILLS_DIR = ROOT / ".agent-src.uncompressed" / "skills"
+TOKEN_RE = re.compile(r"[a-z][a-z0-9]+")
+STOPWORDS = frozenset({
+    "the", "a", "an", "and", "or", "but", "of", "for", "with", "to", "in",
+    "on", "at", "by", "from", "as", "is", "are", "was", "were", "be", "been",
+    "this", "that", "these", "those", "it", "its", "use", "when", "even",
+    "via", "via:", "into", "onto", "use:", "skill", "skills", "task", "tasks",
+    "code", "file", "files", "doing", "make", "do", "go", "get", "set",
+    "not", "no", "yes", "any", "some", "all", "one", "two", "new", "old",
+    "user", "users", "our", "your", "their", "they", "we", "you", "i", "me",
+})
+def _tokenize(text: str) -> set:
+    return {t for t in TOKEN_RE.findall(text.lower()) if t not in STOPWORDS and len(t) > 2}
+def _parse_frontmatter(path: Path) -> Dict[str, object]:
+    """Minimal YAML-frontmatter reader (stdlib-only). Returns {} on parse miss."""
+    text = path.read_text(encoding="utf-8", errors="replace")
+    if not text.startswith("---"):
+        return {}
+    end = text.find("\n---", 3)
+    if end == -1:
+        return {}
+    block = text[3:end]
+    out: Dict[str, object] = {}
+    current_list_key: str | None = None
+    for raw in block.splitlines():
+        line = raw.rstrip()
+        if not line or line.startswith("#"):
+            continue
+        if current_list_key and line.startswith("  - "):
+            out.setdefault(current_list_key, []).append(line[4:].strip())  # type: ignore[union-attr]
+            continue
+        current_list_key = None
+        m = re.match(r"^([a-zA-Z_][\w-]*)\s*:\s*(.*)$", line)
+        if not m:
+            continue
+        key, val = m.group(1), m.group(2).strip()
+        if val == "":
+            current_list_key = key
+            continue
+        if val.startswith('"') and val.endswith('"'):
+            val = val[1:-1]
+        out[key] = val
+    return out
+def _load_skills(skills_dir: Path) -> List[Dict[str, object]]:
+    skills: List[Dict[str, object]] = []
+    for skill_md in sorted(skills_dir.glob("*/SKILL.md")):
+        fm = _parse_frontmatter(skill_md)
+        name = str(fm.get("name") or skill_md.parent.name)
+        desc = str(fm.get("description") or "")
+        personas = fm.get("personas") or []
+        if isinstance(personas, str):
+            personas = [personas]
+        skills.append({
+            "name": name,
+            "description": desc,
+            "personas": list(personas),
+            "terms": _tokenize(name + " " + desc),
+        })
+    return skills
+def _score(task_terms: set, skill: Dict[str, object]) -> int:
+    if not task_terms:
+        return 0
+    skill_terms = skill["terms"]  # type: ignore[index]
+    overlap = len(task_terms & skill_terms) / max(len(task_terms), 1)  # type: ignore[arg-type]
+    persona_hit = 0.0
+    task_lower = " ".join(task_terms)
+    for persona in skill["personas"]:  # type: ignore[union-attr]
+        slug = str(persona).lower()
+        if slug in task_lower or any(part in task_terms for part in slug.split("-")):  # type: ignore[operator]
+            persona_hit = 1.0
+            break
+    return round(overlap * 70 + persona_hit * 30)
+def rank(task: str, skills_dir: Path) -> List[Tuple[str, int, List[str]]]:
+    task_terms = _tokenize(task)
+    skills = _load_skills(skills_dir)
+    rows: List[Tuple[str, int, List[str]]] = []
+    for s in skills:
+        score = _score(task_terms, s)
+        if score > 0:
+            rows.append((str(s["name"]), score, list(s["personas"])))  # type: ignore[arg-type]
+    rows.sort(key=lambda r: (-r[1], r[0]))
+    return rows
+def _print_human(rows: Iterable[Tuple[str, int, List[str]]], top: int | None) -> None:
+    rows = list(rows)
+    if top:
+        rows = rows[:top]
+    if not rows:
+        print("(no relevant skills found)")
+        return
+    width = max(len(r[0]) for r in rows)
+    for name, score, personas in rows:
+        persona_str = ", ".join(personas) if personas else "—"
+        print(f"  {score:3d}  {name:<{width}}  {persona_str}")
+def main(argv: List[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(description=__doc__.split("\n", 1)[0])
+    parser.add_argument("--task", required=False, default="",
+                        help="task description (required unless --sample is used)")
+    parser.add_argument("--skills-dir", default=str(DEFAULT_SKILLS_DIR),
+                        help="directory holding SKILL.md files")
+    parser.add_argument("--top", type=int, default=0, help="emit only top-N rows")
+    parser.add_argument("--json", action="store_true", help="emit JSON instead of text")
+    parser.add_argument("--sample", action="store_true", help="run against the embedded sample task")
+    args = parser.parse_args(argv)
+    task = _SAMPLE["task"] if args.sample else args.task
+    if not task:
+        parser.error("--task is required (or pass --sample)")
+    rows = rank(task, Path(args.skills_dir))
+    if args.json:
+        payload = [{"name": n, "score": s, "personas": p} for n, s, p in (rows[:args.top] if args.top else rows)]
+        json.dump({"task": task, "ranked": payload}, sys.stdout, indent=2)
+        sys.stdout.write("\n")
+    else:
+        _print_human(rows, args.top or None)
+    return 0
+# Sample task that main() uses when --sample is passed.
+_SAMPLE = {"task": "build a livewire component for the user dashboard with reactive state"}
+# Script entry point: the process exit status is main()'s return value.
+if __name__ == "__main__":
+    raise SystemExit(main())

package/scripts/skill_tools/suggest_skill_for_task.py ADDED Viewed

@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+"""Block D · D4 — suggest_skill_for_task.
+CLI wrapper that combines D2 (`score_skill_relevance`) with the persona
+matrix from D3 (`audit_persona_coverage`) and emits the top-3 skill +
+persona combos with a one-line justification each.
+Inputs:
+  --task TEXT        — task description (required unless --sample is passed)
+  --skills-dir DIR   — SKILL.md directory
+  --personas-dir DIR — persona Markdown directory
+  --top N            — emit top-N combos (default: 3)
+  --json             — machine-readable output
+Output: ranked combos with `skill`, `score`, `personas[]`, and `why`.
+Stdlib-only. ≤ 100 LOC. Embedded `_SAMPLE` for self-demo via `--sample`.
+"""
+from __future__ import annotations
+import argparse
+import json
+import sys
+from pathlib import Path
+from typing import Dict, List
+from skill_tools.audit_persona_coverage import audit  # type: ignore
+from skill_tools.score_skill_relevance import (  # type: ignore
+    DEFAULT_SKILLS_DIR,
+    rank,
+)
+# Directory two levels above the folder that contains this script.
+ROOT = Path(__file__).resolve().parents[2]
+# Default persona directory, used when --personas-dir is omitted.
+DEFAULT_PERSONAS = ROOT / ".agent-src.uncompressed" / "personas"
+def _persona_status(rows: List[Dict[str, object]]) -> Dict[str, str]:
+    return {str(r["persona"]): str(r["status"]) for r in rows}
+def _justify(name: str, score: int, personas: List[str],
+             status: Dict[str, str]) -> str:
+    if score >= 70:
+        head = "high keyword + persona match"
+    elif score >= 40:
+        head = "strong keyword overlap"
+    else:
+        head = "partial overlap — confirm with reviewer"
+    if personas:
+        tier_hits = ", ".join(
+            f"{p} ({status.get(p, 'unknown')})" for p in personas
+        )
+        return f"{head}; lenses: {tier_hits}"
+    return f"{head}; no persona declared on `{name}`"
+def suggest(task: str, skills_dir: Path, personas_dir: Path,
+            top: int = 3) -> List[Dict[str, object]]:
+    ranked = rank(task, skills_dir)[:top]
+    persona_rows = audit(skills_dir, personas_dir)
+    status = _persona_status(persona_rows)
+    return [
+        {
+            "skill": name,
+            "score": score,
+            "personas": personas,
+            "why": _justify(name, score, personas, status),
+        }
+        for name, score, personas in ranked
+    ]
+def _print_human(combos: List[Dict[str, object]]) -> None:
+    if not combos:
+        print("(no skill suggestions for this task)")
+        return
+    for i, c in enumerate(combos, 1):
+        personas = ", ".join(c["personas"]) if c["personas"] else "—"  # type: ignore[arg-type]
+        print(f"  {i}. {c['skill']}  ({c['score']}/100)")
+        print(f"     personas: {personas}")
+        print(f"     why: {c['why']}")
+def main(argv: List[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(description=__doc__.split("\n", 1)[0])
+    parser.add_argument("--task", default="",
+                        help="task description (required unless --sample)")
+    parser.add_argument("--skills-dir", default=str(DEFAULT_SKILLS_DIR))
+    parser.add_argument("--personas-dir", default=str(DEFAULT_PERSONAS))
+    parser.add_argument("--top", type=int, default=3)
+    parser.add_argument("--json", action="store_true",
+                        help="emit JSON instead of text")
+    parser.add_argument("--sample", action="store_true",
+                        help="run against the embedded sample task")
+    args = parser.parse_args(argv)
+    task = _SAMPLE["task"] if args.sample else args.task
+    if not task:
+        parser.error("--task is required (or pass --sample)")
+    combos = suggest(task, Path(args.skills_dir),
+                     Path(args.personas_dir), args.top)
+    if args.json:
+        json.dump({"task": task, "suggestions": combos},
+                  sys.stdout, indent=2)
+        sys.stdout.write("\n")
+    else:
+        _print_human(combos)
+    return 0
+# Sample task that main() uses when --sample is passed.
+_SAMPLE = {"task": "review a livewire component for accessibility and reactive state"}
+# Script entry point: the process exit status is main()'s return value.
+if __name__ == "__main__":
+    raise SystemExit(main())