npm - @event4u/agent-config - Versions diffs - 6.0.0 → 6.1.0 - Mend

@event4u/agent-config 6.0.0 → 6.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (378) hide show

package/dist/agent-src/scripts/archive_completed_roadmaps.py ADDED Viewed

@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+"""Archive completed roadmaps — the PR-gate (council 2026-06-14).
+A roadmap that has reached ``count_open == 0`` and ``count_deferred == 0`` is
+**complete**. This sweep moves it to ``agents/roadmaps/archive/``, rewrites
+inbound references (``agents/roadmaps/<x>.md`` → ``agents/roadmaps/archive/<x>.md``)
+across tracked files so links never break, and regenerates the dashboard.
+It replaces the old **merge-gate** (keep one item open + a manual post-merge
+archival step that got forgotten — leaving finished roadmaps to rot in ``main``)
+with a deterministic **PR-gate**: ``/create-pr`` runs this before the PR is
+created, so the roadmap lands already-archived in the PR and merges clean.
+Default ``--changed-only``: only archive roadmaps that appear in this branch's
+history since it diverged from ``origin/main`` (``git log origin/main..HEAD``),
+so a PR archives exactly the roadmaps it completed — never an unrelated complete
+roadmap. ``--all`` archives every complete active roadmap. No agent-set
+annotation is required — completion is detected from the checkbox counts.
+Usage:
+    python3 scripts/archive_completed_roadmaps.py            # --changed-only (default)
+    python3 scripts/archive_completed_roadmaps.py --all
+    python3 scripts/archive_completed_roadmaps.py --base origin/main --dry-run
+"""
+from __future__ import annotations
+import argparse
+import subprocess
+import sys
+from pathlib import Path
+sys.path.insert(0, str(Path(__file__).resolve().parent))
+import update_roadmap_progress as urp  # noqa: E402
+def _run(cmd: list[str], cwd: Path) -> subprocess.CompletedProcess:
+    return subprocess.run(cmd, cwd=cwd, capture_output=True, text=True)
+def _repo_root() -> Path:
+    cp = _run(["git", "rev-parse", "--show-toplevel"], Path.cwd())
+    return Path(cp.stdout.strip()) if cp.returncode == 0 else Path.cwd()
+def _branch_touched_paths(root: Path, base: str) -> set[str] | None:
+    """Repo-relative paths touched in any commit since divergence from base.
+    Returns None when the base ref is unavailable (e.g. a shallow clone or a
+    detached state) — callers treat None as "cannot scope, fall back to --all".
+    """
+    cp = _run(["git", "log", f"{base}..HEAD", "--name-only",
+               "--pretty=format:"], root)
+    if cp.returncode != 0:
+        return None
+    return {line.strip() for line in cp.stdout.splitlines() if line.strip()}
+def _inbound_ref_rewrite(root: Path, old_rel: str, new_rel: str,
+                         dry_run: bool) -> list[str]:
+    """Rewrite full-path references ``old_rel`` → ``new_rel`` in tracked files.
+    Only the exact repo-relative path is rewritten (bare-filename mentions like
+    ``road-to-x.md`` are left alone — they do not resolve as links and do not
+    break). The archived file's own path never matches because the search string
+    is the un-archived path.
+    """
+    grep = _run(["git", "grep", "-l", "--", old_rel], root)
+    changed: list[str] = []
+    if grep.returncode != 0:  # 1 = no matches, fine
+        return changed
+    for rel in grep.stdout.splitlines():
+        rel = rel.strip()
+        if not rel or rel == old_rel:  # skip the roadmap file itself
+            continue
+        fp = root / rel
+        try:
+            text = fp.read_text(encoding="utf-8")
+        except OSError:
+            continue
+        if old_rel not in text:
+            continue
+        if not dry_run:
+            fp.write_text(text.replace(old_rel, new_rel), encoding="utf-8")
+        changed.append(rel)
+    return changed
+def _git_mv(root: Path, src_rel: str, dst_rel: str, dry_run: bool) -> bool:
+    dst = root / dst_rel
+    if not dry_run:
+        dst.parent.mkdir(parents=True, exist_ok=True)
+        cp = _run(["git", "mv", src_rel, dst_rel], root)
+        return cp.returncode == 0
+    return True
+def archive_completed(root: Path, *, changed_only: bool, base: str,
+                      dry_run: bool) -> list[dict]:
+    """Archive every complete active roadmap (count_open==0, count_deferred==0).
+    Returns a list of ``{roadmap, archived_to, refs_migrated}`` records.
+    """
+    roadmap_root = root / "agents" / "roadmaps"
+    if not roadmap_root.is_dir():
+        return []
+    touched = _branch_touched_paths(root, base) if changed_only else None
+    # changed_only requested but base unavailable → conservative: archive nothing
+    # rather than sweep unrelated roadmaps.
+    if changed_only and touched is None:
+        print(f"  ⚠️  cannot resolve `{base}` — skipping the changed-only "
+              "archival sweep (run with --all to force).", file=sys.stderr)
+        return []
+    archived: list[dict] = []
+    for stats in urp.collect(roadmap_root):
+        if stats.open_ != 0 or stats.deferred != 0:
+            continue  # not complete
+        old_rel = f"agents/roadmaps/{stats.rel}"
+        if changed_only and old_rel not in touched:
+            continue  # complete, but not this branch's work
+        new_rel = f"agents/roadmaps/archive/{stats.rel}"
+        if not _git_mv(root, old_rel, new_rel, dry_run):
+            print(f"  ⚠️  git mv failed for {old_rel}", file=sys.stderr)
+            continue
+        refs = _inbound_ref_rewrite(root, old_rel, new_rel, dry_run)
+        if not dry_run and refs:
+            _run(["git", "add", "--", *refs], root)
+        archived.append({"roadmap": old_rel, "archived_to": new_rel,
+                         "refs_migrated": refs})
+    return archived
+def _regen_dashboard(root: Path, dry_run: bool) -> None:
+    if dry_run:
+        return
+    script = Path(__file__).resolve().parent / "update_roadmap_progress.py"
+    _run([sys.executable, str(script)], root)
+    dash = root / "agents" / "roadmaps-progress.md"
+    if dash.is_file():
+        _run(["git", "add", "--", "agents/roadmaps-progress.md"], root)
+def main(argv: list[str] | None = None) -> int:
+    ap = argparse.ArgumentParser(description=__doc__)
+    ap.add_argument("--all", action="store_true",
+                    help="Archive every complete active roadmap (not only "
+                         "those touched in this branch).")
+    ap.add_argument("--base", default="origin/main",
+                    help="Base ref for the changed-only scope (default origin/main).")
+    ap.add_argument("--dry-run", action="store_true",
+                    help="Report what would be archived; touch nothing.")
+    ns = ap.parse_args(argv)
+    root = _repo_root()
+    archived = archive_completed(root, changed_only=not ns.all,
+                                 base=ns.base, dry_run=ns.dry_run)
+    if not archived:
+        print("  ℹ️  No completed roadmaps to archive.")
+        return 0
+    _regen_dashboard(root, ns.dry_run)
+    verb = "Would archive" if ns.dry_run else "Archived"
+    for rec in archived:
+        print(f"  ✅  {verb}: {rec['roadmap']} → {rec['archived_to']}"
+              + (f"  ({len(rec['refs_migrated'])} ref(s) migrated)"
+                 if rec["refs_migrated"] else ""))
+    return 0
+if __name__ == "__main__":  # run the sweep only when executed as a script, not on import
+    sys.exit(main())  # main()'s return value becomes the process exit status

package/dist/agent-src/skills/adversarial-review/SKILL.md CHANGED Viewed

@@ -29,6 +29,11 @@ Do NOT use when:
 ## Procedure: Adversarial review
+1. **Inspect the artifact** — Read the plan, diff, or draft you are about to critique; note its scope, assumptions, and the explicit asks before attacking.
+2. **Attack** — Run Step 1 below as the grumpy senior engineer.
+3. **Defend** — Run Step 2 as the balanced engineer; classify each criticism as must-fix / defer / reject.
+4. **Revise** — Run Step 3 to fold valid fixes back in and surface only the trade-offs the user needs to decide.
 ### Step 1: Attack (Grumpy Senior Engineer)
 Assume your plan/fix is flawed. Ask yourself:
@@ -112,6 +117,15 @@ Only surface trade-offs or concerns that need the user's input.
 - **api-design** — review API design for consistency and breaking changes.
 - **security** — review security-sensitive changes for attack surface.
+## RDP: fresh-context verifier as the default gate (structural)
+Within the Reasoning Discipline Protocol the fresh-context verifier subagent is
+the **default** final gate — but, because it is a full extra inference pass, it
+fires only on the **structural-complexity** signal: ≥ 2 of {branching/conditional
+logic, ≥ 3 explicit must/must-not constraints, stateful operations,
+irreversibility} **and** estimated work ≥ ~1k tokens. Token length alone never
+triggers it. See [`rdp-gate`](../../contexts/execution/rdp-gate.md) (L12).
 ## Auto-trigger keywords
 - adversarial review

package/dist/agent-src/skills/agent-security-review/SKILL.md ADDED Viewed

@@ -0,0 +1,113 @@
+---
+model_tier: high
+name: agent-security-review
+description: "Use for an adversarial red-team / blue-team / auditor review of an AI agent's CONFIG + behaviour (rules, skills, MCP, hooks, permissions) — attack-chain → defensive-gap list, not a code audit."
+personas:
+  - security-engineer
+domain: quality
+council_depth: deep
+workspaces:
+  - engineering
+packs:
+  - engineering-base
+---
+# agent-security-review
+Adversarial review of an **agent's configuration + behaviour** — the trust
+anchor, not the app code. Where [`threat-modeling`](../threat-modeling/SKILL.md)
+models a code change and [`security-audit`](../security-audit/SKILL.md) hunts
+code vulns, this asks: given this assembled config (rules, skills, MCP, hooks,
+permissions, memory), how would an attacker turn it against its owner, and what
+gap lets them?
+Pairs the static signal from `/security-audit-config` with a three-lens
+adversarial pass. Output is **decision support** — surface the trade-off, name
+the gap; the human decides.
+## When to use
+- "Is my agent setup safe / could this be weaponised".
+- Before trusting a third-party skill pack, MCP server, or rules file.
+- Periodic posture review of a fleet's agent config.
+- Any `D`/`F` category from `/security-audit-config` needing depth.
+## Procedure
+### 1. Inventory + inspect the attack surface
+Inspect the config the agent loads and check each surface: instruction files
+(CLAUDE.md / AGENTS.md / .cursor/rules / copilot-instructions), installed skills
++ their `allowed-tools`, MCP servers + tool descriptions, hooks + lifecycle
+scripts, permission/auto-approve settings, persistent memory. Static pass first:
+```bash
+python3 src/scripts/security_audit_config.py --root <repo> --json
+```
+### 2. Red team (attacker lens)
+Per surface, construct concrete **attack chains** grounded in known classes:
+- Rules-file backdoor — hidden-Unicode / suppression instruction in a loaded file.
+- MCP tool-poisoning / rug-pull — malicious or mutated tool description.
+- Lethal trifecta — a path reading private data, ingesting untrusted content,
+  AND able to communicate externally.
+- Consent bypass — `bypassPermissions`, `Bash(*)`, auto-approve, `npx -y`.
+- Memory / context poisoning — a planted instruction firing later.
+Name the chain: *entry → mechanism → impact*. Be specific (which file, tool).
+### 3. Blue team (defender lens)
+Per chain, evaluate existing defences: are the always-on rules
+([`untrusted-input-defense`](../../rules/untrusted-input-defense.md),
+[`lethal-trifecta-guard`](../../rules/lethal-trifecta-guard.md),
+[`non-destructive-by-default`](../../rules/non-destructive-by-default.md)) in
+force? Egress gated? Untrusted leg quarantined? Note present vs **absent**.
+### 4. Auditor (synthesis)
+Pair each chain with its gap, prioritise (likelihood × impact). For the hardest
+calls run [`ai-council`](../ai-council/SKILL.md) (`council_depth: deep`) +
+[`judge-security-auditor`](../judge-security-auditor/SKILL.md) over flagged
+files. Produce a ranked **attack-chain → gap → recommended control** table.
+## Output
+A prioritised table — `attack chain | defensive gap | OWASP ASI | recommended control | confidence` —
+prefixed with the trust-and-safety banner (advisory security output):
+```
+> HUMAN REVIEW REQUIRED — adversarial agent-config review. Findings are
+> decision support, not a guarantee; detection is probabilistic. Validate
+> each chain before acting.
+```
+Recommend controls; never auto-apply config changes (per
+[`scope-control`](../../rules/scope-control.md)).
+## Gotcha
+- **Clean static score ≠ safe.** The worst chains (rug-pull MCP tool whose
+  description mutates post-approval, a lethal-trifecta path across three
+  individually-fine skills) leave no single linter hit — only the red-team lens
+  (step 2) **inspects** how surfaces compose. Always run the adversarial pass.
+- **Tool descriptions are part of the surface.** Reading only config files and
+  skipping each MCP server's live tool descriptions misses tool-poisoning.
+- **The reviewer is not the fixer.** Emitting a config patch turns advisory
+  review into an unreviewed change — recommend, hand back.
+## Do NOT
+- Do NOT treat a clean static score as proof of safety — the red-team lens finds
+  chains the linters cannot see.
+- Do NOT block or "fix" the consumer's config autonomously — surface + recommend.
+- Do NOT re-audit application code here — that is `security-audit` / `threat-modeling`.
+- Do NOT omit the HUMAN REVIEW REQUIRED banner.
+## See also
+- `/security-audit-config` — the static A–F counterpart.
+- [`untrusted-input-defense`](../../rules/untrusted-input-defense.md), [`lethal-trifecta-guard`](../../rules/lethal-trifecta-guard.md) — the prevention rules.
+- [`threat-modeling`](../threat-modeling/SKILL.md), [`judge-security-auditor`](../judge-security-auditor/SKILL.md), [`ai-council`](../ai-council/SKILL.md).

package/dist/agent-src/skills/agent-security-review/evals/triggers.json ADDED Viewed

@@ -0,0 +1,51 @@
+{
+    "skill": "agent-security-review",
+    "description": "5 should-trigger + 5 should-not-trigger queries. Should-trigger covers DE + EN phrasings for adversarial review of an agent's CONFIG/behaviour (rules, skills, MCP, hooks, permissions). Should-not-trigger covers the near-miss neighbours (code threat-model, code security-audit, the static config-audit command, privacy review, dependency CVE scan) whose vocabulary overlaps.",
+    "queries": [
+        {
+            "q": "red-team my agent setup — could someone weaponise my CLAUDE.md or MCP servers?",
+            "trigger": true
+        },
+        {
+            "q": "do an adversarial security review of this agent config and its skills",
+            "trigger": true
+        },
+        {
+            "q": "is this third-party skill pack safe to install, attacker's view?",
+            "trigger": true
+        },
+        {
+            "q": "prüfe meine Agent-Konfiguration adversarial auf Angriffsketten",
+            "trigger": true
+        },
+        {
+            "q": "what could an attacker do with my rules files, hooks and tool permissions?",
+            "trigger": true
+        },
+        {
+            "q": "threat-model this new payments endpoint before I build it",
+            "trigger": false,
+            "note": "code change threat model → threat-modeling"
+        },
+        {
+            "q": "find vulnerabilities in my application code",
+            "trigger": false,
+            "note": "code vulnerability hunt → security-audit"
+        },
+        {
+            "q": "give me an A-F score for my agent config",
+            "trigger": false,
+            "note": "static scored audit → /security-audit-config command, not the adversarial skill"
+        },
+        {
+            "q": "are we GDPR compliant with this data flow?",
+            "trigger": false,
+            "note": "regulatory-regime read → privacy-review"
+        },
+        {
+            "q": "scan my dependencies for known CVEs",
+            "trigger": false,
+            "note": "dependency CVE audit → security-audit dependency pass"
+        }
+    ]
+}

package/dist/agent-src/skills/ai-council/SKILL.md CHANGED Viewed

@@ -616,7 +616,7 @@ member can refine, agree, or push back on the previous critique
 without seeing which provider produced which point.
 The default round count comes from `defaults.min_rounds` in
-`agents/settings/.ai-council.yml` (default `2` so members critique each other
+`~/.event4u/agent-config/settings/.ai-council.yml` (default `2` so members critique each other
 at least once before convergence). The host agent does **not** ask
 "how many rounds?" when the requested count is `<= min_rounds` —
 the settings owner already made that decision. Ask only when a
@@ -738,7 +738,7 @@ Activation — two equivalent paths:
 * CLI: `--peer-review` on `council:estimate` or `council:run`.
 * Config: `ai_council.peer_review.enabled: true` in
-  `agents/settings/.ai-council.yml`. Default is `false`.
+  `~/.event4u/agent-config/settings/.ai-council.yml`. Default is `false`.
 Mechanics:
@@ -782,7 +782,7 @@ swaps.
 | **Outsider** | `openai` | naive-but-sharp questions, beginner's-mind probes |
 | **Executor** | `anthropic` | what ships this quarter, what blocks delivery |
-Activation — edit `agents/settings/.ai-council.yml` and flip the advisor's
+Activation — edit `~/.event4u/agent-config/settings/.ai-council.yml` and flip the advisor's
 `enabled: true`. Optional `model: <name>` overrides the bound
 member's default model. An advisor referencing a disabled member
 fails closed at config load — never silently skipped.

package/dist/agent-src/skills/async-python-patterns/SKILL.md CHANGED Viewed

@@ -146,7 +146,7 @@ A single blocking call (sync I/O, time.sleep, CPU-heavy parse, large JSON load)
 ## Provenance
-- Adopted from: `Microck/ordinary-claude-skills@8f5c83174f7aa683b4ddc7433150471983b93131:skills_all/async-python-patterns/SKILL.md` (MIT, © 2025 Microck) — **Sunset Policy applied**: 694-line cookbook source reduced to a ~140-line decision framework; pattern catalogues externalized to upstream docs below.
+- Adopted from: an external reference (MIT, © 2025 an external reference) — **Sunset Policy applied**: 694-line cookbook source reduced to a ~140-line decision framework; pattern catalogues externalized to upstream docs below.
 - Externalized cookbook:
   - asyncio core: https://docs.python.org/3/library/asyncio.html · https://docs.python.org/3/library/asyncio-task.html
   - TaskGroup (3.11+): https://docs.python.org/3/library/asyncio-task.html#task-groups

package/dist/agent-src/skills/blast-radius-analyzer/SKILL.md CHANGED Viewed

@@ -63,7 +63,7 @@ Run grep/search for the exact symbol, column, or event name. Enumerate:
 | DB references | Foreign keys, indexes, views, triggers on the column |
 | Config / docs | YAML, JSON, Markdown that name the symbol |
-### 3. Inspect indirect deps
+### 3. Inspect indirect dependencies
 For each direct dependency, identify second-order fan-out:
@@ -89,16 +89,17 @@ For every dependency, mark:
 ### 5. Consult engineering memory
 Via [`memory-access`](../../../docs/guidelines/agent-infra/memory-access.md) call
-`retrieve(types=["architecture-decisions", "ownership"],
+`retrieve(types=["ownership"],
 keys=<changed paths + changed symbol>, limit=5)`. Surface:
-- **Architecture decisions** that constrain the planned change — cite
-  `id` and the decision verbatim so the report is self-auditing.
+- **Architecture decisions** that constrain the planned change — check the
+  ADR index [`docs/decisions/INDEX.md`](../../../docs/decisions/INDEX.md) and
+  cite the ADR number + the decision verbatim so the report is self-auditing.
 - **Ownership** matches — add these as `owner hint` candidates when
   the direct grep had no result.
 Memory entries are supplementary, never authoritative: a grep miss is
-still a grep miss. Do not infer deps from memory alone.
+still a grep miss. Do not infer dependencies from memory alone.
 ## Validation
@@ -109,8 +110,8 @@ Before finalizing the report, confirm:
 3. Second-order fan-out is bounded — any runaway chain is flagged, not expanded
 4. Every `external` reach has at least one named owner hint or an explicit
    "owner unknown — ask"
-5. You have NOT invented deps that grep did not find
-6. You have NOT merged direct and indirect deps — they are listed separately
+5. You have NOT invented dependencies that grep did not find
+6. You have NOT merged direct and indirect dependencies — they are listed separately
 ## Output format
@@ -150,10 +151,10 @@ Open questions:
 Required fields (ordered):
 1. **Skill** and **Change** — one-line edit summary
-2. **Direct deps** — grouped by class, each with file:line citations and exact counts
-3. **Indirect deps** — 2nd-order only, bounded
+2. **Direct dependencies** — grouped by class, each with file:line citations and exact counts
+3. **Indirect dependencies** — 2nd-order only, bounded
 4. **Reach summary** — counts per reach level
-5. **Risk surfaces** — deps grouped by risk type
+5. **Risk surfaces** — dependencies grouped by risk type
 6. **Open questions** — unresolved items with grep evidence
 Runtime confirmation (e.g. *"actually run the test suite to see what breaks"*,
@@ -179,7 +180,7 @@ does not execute code, run tests, or touch the network**.
 * NEVER return `safe` out of politeness when external reach exists — mark it clearly
 * NEVER silently fall back to "module-level impact" when grep shows cross-module callers
 * NEVER claim a dependency without a file:line citation from grep output
-* NEVER chase deps past 2nd order without explicit scope approval — flag and stop
+* NEVER chase dependencies past 2nd order without explicit scope approval — flag and stop
 ## References

package/dist/agent-src/skills/command-routing/SKILL.md CHANGED Viewed

@@ -80,7 +80,7 @@ agents should not bypass the dispatcher.
 ## GitHub API: Replying to PR review comments
-When commands reply to PR review comments (e.g. `/fix-pr-bot-comments`):
+When commands reply to PR review comments (e.g. `/fix-pr-comments`):
 ### 1. Read the setting

package/dist/agent-src/skills/complexity-first-planning/SKILL.md ADDED Viewed

@@ -0,0 +1,96 @@
+---
+name: complexity-first-planning
+description: "Use when staging multi-component or uncertain work — tackle the load-bearing unknown first (risk-first decomposition), not the easy parts first."
+source: package
+domain: engineering
+status: active
+model_tier: medium
+tier: senior
+context_spine: [repo]
+workspaces:
+  - agent-config-maintainer
+packs:
+  - meta
+---
+# complexity-first-planning
+Part of the Reasoning Discipline Protocol. Engage per
+[`rdp-gate`](../../contexts/execution/rdp-gate.md) (skip on trivial / linear
+tasks; light touch on a strong-reasoning host).
+> **Provenance.** This is an **RDP derivation from general engineering discipline
+> (risk-first / critical-path / pre-mortem)** — it is **not** an Anthropic-
+> documented Fable behavior. Fable's "start at the top of your difficulty range"
+> is about *task selection* (give the model harder tasks), not intra-task order.
+> The skill stands on its own merit; it is not sold as a frontier-model transplant.
+## When to use
+- Staging multi-component work where the hardest/most-uncertain part is not yet proven.
+- A plan whose later steps depend on an assumption that could collapse.
+Do NOT use for single-step, linear, or fully-specified tasks (no load-bearing
+unknown to resolve), or when the user has already fixed the sequence.
+## When the agent should load this
+- The user asks to "plan", "break down", or "stage" work that spans ≥2 components
+  and at least one part is unproven.
+- A multi-step plan is forming whose later steps assume something untested.
+- Mid-task: a step just failed because an earlier, easier step baked in a wrong
+  assumption — reload this and re-sequence risk-first.
+## Procedure
+1. **Inspect and name the unknowns.** Read the affected components first, then
+   list which carry real uncertainty (technical feasibility, an unverified
+   integration, an ambiguous requirement) — analyze the existing system before
+   planning any change.
+2. **Assess and rank by load-bearing risk.** The load-bearing unknown is the one
+   whose failure invalidates the most dependent work — not the one that is merely hard.
+3. **Resolve it first.** Spike / probe / prototype the load-bearing unknown
+   before building anything that depends on it. Record the result in the notes
+   file (see [`notes-first-reasoning`](../../rules/notes-first-reasoning.md)):
+   prediction → result → lesson.
+4. **Cascade.** Once the riskiest assumption holds (or is corrected), sequence
+   the dependent work. If it fails, the cheap early failure saved the rework.
+## Output
+A short ordered plan that leads with the load-bearing unknown + how it will be
+proven, then the dependent steps. One recommendation, not a survey.
+## Do NOT
+- Build the easy parts first to show progress, then discover the hard part breaks them.
+- Treat "hardest" as "most code" — rank by *dependency blast radius*, not effort.
+- Over-plan a strong-reasoning host (it sequences risk natively — keep it light).
+## Gotchas
+- **Mistaking effort for risk.** A 400-line but well-understood refactor is *low*
+  load-bearing risk; a 5-line call into an unverified third-party API is *high*.
+  Ranking by size instead of dependency blast radius is the classic failure.
+- **"Resolved on paper".** Reasoning that the unknown "should work" is not
+  resolving it — the spike must actually run / compile / return before dependent
+  work starts. Record prediction → result, not prediction → assumption.
+- **Spike sprawl.** The probe answers exactly one question (does the load-bearing
+  assumption hold?), then stops. Turning it into the real implementation defeats
+  the cheap-early-failure purpose.
+## Related Skills
+**WHEN to use this**
+- Staging multi-component work where the hardest / most-uncertain part is unproven.
+- A plan whose later steps rest on an assumption that could collapse.
+**WHEN NOT to use this**
+- Single-step, linear, or fully-specified work — no load-bearing unknown to
+  resolve; the [`rdp-gate`](../../contexts/execution/rdp-gate.md) filters these.
+- Breaking a feature into tasks in general — route to
+  [`feature-planning`](../feature-planning/SKILL.md), which composes this skill.
+- Recording the spike's prediction / result / lesson — that belongs in
+  [`notes-first-reasoning`](../../rules/notes-first-reasoning.md).

package/dist/agent-src/skills/complexity-first-planning/evals/triggers.json ADDED Viewed

@@ -0,0 +1,16 @@
+{
+  "skill": "complexity-first-planning",
+  "description": "5 should-trigger + 5 should-not-trigger. Should-trigger covers multi-component / uncertain work where a load-bearing unknown must be resolved first; should-not covers trivial / linear / fully-sequenced work and neighbors (feature breakdown, simple tests).",
+  "queries": [
+    {"q": "build a multi-tenant reporting pipeline with caching and access control — where do I start?", "trigger": true},
+    {"q": "we need to migrate auth to OAuth and we're not sure the legacy token format maps cleanly", "trigger": true},
+    {"q": "stage this feature so we fail fast on the riskiest part", "trigger": true},
+    {"q": "the plan depends on the new vector DB actually handling our query latency — sequence the work", "trigger": true},
+    {"q": "which part of this 6-step build should we tackle first?", "trigger": true},
+    {"q": "add a unit test for the existing slugify() helper", "trigger": false, "note": "single linear step — no load-bearing unknown"},
+    {"q": "rename this method across the codebase", "trigger": false, "note": "mechanical, fully specified"},
+    {"q": "break this epic into tickets", "trigger": false, "note": "feature-planning breakdown, not risk-first ordering"},
+    {"q": "fix the typo in the README", "trigger": false, "note": "trivial"},
+    {"q": "implement steps 1 through 4 in the order I listed", "trigger": false, "note": "user already fixed the sequence"}
+  ]
+}

package/dist/agent-src/skills/copilot-config/SKILL.md CHANGED Viewed

@@ -15,7 +15,7 @@ packs:
 Use this skill when:
 - Editing `.github/copilot-instructions.md` to improve Copilot behavior
-- Dealing with Copilot PR review comments (via `/fix-pr-bot-comments`)
+- Dealing with Copilot PR review comments (via `/fix-pr-comments`)
 - Analyzing Copilot's review patterns to identify recurring false positives
 - Tuning Copilot's code suggestions for the project
@@ -96,7 +96,7 @@ Before creating a comment, Copilot must:
 ## Handling Copilot Bot Comments (as Augment Agent)
-When the user asks to fix Copilot's PR review comments (via `/fix-pr-bot-comments`):
+When the user asks to fix Copilot's PR review comments (via `/fix-pr-comments`):
 ### 1. Evaluate Each Comment
@@ -179,8 +179,7 @@ Copilot and Augment complement each other:
 ## Related
 - **File:** `.github/copilot-instructions.md` — Copilot configuration
-- **Command:** `/fix-pr-bot-comments` — fix bot review comments
-- **Command:** `/fix-pr-comments` — fix all review comments
+- **Command:** `/fix-pr-comments` — fix all review comments (bot + human)
 - **Skill:** `code-review` — PR review process and conventions

package/dist/agent-src/skills/defense-in-depth/SKILL.md CHANGED Viewed

@@ -152,6 +152,6 @@ BEFORE adding the 5th guard:
 ## Provenance
-- Adopted from: `Microck/ordinary-claude-skills@8f5c83174f7aa683b4ddc7433150471983b93131:skills_all/defense-in-depth/SKILL.md` (MIT, © 2025 Microck).
+- Adopted from: an external reference (MIT, © 2025 an external reference).
 - Provenance registry: `agents/settings/contexts/skills-provenance.yml` (entry: `defense-in-depth`).
 - Iron-Law floor: `non-destructive-by-default`, `verify-before-complete`, `skill-quality`.

package/dist/agent-src/skills/developer-like-execution/SKILL.md CHANGED Viewed

@@ -133,11 +133,12 @@ If important information is missing:
 - Identify likely cause and smallest correct change
 - **Consult memory — invariants and prior decisions.** Via
   [`memory-access`](../../../docs/guidelines/agent-infra/memory-access.md), call
-  `retrieve(types=["domain-invariants", "architecture-decisions"], keys=<touched paths>, limit=3)`.
+  `retrieve(types=["domain-invariants"], keys=<touched paths>, limit=3)`.
   A matching `domain-invariant` is a hard constraint — violating it = regression,
-  surface the conflict to the user before proceeding. A matching
-  `architecture-decision` explains *why* the current shape exists; plan around
-  it, do not silently overturn it. Cite matching `id`s in the plan.
+  surface the conflict to the user before proceeding. For architectural rationale
+  (*why* the current shape exists), check the ADR index
+  [`docs/decisions/INDEX.md`](../../../docs/decisions/INDEX.md); plan around it, do
+  not silently overturn it. Cite matching `id`s / ADR numbers in the plan.
   See [`engineering-memory-data-format`](../../../docs/guidelines/agent-infra/engineering-memory-data-format.md)
   for the schema.

package/dist/agent-src/skills/error-handling-patterns/SKILL.md CHANGED Viewed

@@ -126,7 +126,7 @@ Exactly **one** layer translates internal errors to the egress format (HTTP stat
 ## Provenance
-- Adopted from: `Microck/ordinary-claude-skills@8f5c83174f7aa683b4ddc7433150471983b93131:skills_all/error-handling-patterns/SKILL.md` (MIT, © 2025 Microck) — **Sunset Policy applied**: 636-line source reduced to a ~150-line decision framework; language catalogues externalized to the upstream resources below.
+- Adopted from: an external reference (MIT, © 2025 an external reference) — **Sunset Policy applied**: 636-line source reduced to a ~150-line decision framework; language catalogues externalized to the upstream resources below.
 - Externalized catalogues:
   - Python: https://docs.python.org/3/tutorial/errors.html · https://docs.python.org/3/library/exceptions.html
   - PHP / Laravel: https://laravel.com/docs/errors · https://www.php.net/manual/en/language.exceptions.php