npm - azclaude-copilot - Versions diffs - 0.4.10 → 0.4.13 - Mend

azclaude-copilot 0.4.10 → 0.4.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/.claude-plugin/marketplace.json +3 -3
package/.claude-plugin/plugin.json +5 -5
package/README.md +311 -50
package/bin/cli.js +2 -2
package/package.json +2 -2
package/templates/CLAUDE.md +5 -1
package/templates/agents/constitution-guard.md +121 -0
package/templates/agents/milestone-builder.md +10 -5
package/templates/agents/orchestrator.md +23 -0
package/templates/agents/spec-reviewer.md +123 -0
package/templates/capabilities/evolution/cycle2-knowledge.md +3 -3
package/templates/capabilities/manifest.md +15 -0
package/templates/commands/add.md +23 -0
package/templates/commands/analyze.md +181 -0
package/templates/commands/audit.md +12 -0
package/templates/commands/blueprint.md +82 -2
package/templates/commands/clarify.md +160 -0
package/templates/commands/constitute.md +190 -0
package/templates/commands/copilot.md +34 -12
package/templates/commands/dream.md +32 -1
package/templates/commands/evolve.md +23 -1
package/templates/commands/fix.md +12 -0
package/templates/commands/issues.md +168 -0
package/templates/commands/refactor.md +12 -0
package/templates/commands/sentinel.md +17 -8
package/templates/commands/setup.md +41 -0
package/templates/commands/ship.md +19 -1
package/templates/commands/spec.md +196 -0
package/templates/commands/tasks.md +151 -0
package/templates/hooks/post-tool-use.js +1 -1
package/templates/skills/architecture-advisor/SKILL.md +15 -0

package/.claude-plugin/marketplace.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "azclaude-marketplace",
-  "description": "AZCLAUDE — The operating system for Claude Code",
+  "description": "AZCLAUDE — A complete AI coding environment for Claude Code",
   "owner": {
     "name": "haytamAroui",
     "url": "https://github.com/haytamAroui"
@@ -8,8 +8,8 @@
   "plugins": [
     {
       "name": "azclaude",
-      "description": "The operating system for Claude Code. Memory, auto-tracking, agents, skills, intelligence, evolution — orchestrating Claude Code's native architecture. Zero external dependencies. One command to set up.",
-      "version": "1.0.0",
+      "description": "AZCLAUDE is a complete AI coding environment for Claude Code. It installs 27 commands, 8 auto-invoked skills, 13 specialized agents, 4 hooks, and a persistent memory system — in one command.\n\nKey features:\n• Memory across sessions — goals.md + checkpoints injected automatically before every session\n• Self-improving loop — /reflect fixes stale CLAUDE.md rules, /reflexes learns from tool-use patterns, /evolve creates agents from git evidence\n• Autonomous copilot mode — /copilot runs a three-tier team (orchestrator → problem-architect → milestone-builder) across sessions until the product ships\n• Security layer — 102-rule environment scan (/sentinel), pre-write secret blocking, pre-ship credential audit\n• Progressive levels 0–10 — start with CLAUDE.md, grow into multi-agent pipelines and self-evolving environments\n• Zero dependencies — no npm packages, no external APIs, no vector databases. Plain markdown files and Claude Code's native architecture.\n\nExample use cases:\n• /setup — scan an existing project, detect stack + domain + scale, fill CLAUDE.md, generate project-specific skills and agents automatically\n• /copilot \"Build a compliance SaaS with trilingual support\" — walk away, come back to working code across multiple sessions\n• /sentinel — run a scored security audit (0–100, grade A–F) across hooks, permissions, MCP servers, agent configs, and secrets\n• /evolve — detect gaps in the environment, generate new skills and agents from git co-change evidence, report score delta (e.g. 42/100 → 68/100)\n• /reflect — find stale, missing, or contradicting rules in CLAUDE.md and propose exact fixes\n• /snapshot — save mid-session reasoning (WHY decisions were made) so it's auto-injected at the next session start, surviving context compaction\n• /debate \"REST vs GraphQL for this project\" — adversarial evidence-based decision with order-independent scoring, logged to decisions.md",
+      "version": "0.4.12",
       "source": {
         "source": "github",
         "repo": "haytamAroui/AZ-CLAUDE",

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,17 +1,17 @@
 {
   "name": "azclaude",
-  "version": "1.0.0",
-  "description": "The operating system for Claude Code. Memory, auto-tracking, agents, skills, intelligence, evolution — orchestrating Claude Code's native architecture. Zero external dependencies. One command to set up.",
+  "version": "0.4.12",
+  "description": "AZCLAUDE is a complete AI coding environment for Claude Code. It installs 27 commands, 8 auto-invoked skills, 13 specialized agents, 4 hooks, and a persistent memory system — in one command.\n\nKey features:\n• Memory across sessions — goals.md + checkpoints injected automatically before every session\n• Self-improving loop — /reflect fixes stale CLAUDE.md rules, /reflexes learns from tool-use patterns, /evolve creates agents from git evidence\n• Autonomous copilot mode — /copilot runs a three-tier team (orchestrator → problem-architect → milestone-builder) across sessions until the product ships\n• Security layer — 102-rule environment scan (/sentinel), pre-write secret blocking, pre-ship credential audit\n• Progressive levels 0–10 — start with CLAUDE.md, grow into multi-agent pipelines and self-evolving environments\n• Zero dependencies — no npm packages, no external APIs, no vector databases. Plain markdown files and Claude Code's native architecture.\n\nExample use cases:\n• /setup — scan an existing project, detect stack + domain + scale, fill CLAUDE.md, generate project-specific skills and agents automatically\n• /copilot \"Build a compliance SaaS with trilingual support\" — walk away, come back to working code across multiple sessions\n• /sentinel — run a scored security audit (0–100, grade A–F) across hooks, permissions, MCP servers, agent configs, and secrets\n• /evolve — detect gaps in the environment, generate new skills and agents from git co-change evidence, report score delta (e.g. 42/100 → 68/100)\n• /reflect — find stale, missing, or contradicting rules in CLAUDE.md and propose exact fixes\n• /snapshot — save mid-session reasoning (WHY decisions were made) so it's auto-injected at the next session start, surviving context compaction\n• /debate \"REST vs GraphQL for this project\" — adversarial evidence-based decision with order-independent scoring, logged to decisions.md",
   "author": {
     "name": "haytamAroui",
     "url": "https://github.com/haytamAroui"
   },
-  "homepage": "https://github.com/haytamAroui/AZ-CLAUDE/blob/main/DOCS.md",
-  "repository": "https://github.com/haytamAroui/AZ-CLAUDE",
+  "homepage": "https://github.com/haytamAroui/AZ-CLAUDE-COPILOT/blob/main/DOCS.md",
+  "repository": "https://github.com/haytamAroui/AZ-CLAUDE-COPILOT",
   "license": "MIT",
   "keywords": ["memory", "setup", "agents", "commands", "context", "lazy-loading", "hooks", "domain-aware"],
   "commands": "./templates/commands/",
   "skills": "./templates/skills/",
   "agents": "./templates/agents/",
-  "hooks": "./hooks/hooks.json"
+  "hooks": "./templates/hooks/hooks.json"
 }

package/README.md CHANGED Viewed

@@ -10,9 +10,11 @@
   <p align="center">
     <a href="#install">Install</a> ·
     <a href="#the-core-idea">Core Idea</a> ·
+    <a href="#zero-setup-grows-with-your-project">Zero Setup</a> ·
     <a href="#what-you-get">What You Get</a> ·
     <a href="#memory-system">Memory</a> ·
-    <a href="#all-26-commands">Commands</a> ·
+    <a href="#self-improving-loop">Self-Improving Loop</a> ·
+    <a href="#all-27-commands">Commands</a> ·
     <a href="#autonomous-mode">Autonomous Mode</a> ·
     <a href="DOCS.md">Full Docs</a>
   </p>
@@ -35,6 +37,8 @@ Claude starts every session blind.    Claude reads goals.md before your first me
 No project conventions.               CLAUDE.md has your stack, domain, and rules.
 Repeats the same mistakes.            antipatterns.md prevents known failures.
 Forgets what was decided.             decisions.md logs every architecture choice.
+Loses reasoning mid-session.          /snapshot saves WHY — auto-injected next session.
+CLAUDE.md drifts from reality.        /reflect finds stale rules and fixes them.
 Builds the same agent repeatedly.     patterns.md encodes what worked.
 Can't work autonomously.              /copilot builds, tests, commits, ships — unattended.
 ```
@@ -43,6 +47,63 @@ One install. Any stack. Zero dependencies.
 ---
+## Zero Setup. Grows With Your Project.
+Most AI coding tools require upfront decisions: which agents to create, what prompts to write, which skills to define. You can't know that before the project exists.
+AZCLAUDE inverts this. **You start with almost nothing. The environment builds itself from evidence.**
+```bash
+azclaude-copilot setup --full   # one command. that's it.
+```
+No agent files to write. No skills to configure. No prompt engineering. `setup --full` installs 27 commands, 4 hooks, memory structure, and a manifest. The rest is generated from your actual codebase as you work.
+**What the environment looks like across sessions:**
+```
+Day 1 — /setup runs:
+  Scans your code. Detects domain + stack + scale.
+  Fills CLAUDE.md with your actual project data (no placeholders).
+  Generates 2 project-specific skills from your file patterns.
+  Creates goals.md. Hooks are wired.
+Day 2 — you just work. Hooks observe silently:
+  Every edit → breadcrumb in goals.md (automatic)
+  Every tool sequence → logged to observations.jsonl (automatic)
+  Session end → "In progress" migrates to "Done" (automatic)
+  Session start → goals.md + last checkpoint injected (automatic)
+Day 5 — /evolve reads your git history:
+  auth/ files always change together → cc-auth agent created
+  6 locale files always co-edited → i18n-sync skill created
+  No decisions made by you. Git evidence decides.
+Day 10 — /reflect reads friction logs:
+  STALE DATA — design tokens in CLAUDE.md don't match codebase
+  MISSING RULE — wrong agent routing causing silent failures
+  Proposes exact fixes. You approve. CLAUDE.md corrects itself.
+Day 30 — you finish the project:
+  Environment score: 42/100 → 91/100
+  Agents specialized to your codebase. Reflexes learned from your patterns.
+  CLAUDE.md reflects how the project actually works — not what you guessed on day 1.
+```
+**The environment you finish with is not the environment you started with.** Not because you configured it — because the system learned from the work.
+### How lazy loading keeps it fast
+37 capability files exist in `.claude/capabilities/`. Without discipline, every task would load all of them. Instead, `manifest.md` acts as a routing table:
+```
+CLAUDE.md → read manifest.md → load ONLY the files for this task (~380 tokens)
+```
+Claude reads the manifest (one file), finds which 1-3 capability files apply, loads only those. Adding a new agent or skill doesn't increase the cost of unrelated tasks. The environment grows without bloating context.
+---
 ## Install
 **Step 1 — Install globally from your terminal:**
@@ -51,13 +112,13 @@ One install. Any stack. Zero dependencies.
 npm install -g azclaude-copilot@latest
 ```
-**Step 2 — Run setup inside Claude Code to get the full capabilities:**
+**Step 2 — Run setup inside Claude Code:**
 ```bash
 azclaude-copilot setup --full
 ```
-That's it. Your project now has AZCLAUDE in `.claude/` — 26 commands, memory, hooks, reflexes, agents, and skills.
+That's it. Your project now has AZCLAUDE in `.claude/` — 27 commands, 4 hooks, memory, reflexes, agents, and skills.
 ```bash
 azclaude-copilot doctor   # 32 checks — verify everything is wired correctly
@@ -67,18 +128,19 @@ azclaude-copilot doctor   # 32 checks — verify everything is wired correctly
 ## What You Get
-**26 commands** · **8 auto-invoked skills** · **10 agents** · **3 hooks** · **memory across sessions** · **learned reflexes** · **self-evolving environment**
+**27 commands** · **8 auto-invoked skills** · **13 agents** · **4 hooks** · **memory across sessions** · **learned reflexes** · **self-evolving environment**
 ```
 .claude/
 ├── CLAUDE.md                 ← dispatch table: conventions, stack, routing
-├── commands/                 ← 26 slash commands (/add, /fix, /audit, /copilot...)
+├── commands/                 ← 27 slash commands (/add, /fix, /copilot, /sentinel...)
 ├── skills/                   ← 8 skills (test-first, security, architecture-advisor...)
-├── agents/                   ← 10 agents (orchestrator, code-reviewer, test-writer...)
+├── agents/                   ← 13 agents (orchestrator, code-reviewer, security-auditor...)
 ├── capabilities/             ← 37 files, lazy-loaded via manifest.md (~380 tokens/task)
 ├── hooks/
-│   ├── post-tool-use.js      ← writes breadcrumb to goals.md on every edit
 │   ├── user-prompt.js        ← injects goals.md + checkpoint before your first message
+│   ├── pre-tool-use.js       ← blocks hardcoded secrets before any file write
+│   ├── post-tool-use.js      ← writes breadcrumb to goals.md on every edit
 │   └── stop.js               ← migrates In-progress → Done, trims, resets counter
 └── memory/
     ├── goals.md              ← rolling ledger of what changed and why
@@ -102,21 +164,40 @@ azclaude-copilot doctor   # 32 checks — verify everything is wired correctly
 Scans your codebase, detects domain + stack + scale, fills CLAUDE.md, creates goals.md, generates project-specific skills and agents. Run once. After that, every Claude Code session opens with full project context.
-### 2. `/dream` — start from an idea
+### 2. `/dream` — start from an idea, get a full environment
 ```
 /dream "Build a compliance SaaS — FastAPI, Supabase, trilingual"
 ```
-Structured intake → environment scan → builds CLAUDE.md, memory, skills, agents, milestones level by level. If you have a non-developer domain (compliance, finance, medical, legal), it generates a domain-specific advisor skill with decision matrices automatically.
+One command builds everything from scratch:
+```
+Phase 1: Asks 4 questions (what, stack, who uses it, what's out of scope)
+Phase 2: Scans existing environment — won't regenerate what already exists
+Phase 3: Builds level by level:
+         L1 → CLAUDE.md (project rules + routing)
+         L2 → MCP config
+         L3 → Skills (project-specific commands)
+         L4 → Memory (goals.md + patterns + antipatterns)
+         L5 → Agents (specialized for your stack, from git evidence)
+         L6 → Hooks (stateful session tracking)
+Phase 3b: Domain advisor skill — auto-generated if non-dev domain detected
+          (compliance, finance, medical, legal, logistics, research, marketing)
+Phase 4: Quality gate — won't say "ready" without passing all checks
+```
+If your domain is a non-developer one (compliance, finance, medical, legal, logistics, research, marketing), it automatically generates a domain-specific advisor skill with decision matrices, thresholds, and anti-patterns.
 ### 3. `/copilot` — walk away, come back to a product
 ```bash
 npx azclaude-copilot . "Build a compliance SaaS with trilingual support"
+# or resume existing run:
+npx azclaude-copilot .
 ```
-Restarts Claude Code sessions in a loop until `COPILOT_COMPLETE`. Each session: reads state, picks next milestone, implements, tests, commits, evolves. No human input needed.
+A Node.js runner restarts Claude Code sessions in a loop until `COPILOT_COMPLETE`. Each session reads state, picks the next milestone, implements, tests, commits, and evolves. No human input needed.
 ### Day-to-day
@@ -127,10 +208,12 @@ Restarts Claude Code sessions in a loop until `COPILOT_COMPLETE`. Each session:
 /test             # framework detection, exit-code gate, failure classification
 /evolve           # scan for gaps, generate fixes, create agents from evidence
 /ship             # tests → secrets scan → commit → push → deploy
+/sentinel         # security scan — scored 0–100, grade A–F, 5 layers, 102 rules
 /pulse            # health check — recent changes, current level, next steps
 /debate [topic]   # adversarial decision protocol with evidence scoring
-/blueprint [plan] # read-only analysis → plan.md with milestones
-/snapshot         # save WHY you made decisions — run every 15-20 turns
+/snapshot         # save WHY you made decisions — auto-injected next session
+/reflect          # find and fix stale/missing rules in CLAUDE.md
+/reflexes         # view learned behavioral patterns with confidence scores
 ```
 ---
@@ -142,14 +225,15 @@ The core insight: **Claude needs to see two things at the start of every session
 ### How it works (zero user input)
 ```
-Every edit:  PostToolUse hook → breadcrumb appended to goals.md
+Every edit:  pre-tool-use.js  → blocks hardcoded secrets before write
+             post-tool-use.js → breadcrumb appended to goals.md
              (timestamp, file, diff stats, one-line summary)
-Session end: Stop hook → In-progress migrates to Done
+Session end: stop.js → In-progress migrates to Done
              Trims to 20 Done entries, archives overflow
              Resets counters
-Session start: UserPromptSubmit hook → injects before your first message:
+Session start: user-prompt.js → injects before your first message:
                ┌─ goals.md (capped: 30 in-progress + 20 done)
                ├─ latest checkpoint (capped at 50 lines)
                ├─ plan status: X/N done, Y in-progress, Z blocked  [copilot mode]
@@ -161,9 +245,14 @@ Session start: UserPromptSubmit hook → injects before your first message:
 ### Manual layer (you control)
 ```bash
-/snapshot     # save reasoning snapshot — WHY decisions were made
-              # every 15-20 turns on complex work
-              # auto-injected at next session start
+/snapshot     # save reasoning snapshot — captures:
+              #   • What you're doing right now (specific task, not project description)
+              #   • WHY each decision was made this session
+              #   • What you know that isn't written down yet  ← the key section
+              #   • Top 3 next actions
+              #   • Risks and open questions
+              # Run every 15–20 turns. Auto-injected at next session start.
+              # Protects against context compaction losing mid-session reasoning.
 /persist      # end-of-session: update goals.md, write session narrative
               # run before closing
@@ -205,6 +294,56 @@ AZCLAUDE_HOOK_PROFILE=strict   claude   # all + reflex guidance injection
 ---
+## Self-Improving Loop
+AZCLAUDE doesn't just remember — it learns and corrects itself. Three commands form a loop that runs every few sessions:
+```
+/reflect   →   Reads friction logs + session history
+               Finds missing rules, dead rules, vague rules, contradicting rules
+               Proposes exact CLAUDE.md edits, one finding per change
+               You approve → CLAUDE.md gets smarter
+/reflexes  →   Reads observations.jsonl (captured automatically by post-tool-use.js)
+               Finds tool sequences, file co-access, error→fix pairs, naming patterns
+               Creates confidence-scored reflex files (0.3 tentative → 0.9 near-certain)
+               Strong reflexes (≥ 0.7) feed into /add behavior automatically
+/evolve    →   Detects gaps: stale data, missing capabilities, context rot
+               Generates fixes: new skills, new agents, updated patterns
+               Quality-gates everything before merging
+               Creates agents from git evidence (not guessing)
+               Reports score delta (e.g. 42/100 → 68/100 in one cycle)
+```
+**Real example — what this loop found on a production project in one run:**
+```
+/reflect found:
+  MISSING RULE  — Wrong agent routing causing silent failures every session
+  MISSING RULE  — Domain-specific legal term (CAO 98) kept drifting back into code
+  STALE DATA    — Design tokens in CLAUDE.md were wrong hex values (not matching codebase)
+  MISSING ROUTE — Most frequent task had no slash command dispatch
+/reflexes found (from 78 observations, 3 sessions):
+  i18n-all-6-locales     (confidence 0.85) → always edit all 6 locale files atomically
+  page-tsx-read-before-edit (0.75)          → re-read before touching — changes too often
+  next-config-build-verify  (0.70)          → run tsc --noEmit after next.config.ts edits
+  vertex-assess-co-edit     (0.60)          → vertex_client.py and assess_paid.py always coupled
+/evolve found:
+  plan.md frozen at 9/9 done — actually 18 milestones, M12–M18 active
+  No i18n-sync skill despite 6-locale changes in every commit
+  eu-ai-act-engine skill had no test recipe for zero-coverage modules
+  Score: 42/100 → 68/100
+```
+All of this happened without human diagnosis. The system found the issues, proposed fixes, and applied them.
+**The same loop runs on AZCLAUDE itself.** When sentinel.md had a Windows path bug and a broken agent dispatch, a real project test exposed both. AZCLAUDE diagnosed them, fixed `sentinel.md`, tests went from 1195/1197 to 1197/1197, and v0.4.9 shipped.
+---
 ## Evolution System
 `/evolve` finds gaps in the environment and fixes them. Three cycles:
@@ -214,7 +353,7 @@ AZCLAUDE_HOOK_PROFILE=strict   claude   # all + reflex guidance injection
 - Generates: fixes for each gap
 - Evaluates: quality-gates before merging (syntax, self-applicability, pressure-test resilience)
-**Cycle 2 — Knowledge Consolidation** (every 3+ sessions)
+**Cycle 2 — Knowledge Consolidation** (every 2+ sessions)
 - Harvests patterns.md and sessions/ by recency + importance
 - Prunes stale entries, consolidates redundant patterns
 - Enriches agent definitions with accumulated learnings
@@ -244,6 +383,53 @@ Skills and agents that are project-generic get promoted to `~/shared-skills/`
 ---
+## Security
+Zero dependencies in `package.json`. The only external binary is `claude` (installed separately). No supply-chain risk.
+**6 layers, 4 enforcement points:**
+| Layer | Where it runs | What it blocks |
+|-------|--------------|----------------|
+| Hook integrity | Every session start | SHA-256 mismatch → hooks tampered |
+| Secret blocking | `pre-tool-use.js` — before every write | `AKIA*`, `sk-*`, `ghp_*`, `glpat-*`, `xoxb-*`, `-----BEGIN PRIVATE KEY` |
+| Prompt injection defense | `user-prompt.js` — before context injection | `curl\|bash`, `ignore previous instructions`, base64 payloads in goals.md/checkpoints |
+| Environment audit | `/sentinel` — on-demand, 102 rules | Scored 0–100, grade A–F across 5 layers |
+| Pre-ship scan | `/ship` — before every commit | Secrets in staged files, failing tests, IDE errors |
+| Agent scoping | All review agents | Reviewer/auditor agents are read-only — no Write/Edit permissions |
+### `/sentinel` — Environment Security Scan
+```bash
+/sentinel          # full scan (default)
+/sentinel --hooks  # Layer 1+2: hook integrity + permissions
+/sentinel --mcp    # Layer 3: MCP server secrets and unknown packages
+/sentinel --agents # Layer 4: prompt injection in agent files
+/sentinel --secrets # Layer 5: credentials in committed code
+```
+Produces a scored report with verdict: `BLOCKED` / `CLEAR` / `PROCEED WITH CAUTION`.
+```
+╔══════════════════════════════════════════════════╗
+║          SENTINEL — Environment Security         ║
+╚══════════════════════════════════════════════════╝
+Layer 1 — Hook Integrity       25/25   ✓ verified
+Layer 2 — Permission Audit     12/20   ⚠ Bash(rm:*) too broad
+Layer 3 — MCP Server Scan      20/20   ✓ clean
+Layer 4 — Agent Config Review  15/15   ✓ no injection found
+Layer 5 — Secrets Scan         18/20   ⚠ API key in settings
+──────────────────────────────────────────────────
+Total: 90/100   Grade: A   Verdict: CLEAR
+```
+Any hardcoded secret → `BLOCKED` — `/ship` will not proceed until resolved.
+See [SECURITY.md](SECURITY.md) for full details.
+---
 ## Intelligence Layer
 ### 8 Skills (auto-invoked — no slash command needed)
@@ -295,15 +481,16 @@ When `/dream` or `/setup` detects a non-developer domain, a domain-specific advi
 Every tool use is observed. Patterns that repeat become reflexes:
 ```yaml
-id: grep-before-edit
-trigger: "when modifying code files"
-action: "Search with Grep first, confirm with Read, then Edit"
-confidence: 0.7       # 0.3 tentative → 0.9 near-certain
-evidence_count: 8
+id: i18n-all-6-locales
+trigger: "any src/messages/*.json file is edited"
+action: "edit all 6 locale files in the same operation — never fewer"
+confidence: 0.85      # 0.3 tentative → 0.9 near-certain
+evidence_count: 6
 domain: workflow
+scope: project        # promote to global when seen in 2+ projects at ≥ 0.8
 ```
-- `PostToolUse` hook captures observations to `reflexes/observations.jsonl` automatically
+- `post-tool-use.js` captures observations to `reflexes/observations.jsonl` automatically
 - 3+ occurrences creates a reflex at confidence 0.3
 - Confidence rises with confirming observations, decays -0.02/week without use
 - Strong clusters (3+ reflexes, avg confidence > 0.7) evolve into skills or agents
@@ -343,14 +530,27 @@ Orchestrator          Problem-Architect          Milestone-Builder
 Reads plan.md    →    Analyzes milestone    →     Pre-reads all files
 Selects wave          Returns Team Spec:          Implements
 Dispatches            • agents needed             Runs tests
-Monitors              • skills to load            Self-corrects (budget)
+Monitors              • skills to load            Self-corrects (2 fix attempts)
 Triggers /evolve      • files to pre-read         Commits + reports back
-Never writes code     • Files Written (parallel safety)
+Never writes code     • Files Written (parallel
+                        safety — prevents
+                        concurrent file corruption)
                       • pre-conditions, risks
                       • complexity (SIMPLE/MEDIUM/COMPLEX)
                       Never implements
 ```
+**Self-healing protocol — every failure teaches the environment:**
+```
+Build step fails →
+  1. Re-read the exact error (not a summary)
+  2. Check antipatterns.md — seen this before?
+  3. Try alternative approach
+  4. Record what failed → antipatterns.md
+  5. Record what worked → patterns.md
+  Never fail silently.
+```
 **Copilot pipeline:**
 ```
 Session 1:  /dream → /blueprint (architect annotates milestones) → M1, M2, M3 → /snapshot
@@ -371,7 +571,7 @@ Session 4:  /evolve → /audit → /ship → COPILOT_COMPLETE
 ---
-## All 26 Commands
+## All 27 Commands
 ### Build and Ship
@@ -397,18 +597,19 @@ Session 4:  /evolve → /audit → /ship → COPILOT_COMPLETE
 |---------|-------------|
 | `/debate` | Adversarial debate with evidence scoring (AceMAD). Order-independent, length-independent. |
 | `/evolve` | Detect gaps → generate fixes → quality-gate → create agents from evidence. 3 cycles. |
+| `/sentinel` | Security scan — 5 layers, 102 rules, scored 0–100 (grade A–F). Blocks /ship on findings. |
 | `/reflexes` | View, analyze, promote learned behavioral patterns. Confidence scoring. |
+| `/reflect` | Self-improve CLAUDE.md. Reads friction logs + session history. Proposes exact rule edits. |
 | `/level-up` | Show current level (0-10), build the next one progressively. |
 | `/find` | Search across commands, `~/shared-skills/`, capabilities manifest. |
 | `/create` | Build a new command with frontmatter, trigger variants, and tests. |
-| `/reflect` | Self-improve CLAUDE.md from conversation friction and session history. |
 | `/hookify` | Generate hooks from friction patterns. 5 hook types (block / warn / remind / inject / track). |
 ### Memory and Session
 | Command | What it does |
 |---------|-------------|
-| `/snapshot` | Mid-session: WHY + decisions + what's next. Auto-injected at next session start. |
+| `/snapshot` | Mid-session: WHY + decisions + what isn't written down yet + top 3 next actions. Auto-injected next session. |
 | `/persist` | End-of-session: update goals.md, write session narrative to `sessions/`. |
 | `/pulse` | Health check — recent changes, current level, reflexes, blockers, next steps. |
 | `/explain` | Code or error to plain language. 2-3 paragraphs max. |
@@ -416,7 +617,7 @@ Session 4:  /evolve → /audit → /ship → COPILOT_COMPLETE
 ---
-## 10 Agents
+## 13 Agents
 **Framework agents** (ship with AZCLAUDE, always available):
@@ -427,7 +628,10 @@ Session 4:  /evolve → /audit → /ship → COPILOT_COMPLETE
 | `milestone-builder` | Base builder. Pre-reads all files, implements, verifies, self-corrects (fix budget), commits, reports. |
 | `orchestrator-init` | Runs once during `/setup`. Scans project, fills CLAUDE.md, creates goals.md. Exits permanently. |
 | `loop-controller` | Level 10 autonomous agent. 3 cycles: evolution, knowledge consolidation, topology optimization. |
+| `evolution-module` | Called by orchestrator to run /evolve and /level-up at Level 10. Delegates to loop-controller. |
+| `intelligence-module` | Optional Level 8-9 agent. Pipeline isolation, debate engine, prompt optimization (OPRO), ELO ranking. |
 | `code-reviewer` | Spec-first review. Stage 1: spec compliance. Stage 2: quality. Read-only. Never modifies files. |
+| `security-auditor` | Pre-ship security scan. 102 rules across 5 layers. Verdict: APPROVE / REQUEST CHANGES / BLOCKED. |
 | `test-writer` | Reads existing test patterns. Matches framework, style, naming. Writes and runs tests. |
 | `cc-template-author` | Writes AZCLAUDE template files with proper structure. |
 | `cc-cli-integrator` | Integrates new features into `bin/cli.js`. |
@@ -437,6 +641,76 @@ Session 4:  /evolve → /audit → /ship → COPILOT_COMPLETE
 - Named `cc-{area}`, scoped to specific directories
 - Created when 3+ files in the same area change together across 2+ commits
 - Every agent has exactly 5 layers: persona, scope, tools, constraints, domain knowledge
+- `cc-` prefix prevents framework collisions (langgraph, crewai, autogen)
+---
+## Skills vs Agents — The Right Tool
+Claude Code is already capable. The goal is guidance, not instructions. Before creating an agent, understand what each tool is actually for.
+### Skills: project-specific guidance
+A skill is a markdown file that fires automatically when Claude needs context it can't derive from the code alone. The best skill answers one question: **"In this project, when doing X, what do you need to know that you can't read from the files?"**
+Skills are NOT:
+- Generic instructions Claude already knows ("write clean code", "add error handling")
+- Boilerplate copied from another project without reading this one first
+- A wrapper around knowledge Claude already has by default
+Skills ARE:
+- "In this compliance project, every obligation must be traced to an article number — here's the format"
+- "Our auth module uses RS256 not HS256 — here's why and where that decision lives"
+- "The 6 locale files must always be edited atomically — here's the co-edit pattern"
+`/setup` and `/evolve` generate skills by running `problem-architect` first — it reads your actual file structure, co-change patterns, and conventions, then builds skills around the gaps it finds. Generic skill templates are not installed.
+### Agents: only for parallelism and isolation
+An agent is a sub-process. Use one when you need work to happen **in parallel** or **in a separate context** from the main session. Not for organizing knowledge — skills do that more cheaply.
+**Create an agent when:**
+- Two workstreams can run concurrently (parallel dispatch saves real time)
+- A task must be isolated from main context (experiments, reviews, security scans)
+- There's enough domain depth to justify a dedicated context window (5+ files, unique conventions, a clear scope boundary)
+**Don't create an agent when:**
+- A tight skill + Claude's native capability already handles it
+- You'd create it just to "have one for auth" or "have one for the frontend"
+- The agent's instructions are things Claude already knows without being told
+**The test:** Would removing this agent and writing a skill instead produce worse results? If no — use a skill. Agents cost tokens every time they're loaded. A skill that gives Claude the right context is lighter and often better.
+### The right order
+```
+1. Craft a skill that gives Claude the project-specific context it's missing
+2. Watch if the same workflow keeps recurring across sessions (/reflexes will detect it)
+3. If work can be parallelized OR isolated → promote to an agent
+4. Let /evolve make the call from git evidence — it sees what actually co-changes
+```
+---
+## Progressive Levels (0–10)
+AZCLAUDE builds capability progressively — start simple, grow into complexity:
+| Level | What gets built | Trigger |
+|-------|----------------|---------|
+| 0 | Nothing yet | Fresh project |
+| 1 | CLAUDE.md — project rules + dispatch | `/setup` or `/dream` |
+| 2 | MCP config — database, browser, API access | `/level-up` |
+| 3 | Skills — project-specific commands | `/setup` generates ≥ 2 |
+| 4 | Memory — goals.md, patterns, antipatterns | `/setup` |
+| 5 | Agents — from git co-change analysis | `/evolve` after 5+ commits |
+| 6 | Hooks — stateful session tracking | `azclaude-copilot setup` |
+| 7 | External MCP servers | `/level-up` |
+| 8 | Orchestrated pipeline — multi-agent with problem-architect | `/level-up` |
+| 9 | Intelligence — debate, OPRO, ELO, pipeline isolation | `setup --full` |
+| 10 | Self-evolving — loop-controller, 3-cycle autonomous evolution | `/evolve` sustained |
+Run `/level-up` at any time to see your current level and build the next one.
 ---
@@ -446,42 +720,29 @@ Session 4:  /evolve → /audit → /ship → COPILOT_COMPLETE
 |---------|------------------|---------|
 | Project memory | Starts fresh every session | goals.md + checkpoints injected automatically |
 | Conventions | Ad-hoc, re-explained each time | CLAUDE.md — loaded before every task |
+| Mid-session reasoning | Lost on context compaction | /snapshot saves WHY — auto-injected at next session start |
 | Learned behavior | None | Reflexes extracted from tool-use, confidence-scored |
+| CLAUDE.md quality | Drifts, never updated | /reflect finds stale/missing/dead rules and fixes them |
 | Architecture decisions | Re-debated every time | decisions.md — logged once, referenced forever |
 | Failed approaches | Repeated | antipatterns.md — agents read before implementing |
+| Security | Manual | 4-layer enforcement: write-time blocking, context scan, audit, pre-ship |
 | Domain knowledge | Generic | Domain advisors generated for compliance, finance, medical, legal... |
 | Agent specialization | None | Project agents emerge from git evidence, not guessing |
 | Autonomous building | Not possible | /copilot — three-tier intelligent team |
-| Self-improvement | Not possible | /evolve — 3-cycle environment evolution |
+| Self-improvement | Not possible | /evolve + /reflect + /reflexes — 3-layer environment evolution |
 | Any stack | Yes | Yes |
 | You own the code | Yes | Yes |
 | Zero dependencies | — | Yes (0 in package.json) |
 ---
-## Security
-Zero dependencies in `package.json`. The only external binary is `claude` (installed separately). No supply-chain risk.
-**6 layers:**
-1. **Hook integrity** — SHA-256 hash verified on every run
-2. **Command injection protection** — shell metacharacters rejected in file paths
-3. **Prompt injection defense** — strips `curl|bash`, `ignore previous instructions`, base64 blocks from context injection
-4. **Skill checksums** — portable skills SHA-256 hashed, imports fail if tampered
-5. **Credential auditing** — `/ship` blocks on `.env`, `AKIA*`, `sk-*`, `ghp_*` before any git push
-6. **Agent scoping** — review agents read-only (`EnterPlanMode`), experiments in isolated worktrees (`EnterWorktree`)
-See [SECURITY.md](SECURITY.md) for full details.
----
 ## Verified
-1196 tests. Every template, command, capability, agent, hook, and CLI feature verified.
+1353 tests. Every template, command, capability, agent, hook, and CLI feature verified.
 ```bash
 bash tests/test-features.sh
-# Results: 1196 passed, 0 failed, 1196 total
+# Results: 1353 passed, 0 failed, 1353 total
 ```
 ---

package/bin/cli.js CHANGED Viewed

@@ -8,7 +8,7 @@ const { execSync }  = require('child_process');
 const TEMPLATE_DIR = path.join(__dirname, '..', 'templates');
 const CORE_COMMANDS     = ['setup', 'fix', 'add', 'audit', 'test', 'blueprint', 'ship', 'pulse', 'explain', 'snapshot', 'persist'];
-const EXTENDED_COMMANDS = ['dream', 'refactor', 'doc', 'loop', 'migrate', 'deps', 'find', 'create', 'reflect', 'hookify', 'sentinel'];
+const EXTENDED_COMMANDS = ['dream', 'refactor', 'doc', 'loop', 'migrate', 'deps', 'find', 'create', 'reflect', 'hookify', 'sentinel', 'clarify', 'spec', 'analyze', 'constitute', 'tasks', 'issues'];
 const ADVANCED_COMMANDS = ['evolve', 'debate', 'level-up', 'copilot', 'reflexes'];
 const COMMANDS          = [...CORE_COMMANDS, ...EXTENDED_COMMANDS, ...ADVANCED_COMMANDS];
@@ -428,7 +428,7 @@ function installScripts(projectDir, cfg) {
 // ─── Agents ───────────────────────────────────────────────────────────────────
-const AGENTS = ['orchestrator-init', 'code-reviewer', 'test-writer', 'loop-controller', 'cc-template-author', 'cc-cli-integrator', 'cc-test-maintainer', 'orchestrator', 'problem-architect', 'milestone-builder', 'security-auditor'];
+const AGENTS = ['orchestrator-init', 'code-reviewer', 'test-writer', 'loop-controller', 'cc-template-author', 'cc-cli-integrator', 'cc-test-maintainer', 'orchestrator', 'problem-architect', 'milestone-builder', 'security-auditor', 'spec-reviewer', 'constitution-guard'];
 function installAgents(projectDir, cfg) {
   const agentsDir = path.join(projectDir, cfg, 'agents');

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "azclaude-copilot",
-  "version": "0.4.10",
-  "description": "AI coding environment — 26 commands, 8 skills, 10 agents, memory, reflexes, evolution. Install: npm install -g azclaude-copilot@latest, then in Claude Code: azclaude-copilot setup --full",
+  "version": "0.4.13",
+  "description": "AI coding environment — 27 commands, 8 skills, 13 agents, memory, reflexes, evolution. Install: npm install -g azclaude-copilot@latest, then in Claude Code: azclaude-copilot setup --full",
   "bin": {
     "azclaude": "bin/cli.js",
     "azclaude-copilot": "bin/copilot.js"