azclaude-copilot 0.5.5 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,7 +9,7 @@
9
9
  {
10
10
  "name": "azclaude",
11
11
  "description": "AZCLAUDE is a complete AI coding environment for Claude Code. It installs 39 commands, 10 auto-invoked skills, 15 specialized agents, 4 hooks, and a persistent memory system — in one command.\n\nKey features:\n• Memory across sessions — goals.md + checkpoints injected automatically before every session\n• Self-improving loop — /reflect fixes stale CLAUDE.md rules, /reflexes learns from tool-use patterns, /evolve creates agents from git evidence\n• Autonomous copilot mode — /copilot runs a three-tier team (orchestrator → problem-architect → milestone-builder) across sessions until the product ships\n• Spec-driven workflow — /constitute writes project rules, /spec writes structured ACs, /analyze detects plan drift and ghost milestones, /blueprint traces every milestone to a spec\n• Security layer — 111-rule environment scan (/sentinel), pre-write secret blocking, pre-ship credential audit\n• Progressive levels 0–10 — start with CLAUDE.md, grow into multi-agent pipelines and self-evolving environments\n• Zero dependencies — no npm packages, no external APIs, no vector databases. Plain markdown files and Claude Code's native architecture.\n• Smart install — npx azclaude-copilot@latest auto-detects first install vs upgrade vs verify. Context-aware onboarding shows the right next command for your project state.\n\nExample use cases:\n• /setup — scan an existing project, detect stack + domain + scale, fill CLAUDE.md, generate project-specific skills and agents automatically\n• /copilot \"Build a compliance SaaS with trilingual support\" — walk away, come back to working code across multiple sessions\n• /sentinel — run a scored security audit (0–100, grade A–F) across hooks, permissions, MCP servers, agent configs, and secrets\n• /evolve — detect gaps in the environment, generate new skills and agents from git co-change evidence, report score delta (e.g. 
42/100 → 68/100)\n• /constitute — write your project's constitution (non-negotiables, architectural commitments, definition of done) — gates all future AI actions\n• /analyze — cross-artifact consistency check: ghost milestones, spec vs. code drift, unplanned commits\n• /reflect — find stale, missing, or contradicting rules in CLAUDE.md and propose exact fixes\n• /debate \"REST vs GraphQL for this project\" — adversarial evidence-based decision with order-independent scoring, logged to decisions.md",
12
- "version": "0.5.0",
12
+ "version": "0.5.6",
13
13
  "source": {
14
14
  "source": "github",
15
15
  "repo": "haytamAroui/AZ-CLAUDE-COPILOT",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "azclaude",
3
- "version": "0.5.0",
3
+ "version": "0.5.6",
4
4
  "description": "AZCLAUDE is a complete AI coding environment for Claude Code. It installs 39 commands, 10 auto-invoked skills, 15 specialized agents, 4 hooks, and a persistent memory system — in one command.\n\nKey features:\n• Memory across sessions — goals.md + checkpoints injected automatically before every session\n• Self-improving loop — /reflect fixes stale CLAUDE.md rules, /reflexes learns from tool-use patterns, /evolve creates agents from git evidence\n• Autonomous copilot mode — /copilot runs a three-tier team (orchestrator → problem-architect → milestone-builder) across sessions until the product ships\n• Spec-driven workflow — /constitute writes project rules, /spec writes structured ACs, /analyze detects plan drift and ghost milestones, /blueprint traces every milestone to a spec\n• Security layer — 111-rule environment scan (/sentinel), pre-write secret blocking, pre-ship credential audit\n• Progressive levels 0–10 — start with CLAUDE.md, grow into multi-agent pipelines and self-evolving environments\n• Zero dependencies — no npm packages, no external APIs, no vector databases. Plain markdown files and Claude Code's native architecture.\n• Smart install — npx azclaude-copilot@latest auto-detects first install vs upgrade vs verify. Context-aware onboarding shows the right next command for your project state.\n\nExample use cases:\n• /setup — scan an existing project, detect stack + domain + scale, fill CLAUDE.md, generate project-specific skills and agents automatically\n• /copilot \"Build a compliance SaaS with trilingual support\" — walk away, come back to working code across multiple sessions\n• /sentinel — run a scored security audit (0–100, grade A–F) across hooks, permissions, MCP servers, agent configs, and secrets\n• /evolve — detect gaps in the environment, generate new skills and agents from git co-change evidence, report score delta (e.g. 
42/100 → 68/100)\n• /constitute — write your project's constitution (non-negotiables, architectural commitments, definition of done) — gates all future AI actions\n• /analyze — cross-artifact consistency check: ghost milestones, spec vs. code drift, unplanned commits\n• /reflect — find stale, missing, or contradicting rules in CLAUDE.md and propose exact fixes\n• /debate \"REST vs GraphQL for this project\" — adversarial evidence-based decision with order-independent scoring, logged to decisions.md",
5
5
  "author": {
6
6
  "name": "haytamAroui",
package/README.md CHANGED
@@ -1,27 +1,24 @@
1
- <p align="center">
2
- <h1 align="center">AZCLAUDE</h1>
3
- <p align="center"><strong>A complete AI coding environment — built on Claude Code's native architecture.</strong></p>
4
- <p align="center">
1
+ <div align="center">
2
+ <h1>AZCLAUDE</h1>
3
+ <p><strong>A complete AI coding environment — built on Claude Code's native architecture.</strong></p>
4
+ <p>
5
5
  <a href="https://www.npmjs.com/package/azclaude-copilot"><img src="https://img.shields.io/npm/v/azclaude-copilot.svg" alt="npm version"></a>
6
6
  <a href="https://github.com/haytamAroui/AZ-CLAUDE-COPILOT/actions/workflows/tests.yml"><img src="https://github.com/haytamAroui/AZ-CLAUDE-COPILOT/actions/workflows/tests.yml/badge.svg" alt="tests"></a>
7
7
  <a href="https://github.com/haytamAroui/AZ-CLAUDE-COPILOT/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-blue.svg" alt="license"></a>
8
8
  <a href="https://nodejs.org"><img src="https://img.shields.io/badge/node-%3E%3D16-brightgreen" alt="node version"></a>
9
9
  </p>
10
- <p align="center">
11
- <a href="#install">Install</a> ·
10
+ <p>
12
11
  <a href="#the-core-idea">Core Idea</a> ·
13
- <a href="#zero-setup-grows-with-your-project">Zero Setup</a> ·
12
+ <a href="#how-it-works-the-execution-pipeline">Pipeline</a> ·
13
+ <a href="#install">Install</a> ·
14
14
  <a href="#what-you-get">What You Get</a> ·
15
- <a href="#spec-driven-workflow">Spec-Driven</a> ·
16
- <a href="#memory-system">Memory</a> ·
17
- <a href="#self-improving-loop">Self-Improving Loop</a> ·
18
- <a href="#all-37-commands">Commands</a> ·
19
- <a href="#parallel-execution">Parallel</a> ·
20
- <a href="#mcp-integration">MCP</a> ·
15
+ <a href="#architecture-philosophy">Architecture</a> ·
21
16
  <a href="#autonomous-mode">Autonomous Mode</a> ·
17
+ <a href="#parallel-execution">Parallel</a> ·
18
+ <a href="#what-makes-it-different">Compare</a> ·
22
19
  <a href="DOCS.md">Full Docs</a>
23
20
  </p>
24
- </p>
21
+ </div>
25
22
 
26
23
  ---
27
24
 
@@ -29,7 +26,7 @@
29
26
 
30
27
  **CLAUDE.md and markdown memory files are the best way to work with an LLM.**
31
28
 
32
- Not vector databases. Not API wrappers. Not prompt templates. Plain markdown files, structured and injected at exactly the right moment.
29
+ Not vector databases. Not API wrappers. Not MCP servers for local data. Plain markdown files, structured and injected at exactly the right moment.
33
30
 
34
31
  Claude Code exposes this natively: `CLAUDE.md` for conventions, hooks for automation, `.claude/` for state. AZCLAUDE implements the full architecture on top of it — every file, every hook, every pattern proven to work.
35
32
 
@@ -41,12 +38,7 @@ No project conventions. CLAUDE.md has your stack, domain, and rule
41
38
  Repeats the same mistakes. antipatterns.md prevents known failures.
42
39
  Forgets what was decided. decisions.md logs every architecture choice.
43
40
  Loses reasoning mid-session. /snapshot saves WHY — auto-injected next session.
44
- CLAUDE.md drifts from reality. /reflect finds stale rules and fixes them.
45
- Builds the same agent repeatedly. patterns.md encodes what worked.
46
41
  Can't work autonomously. /copilot builds, tests, commits, ships — unattended.
47
- Plans without requirements. /spec writes structured specs before any code is planned.
48
- Milestones violate project rules. constitution-guard blocks non-compliant milestones.
49
- Plan drifts from what was built. /analyze catches ghost milestones before they ship.
50
42
  Agents run serially, one at a time. Task Classifier + parallel waves run agents simultaneously.
51
43
  ```
52
44
 
@@ -54,60 +46,59 @@ One install. Any stack. Zero dependencies.
54
46
 
55
47
  ---
56
48
 
57
- ## Zero Setup. Grows With Your Project.
58
-
59
- Most AI coding tools require upfront decisions: which agents to create, what prompts to write, which skills to define. You can't know that before the project exists.
60
-
61
- AZCLAUDE inverts this. **You start with almost nothing. The environment builds itself from evidence.**
62
-
63
- ```bash
64
- npx azclaude-copilot@latest # one command. that's it.
65
- ```
66
-
67
- No agent files to write. No skills to configure. No prompt engineering. `npx azclaude-copilot` installs 39 commands, 4 hooks, memory structure, and a manifest. The rest is generated from your actual codebase as you work. Run the same command again later — it auto-detects whether to skip, install, or upgrade.
68
-
69
- **What the environment looks like across sessions:**
70
-
71
- ```
72
- Day 1 — /setup runs:
73
- Scans your code. Detects domain + stack + scale.
74
- Fills CLAUDE.md with your actual project data (no placeholders).
75
- Generates 2 project-specific skills from your file patterns.
76
- Creates goals.md. Hooks are wired.
77
-
78
- Day 2 — you just work. Hooks observe silently:
79
- Every edit → breadcrumb in goals.md (automatic)
80
- Every tool sequence → logged to observations.jsonl (automatic)
81
- Session end → "In progress" migrates to "Done" (automatic)
82
- Session start → goals.md + last checkpoint injected (automatic)
83
-
84
- Day 5 — /evolve reads your git history:
85
- auth/ files always change together → cc-auth agent created
86
- 6 locale files always co-edited → i18n-sync skill created
87
- No decisions made by you. Git evidence decides.
88
-
89
- Day 10 — /reflect reads friction logs:
90
- STALE DATA — design tokens in CLAUDE.md don't match codebase
91
- MISSING RULE — wrong agent routing causing silent failures
92
- Proposes exact fixes. You approve. CLAUDE.md corrects itself.
93
-
94
- Day 30 — you finish the project:
95
- Environment score: 42/100 → 91/100
96
- Agents specialized to your codebase. Reflexes learned from your patterns.
97
- CLAUDE.md reflects how the project actually works — not what you guessed on day 1.
98
- ```
99
-
100
- **The project you finish with is not the project you started with.** Not because you configured it — because the system learned from the work.
101
-
102
- ### How lazy loading keeps it fast
103
-
104
- 37 capability files exist in `.claude/capabilities/`. Without discipline, every task would load all of them. Instead, `manifest.md` acts as a routing table:
105
-
106
- ```
107
- CLAUDE.md → read manifest.md → load ONLY the files for this task (~380 tokens)
108
- ```
109
-
110
- Claude reads the manifest (one file), finds which 1–3 capability files apply, loads only those. Adding a new agent or skill doesn't increase the cost of unrelated tasks. The environment grows without bloating context.
49
+ ## How It Works: The Execution Pipeline
50
+
51
+ Most AI coding tools pass your raw text straight to the LLM. That's why they get lazy, repeat mistakes, or overwrite files blindly.
52
+
53
+ AZCLAUDE sits as a mandatory middleware firewall between your input and Claude. It injects state, routes intent, enforces the SDLC, and gates every write through a security scanner — all inside Node.js, before Claude sees a single token.
54
+
55
+ ```text
56
+ ┌──────────────────────────────────────────────────────────────┐
57
+ │ 1. USER INPUT: "Build auth" OR "Should we?" OR "How's this?" │
58
+ └─────────────────────────────┬────────────────────────────────┘
59
+
60
+ ════════════════════════════════════════════════════════════════
61
+ [ THE AZCLAUDE FIREWALL (user-prompt.js) ]
62
+ ════════════════════════════════════════════════════════════════
63
+
64
+ ┌──────────────────────────────────────────────────────────────┐
65
+ │ 2. STATE INJECTION & COMPACTION GUARD │
66
+ │ ├─► Memory check: If context > 85%, auto-save checkpoint │
67
+ │ └─► Inject state: goals.md, decisions.md, patterns.md │
68
+ └─────────────────────────────┬────────────────────────────────┘
69
+
70
+ ┌──────────────────────────────────────────────────────────────┐
71
+ │ 3. THE INTENT ROUTER (Dynamic Dispatch) │
72
+ └─┬───────────────────────────┬──────────────────────────────┬─┘
73
+ │ │ │
74
+ ▼ ▼ ▼
75
+ [ TIER 0: QUESTION ] [ TIER 1: ANALYZE ] [ TIER 2: IMPLEMENT ]
76
+ "How does this work?" "Should we build this?" "Build X" / "Fix Y"
77
+ │ │ │
78
+ │ Bypasses pipeline ├─► Load relevant skills ├─► STEP 1: problem-architect
79
+ │ Answer directly │ (test-first, security, │ (BLOCKING Team Spec first)
80
+ │ │ architecture-advisor) ├─► STEP 1b: Web research
81
+ │ │ Reason directly. ├─► STEP 2: Load skill set
82
+ │ │ Skip problem-architect. └─► STEP 3: Post-code review
83
+ ▼ ▼ ▼
84
+ ┌──────────────────────────────────────────────────────────────┐
85
+ │ 4. CLAUDE EXECUTES THE ENRICHED PAYLOAD │
86
+ │ (User input + protected state + mandatory instructions) │
87
+ └─────────────────────────────┬────────────────────────────────┘
88
+
89
+ ════════════════════════════════════════════════════════════════
90
+ [ THE SECURITY GATE (pre/post-tool-use.js) ]
91
+ ════════════════════════════════════════════════════════════════
92
+
93
+ ┌──────────────────────────────────────────────────────────────┐
94
+ │ 5. OUTBOUND SECURITY & MEMORY TRACKING │
95
+ │ ├─► pre-tool-use.js: blocks curl|bash, secrets, traversal │
96
+ │ ├─► Native execution: Claude runs the approved command │
97
+ │ └─► post-tool-use.js: writes breadcrumb to goals.md │
98
+ └──────────────────────────────────────────────────────────────┘
99
+ ```
100
+
101
+ **Why this matters:** The routing happens inside Node.js hooks — Claude cannot skip or override it. Ask a question (Tier 0) and it answers directly. Ask it to build (Tier 2) and it is structurally required to run `problem-architect`, load skills, and pass a security gate before a single file is touched.
111
102
 
112
103
  ---
113
104
 
@@ -119,7 +110,7 @@ npx azclaude-copilot@latest
119
110
 
120
111
  One command, no flags. Auto-detects whether this is a fresh install or an upgrade:
121
112
 
122
- - **First time** → full install (39 commands, 4 hooks, 15 agents, 10 skills, memory, reflexes)
113
+ - **First time** → full install (39 commands, 4 hooks, 15 agents, 10 skills, memory, reflexes). Creates folders, instructions, and hooks — **no manual setup required.**
123
114
  - **Already installed, older version** → auto-upgrades everything to latest templates
124
115
  - **Already up to date** → verifies, no overwrites
125
116
 
@@ -136,251 +127,134 @@ npx azclaude-copilot@latest doctor # 32 checks — verify everything is wired
136
127
  ```
137
128
  .claude/
138
129
  ├── CLAUDE.md ← dispatch table: conventions, stack, routing
139
- ├── commands/ ← 39 slash commands (/add, /fix, /copilot, /parallel, /mcp, /sentinel...)
140
- ├── skills/ ← 10 skills (test-first, security, architecture-advisor, frontend-design...)
130
+ ├── commands/ ← 39 slash commands (/add, /fix, /copilot, /parallel...)
131
+ ├── skills/ ← 10 skills (test-first, security, architecture-advisor...)
141
132
  ├── agents/ ← 15 agents (orchestrator, spec-reviewer, constitution-guard...)
142
- ├── capabilities/ ← 43 files, lazy-loaded via manifest.md (~380 tokens/task)
133
+ ├── capabilities/ ← 48 files, lazy-loaded via manifest.md (~100 tokens/task)
143
134
  ├── hooks/
144
- │ ├── user-prompt.js ← injects goals.md + checkpoint before your first message
135
+ │ ├── user-prompt.js ← Brain Router + goals injection before first message
145
136
  │ ├── pre-tool-use.js ← blocks hardcoded secrets before any file write
146
137
  │ ├── post-tool-use.js ← writes breadcrumb to goals.md on every edit
147
- │ └── stop.js ← migrates In-progress → Done, trims, resets counter
138
+ │ └── stop.js ← migrates In-progress → Done, trims, resets
148
139
  └── memory/
149
140
  ├── goals.md ← rolling ledger of what changed and why
150
141
  ├── checkpoints/ ← WHY decisions were made (/snapshot)
151
- ├── patterns.md ← what worked — agents read this before implementing
142
+ ├── patterns.md ← what worked — agents read before implementing
152
143
  ├── antipatterns.md ← what broke — prevents repeating failures
153
144
  ├── decisions.md ← architecture choices logged by /debate
154
- ├── blockers.md ← what's stuck and why
155
145
  └── reflexes/ ← learned behavioral patterns (confidence-scored)
156
146
  ```
157
147
 
158
- ---
159
-
160
- ## Three Ways to Use It
148
+ ### Three ways to start
161
149
 
162
- ### 1. `/setup` — wire an existing project
163
-
164
- ```
165
- /setup
166
- ```
150
+ | Command | What happens |
151
+ |---------|-------------|
152
+ | `/setup` | Scans your codebase, detects domain + stack + scale, fills CLAUDE.md, creates goals.md. |
153
+ | `/dream "Build a compliance SaaS"` | Builds everything from scratch: CLAUDE.md → Hooks → skills → memory → agents. |
154
+ | `/copilot` | Walk away, come back to a product. Autonomous milestone execution. |
167
155
 
168
- Scans your codebase, detects domain + stack + scale, fills CLAUDE.md, creates goals.md, generates project-specific skills and agents. Run once. After that, every Claude Code session opens with full project context.
156
+ ---
169
157
 
170
- ### 2. `/dream` — start from an idea, get a full environment
158
+ ## Architecture Philosophy
171
159
 
172
- ```
173
- /dream "Build a compliance SaaS — FastAPI, Supabase, trilingual"
174
- ```
160
+ **AZCLAUDE uses Markdown files and lifecycle hooks — not MCP servers — as its core architecture.** This is a deliberate engineering decision, not a gap.
175
161
 
176
- Builds everything from scratch in four phases:
162
+ ### Why Markdown beats MCP for an AI coding environment
177
163
 
178
- ```
179
- Phase 1: Asks 4 questions (what, stack, who uses it, what's out of scope)
180
- Phase 2: Scans existing environment — won't regenerate what already exists
181
- Phase 3: Builds level by level:
182
- L1 → CLAUDE.md L2 → MCP config
183
- L3 → Skills L4 → Memory
184
- L5 → Agents L6 → Hooks
185
- Phase 3b: Domain advisor skill — auto-generated if non-dev domain detected
186
- (compliance, finance, medical, legal, logistics, research, marketing)
187
- Phase 4: Quality gate — won't say "ready" without passing all checks
188
- ```
164
+ MCP adds an IPC layer between Claude and your project data. For external services (databases, APIs, deployment platforms), that makes sense — Claude can't `Read` a Postgres table. But AZCLAUDE's intelligence layer is entirely **local files**: goals, plans, patterns, decisions, agent instructions, skill definitions.
189
165
 
190
- ### 3. `/copilot` — walk away, come back to a product
166
+ For local files, Claude already has native tools (`Read`, `Write`, `Bash`, `Grep`) that are faster, cheaper, and more reliable than any protocol layer:
191
167
 
192
- ```bash
193
- npx azclaude-copilot . "Build a compliance SaaS with trilingual support"
194
- # or resume:
195
- npx azclaude-copilot .
196
- ```
168
+ | Operation | AZCLAUDE (Markdown + Hooks) | Hypothetical MCP Server |
169
+ |-----------|---------------------------|------------------------|
170
+ | Read goals.md | Hook injects directly — **0 tool calls** | MCP tool → IPC → read → IPC → parse — **1+ roundtrips** |
171
+ | Spawn code-reviewer | `Read("agents/code-reviewer.md")` — **1 native call** | `spawn_agent()` → IPC → reads same file — **slower** |
172
+ | Load patterns | Hook injects at session start — **0 cost** | `get_patterns()` → IPC — **added cost, same result** |
197
173
 
198
- Node.js runner restarts Claude Code sessions in a loop until `COPILOT_COMPLETE`. Each session reads state, picks next milestone, implements, tests, commits, evolves. No human input needed. [Details below.](#autonomous-mode)
174
+ ### The performance tax nobody talks about
199
175
 
200
- ---
176
+ Every MCP tool you register costs you **three ways**:
201
177
 
202
- ## Spec-Driven Workflow
178
+ 1. **Token overhead** — Each tool definition consumes ~100-300 tokens in the system prompt. 10 tools = up to 3,000 tokens gone before Claude starts thinking.
179
+ 2. **IPC latency** — Every call = JSON-RPC serialization → stdio pipe → Node.js process → work → serialize → pipe back → parse.
180
+ 3. **Decision overhead** — More tools = more inference time deciding which tool to use. A 50-item menu is slower than a 5-item menu.
203
181
 
204
- The biggest cause of wasted work: building the wrong thing correctly. The spec-driven workflow ensures you build what you actually meant to build.
182
+ ### Hooks enforce. Markdown extends. That's the full architecture.
205
183
 
206
184
  ```
207
- /constitute → /spec → /clarify → /blueprint → /copilot → /analyze → /ship
185
+ Claude Code
186
+ ├── Hooks (enforcement — AZCLAUDE's security + automation layer)
187
+ │ ├── user-prompt.js → Brain Router: forces problem-architect before coding
188
+ │ ├── pre-tool-use.js → Blocks secrets, injection, path traversal
189
+ │ ├── post-tool-use.js → Tracks every edit in goals.md
190
+ │ └── stop.js → Session cleanup, friction logging
191
+
192
+ └── Markdown files (capability — Claude reads natively, zero overhead)
193
+ ├── 39 commands → Claude reads the .md, follows instructions
194
+ ├── 15 agents → Claude spawns as subagents with Task tool
195
+ ├── 10 skills → Auto-invoked when relevant context detected
196
+ ├── 48 capabilities → Lazy-loaded via manifest.md (~100 tokens overhead)
197
+ └── Memory files → goals.md, decisions.md, patterns.md, checkpoints/
208
198
  ```
209
199
 
210
- | Command | Purpose |
211
- |---------|---------|
212
- | `/constitute` | Define ground rules before any planning. Non-negotiables, required patterns, definition of done. Copilot enforces on every milestone. |
213
- | `/spec` | Write a structured spec: user stories (≥2), acceptance criteria (≥3), out-of-scope, failure modes. spec-reviewer validates quality — if incomplete, `/blueprint` is blocked. |
214
- | `/clarify` | Structured interrogation (max 5 questions). Resolves open questions in a spec. Required before `/blueprint` if questions remain. |
215
- | `/blueprint` | Derive a milestone plan from the spec. Each milestone traces to an acceptance criterion. Task classifier groups coupled work for parallel safety. |
216
- | `/analyze` | Cross-artifact consistency check. Finds ghost milestones (marked done, files missing), spec vs. implementation drift, plan vs. reality gaps. |
217
- | `/tasks` | Build dependency graph from plan.md. Shows parallelizable wave groups and critical path length. |
218
- | `/issues` | Convert plan.md milestones to GitHub Issues. Deduplicates, creates labels, writes issue numbers back to plan.md. |
219
-
220
- **What the gates prevent:**
221
-
222
- | Without spec-driven | With spec-driven |
223
- |---------------------|-----------------|
224
- | Plan milestones that don't trace to requirements | spec-reviewer blocks /blueprint if ACs < 3 or goal unclear |
225
- | Copilot builds things that violate project rules | constitution-guard blocks each milestone before dispatch |
226
- | Ship code where plan.md says "done" but files are missing | /analyze catches ghost milestones; /ship blocks on them |
227
- | Open questions resolved arbitrarily in implementation | /clarify forces answers before planning starts |
200
+ **Zero IPC. Zero process overhead. Zero token tax. Claude reads files it already knows how to read.**
228
201
 
229
202
  ---
230
203
 
231
- ## Memory System
204
+ ### Native Synergy: Performance via Plan & UltraThink
232
205
 
233
- Claude needs two things at the start of every session — **what changed** and **why decisions were made**. Everything else is noise.
206
+ AZCLAUDE commands don't fight Claude Code's native features — they amplify them.
234
207
 
235
- ### Automatic (zero user input)
208
+ - **Native Plan Mode:** Commands like `/blueprint`, `/debate`, and `/sentinel` leverage Claude's native `plan` mode for read-only analysis, ensuring safety and focused reasoning before a single file is touched.
209
+ - **UltraThink Integration:** Using `--deep` with any command auto-loads `ultrathink` logic, enabling deeper dependency tracing and adversarial testing beyond standard limits.
236
210
 
237
- ```
238
- Every edit: post-tool-use.js → breadcrumb in goals.md (timestamp, file, diff stats)
239
- Before write: pre-tool-use.js → blocks hardcoded secrets
240
- Session end: stop.js → In-progress migrates to Done, trims to 20 entries
241
- Session start: user-prompt.js → injects goals.md + latest checkpoint + plan status
242
- ```
211
+ ### When MCP IS the right tool
243
212
 
244
- **Token cost: ~500 tokens fixed.** goals.md auto-rotates at 30 entries — oldest 15 archived, newest 15 kept. Same cost at session 5 or session 500.
245
-
246
- ### Manual (you control)
247
-
248
- ```bash
249
- /snapshot # save WHY decisions were made — auto-injected next session
250
- # run every 15–20 turns to protect against context compaction
251
- /persist # end-of-session: update goals.md, write session narrative
252
- /pulse # health check — recent changes, blockers, next steps
253
- ```
254
-
255
- ### Hook profiles
213
+ AZCLAUDE still recommends MCP servers — for things that aren't files on disk:
256
214
 
257
215
  ```bash
258
- AZCLAUDE_HOOK_PROFILE=minimal claude # goals.md tracking only
259
- AZCLAUDE_HOOK_PROFILE=standard claude # all features (default)
260
- AZCLAUDE_HOOK_PROFILE=strict claude # all + reflex guidance injection
216
+ /mcp # detect your stack → recommend servers → show install commands
261
217
  ```
262
218
 
263
- ### State files — the runner is stateless, these files ARE the state
219
+ **Universal:** `Context7` (live library docs), `Sequential Thinking` (iterative reasoning).
220
+ **Stack-specific:** `GitHub MCP`, `Playwright MCP`, `Supabase MCP`, `Brave Search`.
264
221
 
265
- | File | Written by | Read by | Purpose |
266
- |------|-----------|---------|---------|
267
- | `CLAUDE.md` | /setup, /dream | Every session | Conventions, routing, project identity |
268
- | `memory/goals.md` | Hooks | Every session start | File breadcrumbs + session state |
269
- | `memory/checkpoints/` | /snapshot | Every session start | WHY decisions were made |
270
- | `memory/patterns.md` | /evolve, agents | Agents, /add, /fix | What works — follow this |
271
- | `memory/antipatterns.md` | /evolve, agents | Agents, /add, /fix | What broke — avoid this |
272
- | `memory/decisions.md` | /debate | All agents | Architecture choices — never re-debate |
273
- | `memory/blockers.md` | /copilot | /copilot, /debate | What's stuck and why |
274
- | `memory/reflexes/` | Hooks, /reflexes | /evolve, agents | Learned behavioral patterns |
275
- | `plan.md` | /blueprint | /copilot, /add | Milestone tracker with status |
222
+ The rule: **if it's on disk, use Markdown. If it's external, use MCP.**
276
223
 
277
224
  ---
278
225
 
279
- ## Self-Improving Loop
226
+ ## Native Tool Orchestration
280
227
 
281
- AZCLAUDE doesn't just remember — it learns and corrects itself. Three commands form a feedback loop:
228
+ AZCLAUDE hardwires its logic directly into the host CLI's built-in tools:
282
229
 
283
- ```
284
- /reflect → Reads friction logs + session history
285
- Finds missing rules, dead rules, vague rules, contradictions
286
- Proposes exact CLAUDE.md edits. You approve. CLAUDE.md corrects itself.
287
-
288
- /reflexes → Reads observations.jsonl (captured automatically by post-tool-use.js)
289
- Finds tool sequences, file co-access, error→fix pairs
290
- Creates confidence-scored reflex files (0.3 tentative → 0.9 certain)
291
- Strong reflexes (≥ 0.7) feed into /add behavior automatically
292
-
293
- /evolve → Detects gaps: stale data, missing capabilities, context rot
294
- Generates fixes: new skills, new agents, updated patterns
295
- Quality-gates everything before merging
296
- Creates agents from git evidence (not guessing)
297
- Reports score delta (e.g. 42/100 → 68/100 in one cycle)
298
- ```
230
+ - **`AskUserQuestion`**: Wrapped into `/add`, `/blueprint`, and `/setup` to force clarification of vague requirements.
231
+ - **`EnterPlanMode`**: Called during `/blueprint`, `/audit`, and `/sentinel` for forced read-only analysis.
232
+ - **`EnterWorktree`**: Called to isolate state during `/evolve` and `/fix`.
233
+ - **`CronCreate` / `CronList`**: Tied to `/loop` for autonomous background execution.
234
+ - **`mcp__ide__getDiagnostics`**: Hard-gated before `/test` and `/ship`.
299
235
 
300
- **Real example — what this loop found on a production project in one run:**
236
+ ### Genius Wiring (1 AZCLAUDE Command : N Native Tools)
301
237
 
302
- ```
303
- /reflect found:
304
- MISSING RULE — Wrong agent routing causing silent failures every session
305
- MISSING RULE — Domain-specific legal term kept drifting back into code
306
- STALE DATA — Design tokens in CLAUDE.md were wrong hex values
307
- MISSING ROUTE — Most frequent task had no slash command dispatch
308
-
309
- /reflexes found (from 78 observations, 3 sessions):
310
- i18n-all-6-locales (confidence 0.85) → always edit all 6 locale files atomically
311
- page-tsx-read-before-edit (0.75) → re-read before touching — changes too often
312
- next-config-build-verify (0.70) → run tsc --noEmit after next.config.ts edits
313
-
314
- /evolve found:
315
- plan.md frozen at 9/9 done — actually 18 milestones, M12–M18 active
316
- No i18n-sync skill despite 6-locale changes in every commit
317
- Score: 42/100 → 68/100
318
- ```
238
+ Single-word commands act as high-level orchestrators, wrapping multiple primitive CLI tools into one logical autonomous turn.
319
239
 
320
- All without human diagnosis. The system found it, proposed fixes, applied them.
321
-
322
- ---
323
-
324
- ## Evolution System
325
-
326
- `/evolve` finds gaps in the environment and fixes them. Three cycles:
327
-
328
- **Cycle 1 — Environment Evolution:** Detects stale patterns, friction signals, context rot. Generates fixes. Quality-gates before merging.
329
-
330
- **Cycle 2 — Knowledge Consolidation** (every 2+ sessions): Harvests patterns by recency + importance. Prunes stale entries. Auto-prunes reflexes where confidence < 0.15.
331
-
332
- **Cycle 3 — Topology Optimization** (when friction detected): Measures agent influence. Identifies merge candidates. Tests in isolated worktree before adopting.
333
-
334
- **Agent emergence from git evidence:**
335
-
336
- ```
337
- Session 1: 0 project agents. Build basic structure.
338
- Git: 3 commits touching fastapi/, next/, supabase/
339
-
340
- Session 2: /evolve reads git log
341
- 15 files in fastapi/ → cc-fastapi agent created
342
- 8 files in next/ with i18n patterns → cc-frontend-i18n agent created
343
-
344
- Session 3: Compliance logic repeating across 6 files → cc-compliance-engine agent
345
- 3 agents, all from real code — not guessing
346
-
347
- Session 4: Full evolved environment. /audit → /ship → COPILOT_COMPLETE
348
- ```
349
-
350
- Skills and agents that are project-generic get promoted to `~/shared-skills/` — improvements discovered in one project become available to all your projects.
240
+ | Command | Orchestrated Native Pipeline |
241
+ |---------|-----------------------------|
242
+ | **`/blueprint`** | `EnterPlanMode` → `Read` → `AskUserQuestion` → `Write` (plan.md) |
243
+ | **`/add`** | `Read` (context) → `AskUserQuestion` → `EnterWorktree` → `Write` → `getDiagnostics` |
244
+ | **`/fix`** | `Read` → `Bash` (repro) → `Grep` → `EnterWorktree` → `Edit` → `Test` |
245
+ | **`/sentinel`** | `EnterPlanMode` → `Read` → `Grep` → `Write` (report) |
351
246
 
352
247
  ---
353
248
 
354
249
  ## Autonomous Mode
355
250
 
356
- ### `/copilot` — describe a product, come back to working code
357
-
358
251
  ```bash
359
252
  npx azclaude-copilot . "Build a compliance SaaS with trilingual support"
360
253
  ```
361
254
 
362
- Node.js runner restarts Claude Code sessions in a loop until `COPILOT_COMPLETE`.
255
+ Node.js runner restarts Claude Code sessions in a loop until `COPILOT_COMPLETE`. Each session reads state, picks next milestone, implements, tests, commits, evolves. No human input needed.
363
256
 
364
- **Four-phase execution loop:**
365
-
366
- ```
367
- Phase 1 — Intelligence gathering (parallel agents)
368
- Multiple analyst agents run simultaneously — arch, UX, market, compliance.
369
- Each returns findings. Orchestrator synthesizes.
370
-
371
- Phase 2 — Debate synthesis
372
- /debate resolves tensions with evidence scoring. Produces prioritized action list.
373
-
374
- Phase 3 — Blueprint (parallel explore agents)
375
- /blueprint runs explore agents in parallel. Writes file:line plan.
376
- Task Classifier groups coupled work → safe parallel dispatch by design.
377
-
378
- Phase 4 — Execution (parallel milestone agents, worktree-isolated)
379
- Orchestrator dispatches same-wave milestones simultaneously.
380
- Each agent owns its scope. Orchestrator merges on completion.
381
- ```
382
-
383
- **Three-tier intelligent team (Phase 4):**
257
+ **The intelligent team:**
384
258
 
385
259
  ```
386
260
  Orchestrator Problem-Architect Milestone-Builder
@@ -389,47 +263,19 @@ Reads plan.md → Analyzes milestone → Pre-reads all files
389
263
  Selects wave Returns Team Spec: Implements
390
264
  Dispatches • agents needed Runs tests
391
265
  Monitors • skills to load Self-corrects (2 attempts)
392
- Triggers /evolve • Files Written (parallel Commits + reports back
393
- Never writes code safety check)
394
- • complexity estimate
395
- Never implements
396
- ```
397
-
398
- **Self-healing — every failure teaches the environment:**
399
-
400
- ```
401
- Build step fails →
402
- 1. Re-read the exact error (not a summary)
403
- 2. Check antipatterns.md — seen this before?
404
- 3. Try alternative approach
405
- 4. Record what failed → antipatterns.md
406
- 5. Record what worked → patterns.md
407
- Never fail silently.
266
+ Triggers /evolve • files to touch Commits + reports back
267
+ Never writes code Never implements
408
268
  ```
409
269
 
410
- **Copilot pipeline (with spec-driven workflow):**
270
+ **Self-healing:** Every failure → check antipatterns.md → try alternative → record what failed → record what worked. Never fail silently.
411
271
 
412
- ```
413
- Session 0: /constitute → /spec → /clarify → /blueprint
414
- Session 1: /copilot → constitution-guard validates → M1, M2, M3 → /snapshot
415
- Session 2: /evolve → M4+M5 parallel → M6 → /analyze (ghost check) → /snapshot
416
- Session 3: /evolve → M7, M8, M9 → /snapshot
417
- Session 4: /evolve → /analyze → /audit → /ship → COPILOT_COMPLETE
418
- ```
419
-
420
- **Exit conditions:**
421
-
422
- | Condition | Exit code |
423
- |-----------|-----------|
424
- | `COPILOT_COMPLETE` in goals.md | 0 — product shipped |
425
- | Max sessions reached (default: 20) | 1 — resume with `npx azclaude-copilot .` |
426
- | All milestones blocked | 1 — needs human intervention |
272
+ **Stall detection:** If `plan.md` hash unchanged for 3 sessions → exit. Stuck milestones in-progress for 2+ sessions → injected warnings. All milestones blocked → human intervention.
427
273
 
428
274
  ---
429
275
 
430
276
  ## Parallel Execution
431
277
 
432
- AZCLAUDE runs multiple Claude Code agents simultaneously on the same codebase — without file corruption or test interference. Each agent works in an isolated git worktree on its own branch.
278
+ AZCLAUDE runs multiple Claude Code agents simultaneously — without file corruption or test interference. Each agent works in an isolated git worktree on its own branch.
433
279
 
434
280
  ```
435
281
  M1 (schema) → done
@@ -443,291 +289,232 @@ M1 (schema) → done
443
289
 
444
290
  3 sequential waves instead of 6 sequential milestones. Same output, fraction of the time.
445
291
 
446
- **Real sessionSystems Registry sprint (compliance SaaS, 5 milestones):**
292
+ ### Real case — ShopFlow e-commerce sprint
447
293
 
294
+ > **Prompt:** *"Add order tracking + product review system — full parallel mode, no limits"*
295
+
296
+ **Phase 0 — Intelligence (4 agents, ~9 minutes, all parallel):**
448
297
  ```
449
- Phase 1 Intelligence (4 agents, parallel)
450
- arch-analyst → found broken auto-link bug in assess-paid/page.tsx
451
- ux-analyst identified save-to-registry conversion hole
452
- market-intel → found FRIA + Art. 49 regulatory blue ocean
453
- compliance → mapped 13 fields present vs 66 required
454
- Time: ~9 minutes. Equivalent human analyst work: full day.
455
-
456
- Phase 2 Debate synthesis (1 agent)
457
- 4 tensions resolved with verdicts. Prioritized action list produced.
458
-
459
- Phase 3 — Blueprint (3 explore agents, parallel)
460
- Read assess-paid page, systems pages, and API routes simultaneously.
461
- Produced file:line plan across 5 milestones.
462
-
463
- Phase 4 — Execution (2 agents, parallel — classifier applied)
464
- dev-frontend (M1+M2) 64.5k tokens assess-paid/page.tsx + save-registry UI
465
- dev-backend (M4) 37.5k tokens systems.py + DB migration SQL
466
-
467
- Classifier merged M1+M2 automatically — both touch assess-paid/page.tsx.
468
- M4 backend ran in parallel — completely independent file set, zero conflict risk.
298
+ ├── Explore: Codebase architecture scan (55k tokens) — found checkout/page.tsx
299
+ │ is 70% done; review POST never passes order_id to backend
300
+ ├── Explore: UX journey + conversion analysis (54k tokens) — post-purchase save-to-
301
+ │ account flow is the biggest conversion hole
302
+ ├── Agent: Competitor feature research (49k tokens) — only platform without
303
+ │ inline review request after delivery; biggest gap vs Shopify/WooCommerce
304
+ └── Explore: Performance + SEO audit (51k tokens) — product schema missing
305
+ review aggregate (affects Google rich results)
469
306
  ```
470
307
 
471
- **Four-layer safety model:**
472
-
473
- ```
474
- Layer 0 — Task Classifier (blueprint, before milestones exist)
475
- Groups coupled work (same schema, config, utility module) into single milestones.
476
- Conflicts become impossible by design — before any safety checking begins.
308
+ **Debate verdicts:** Fix broken order_id link first (0.5 days) → reviews over rating-only (saves 2 sprints) → mobile-first detail page → workflow engine not static forms.
477
309
 
478
- Layer 1 — Directory check + shared-utility grep (blueprint, pre-plan)
479
- Fast, no agents spawned. Catches ~80% of remaining conflicts.
310
+ **Phase 1 — Blueprint (3 parallel reads of codebase → plan approved)**
480
311
 
481
- Layer 2 — Problem-architect exact file scan (post-plan, per milestone)
482
- Returns Files Written: exact paths + Parallel Safe: YES/NO.
483
- Corrects Layer 1 when it finds shared utilities across directories.
312
+ **Phase 2 — Wave 1 (classifier merged M1+M2 — shared checkout/page.tsx):**
313
+ ```
314
+ ├── M1+M2: checkout frontend — order_id auto-link + "Track Order" panel (78k tokens)
315
+ │ ← MERGED by classifier: both touch checkout/page.tsx
316
+ └── M4-backend: orders.py API + DB migration (37k tokens)
317
+ ← PARALLEL: zero shared files with M1+M2
318
+ ```
484
319
 
485
- Layer 3 — Orchestrator dispatch gate (runtime, unconditional)
486
- Final overlap check before spawning. Cannot be bypassed.
320
+ **Phase 3 — Wave 2 (different file owners, all parallel):**
321
+ ```
322
+ └── M3+M4-frontend+M5: order detail page + review section + completion score (84k tokens)
487
323
  ```
488
324
 
489
- **Automatic** via `/copilot`: the orchestrator reads `Wave:` fields in plan.md, dispatches same-wave milestones with `isolation: "worktree"` in a single message, then merges sequentially.
325
+ **Result: 5 milestones shipped, 1 commit (`a3f9c1b`), 0 merge conflicts.**
490
326
 
491
- **Manual** via `/parallel M2 M3 M4 M5`: dispatch specific milestones simultaneously.
327
+ What the classifier caught: M1 and M2 were separate plan milestones but both wrote to `checkout/page.tsx` — running them as separate agents would have caused a conflict. The classifier merged them into one agent before dispatch.
492
328
 
493
- See [docs/parallel-execution.md](docs/parallel-execution.md) for the complete reference.
329
+ **On tokens:** You will notice the token counts look large. You would spend the same tokens building this sequentially — the work is identical. What changes is wall-clock time. Sequential execution: each agent waits for the previous one to finish → ~2 hours. Parallel waves: agents run simultaneously → ~15 minutes. Same total tokens. Same output. One-eighth the time.
494
330
 
495
- ### Why coordination matters
331
+ ### Four-layer safety model
496
332
 
497
- Claude Code's `isolation: "worktree"` in the Task tool is a raw primitive like `pthread_create`. You have threads, but threads alone aren't a concurrent system.
333
+ Parallel execution is safe only when agents don't write to the same files. The key insight: **Layer 0 makes conflicts impossible by design** before any safety checking begins.
498
334
 
499
- | Raw capability | AZCLAUDE coordination layer |
500
- |---|---|
501
- | Task tool spawns agents | Orchestrator decides WHEN and HOW MANY |
502
- | Worktree isolates files | Blueprint classifier ensures they're safe to isolate |
503
- | Agents can read files | Problem-architect pre-packages the exact context each needs |
504
- | Agents can write code | Patterns/antipatterns constrain what they write |
505
- | Agents can fail | Blocker recovery + /debate escalation handles the failure |
506
- | Sessions end | goals.md + checkpoints + plan.md resume exactly where it stopped |
507
- | Code accumulates | /evolve turns git evidence into new agents for next time |
335
+ | Layer | When | What |
336
+ |-------|------|------|
337
+ | **0 — Task Classifier** | `/blueprint`, before milestones exist | Groups coupled work into single milestones. Conflicts become impossible by construction. |
338
+ | **1 — Directory + import check** | `/blueprint`, post-plan | Fast grep: same dirs? shared utility imports? |
339
+ | **2 — problem-architect file scan** | Post-plan, per milestone | Returns exact `Files Written:` paths + `Parallel Safe: YES/NO` |
340
+ | **3 — Orchestrator dispatch gate** | Runtime, final | Overlap check before spawning. Cannot be bypassed. |
508
341
 
509
- 6 desks is not a team. AZCLAUDE turns 6 desks into a coordinated team.
342
+ ### The engine analogy
510
343
 
511
- ---
344
+ Claude Code's `isolation: "worktree"` in the Task tool is a raw primitive — like `pthread_create`. You have threads, but threads alone aren't a concurrent system.
512
345
 
513
- ## Security
346
+ | Without AZCLAUDE | With AZCLAUDE |
347
+ |------------------|---------------|
348
+ | Which tasks to parallelize? | **Task Classifier** — groups coupled work, splits independent work |
349
+ | Is it safe to parallelize? | **Four-layer safety** — classifier + dir check + file scan + dispatch gate |
350
+ | What context does each agent need? | **Problem-Architect** — builds full Team Spec per milestone |
351
+ | What conventions to follow? | **patterns.md / antipatterns.md** — injected automatically |
352
+ | What if one agent fails? | **Blocker recovery + /debate escalation** |
353
+ | What happens when the session ends? | **goals.md + checkpoints + plan.md** — resumes exactly |
354
+ | How do we improve over time? | **/evolve** — new agents from git evidence every 3 milestones |
514
355
 
515
- Zero dependencies in `package.json`. The only external binary is `claude` (installed separately). No supply-chain risk.
356
+ **Claude Code is the engine. AZCLAUDE is the transmission, the steering, and the GPS — the system that makes those cylinders produce coordinated forward motion instead of random spinning.**
516
357
 
517
- **4 enforcement points, always active:**
358
+ See [docs/parallel-execution.md](docs/parallel-execution.md) for the complete reference (merge protocol, conflict resolution, worktree isolation rules).
518
359
 
519
- | Layer | Where | What it blocks |
520
- |-------|-------|----------------|
521
- | Secret blocking | `pre-tool-use.js` — before every write | `AKIA*`, `sk-*`, `ghp_*`, `glpat-*`, `xoxb-*`, `-----BEGIN PRIVATE KEY` |
522
- | Prompt injection | `user-prompt.js` — before context injection | `curl\|bash`, `ignore previous instructions`, base64 payloads |
523
- | Pre-ship scan | `/ship` — before every commit | Secrets in staged files, failing tests, IDE errors |
524
- | Agent scoping | All review agents | Reviewer/auditor agents are read-only — no Write/Edit permissions |
360
+ ---
525
361
 
526
- ### `/sentinel` — on-demand security scan
362
+ ## Memory System
527
363
 
528
- ```bash
529
- /sentinel # full scan — 5 layers, 102 rules, scored 0–100 (grade A–F)
530
- /sentinel --hooks # hook integrity + permissions only
531
- /sentinel --secrets # credential scan only
532
- ```
364
+ Two things at session start — **what changed** and **why decisions were made**. Everything else is noise.
533
365
 
366
+ **Automatic (zero user input):**
534
367
  ```
535
- ╔══════════════════════════════════════════════════╗
536
- ║ SENTINEL Environment Security ║
537
- ╚══════════════════════════════════════════════════╝
538
- Layer 1 Hook Integrity 25/25 ✓ verified
539
- Layer 2 — Permission Audit 12/20 ⚠ Bash(rm:*) too broad
540
- Layer 3 — MCP Server Scan 20/20 ✓ clean
541
- Layer 4 — Agent Config Review 15/15 ✓ no injection found
542
- Layer 5 — Secrets Scan 18/20 ⚠ API key in settings
543
- ──────────────────────────────────────────────────
544
- Total: 90/100 Grade: A Verdict: CLEAR
368
+ Every edit: post-tool-use.js → breadcrumb in goals.md
369
+ Before write: pre-tool-use.js → blocks hardcoded secrets
370
+ Session end: stop.js → In-progress → Done, trims to 20 entries
371
+ Session start: user-prompt.js → injects goals.md + checkpoint + plan status
545
372
  ```
373
+ Token cost: ~500 tokens fixed. Auto-rotates at 30 entries (oldest 15 archived to `sessions/`) — same cost at session 5 or session 500.
546
374
 
547
- Any hardcoded secret → `BLOCKED`. `/ship` will not proceed until resolved. See [SECURITY.md](SECURITY.md) for full details.
375
+ **Manual:** `/snapshot` (save reasoning), `/persist` (end-of-session), `/pulse` (health check).
548
376
 
549
377
  ---
550
378
 
551
- ## MCP Integration
379
+ ## Self-Improving Loop
552
380
 
553
- AZCLAUDE recommends MCP servers based on your stack and wires them into daily-use commands.
381
+ AZCLAUDE doesn't just remember — it learns and corrects itself:
554
382
 
555
- ```bash
556
- /mcp # detect stack → recommend → show install commands
557
383
  ```
384
+ /reflect → Finds missing rules, dead rules, contradictions in CLAUDE.md
385
+ Proposes exact edits. You approve. CLAUDE.md corrects itself.
558
386
 
559
- **Universal (free, no API key):** `Context7` (live library docs before writing code), `Sequential Thinking` (iterative reasoning for planning).
387
+ /reflexes → Finds repeating tool sequences from observations
388
+ Creates confidence-scored reflexes (0.3 tentative → 0.9 certain)
560
389
 
561
- **Stack-specific:** `GitHub MCP`, `Playwright MCP`, `Supabase MCP`, `Brave Search`.
390
+ /evolve → Detects gaps → generates fixes → quality-gates everything
391
+ Creates agents from git evidence (not guessing)
392
+ Score delta: 42/100 → 68/100 in one cycle
393
+ ```
562
394
 
563
395
  ---
564
396
 
565
- ## Intelligence Layer
566
-
567
- ### 10 Skills (auto-invoked)
568
-
569
- | Skill | Triggers on |
570
- |-------|------------|
571
- | `session-guard` | Session start, context reset, idle detection |
572
- | `test-first` | Writing/fixing code in TDD projects |
573
- | `env-scanner` | Project setup, stack detection |
574
- | `security` | Credentials, auth, payments, secrets |
575
- | `debate` | Decisions, trade-offs, architecture comparisons |
576
- | `skill-creator` | Repeated workflows, new capability needed |
577
- | `agent-creator` | Agent boundaries, 5-layer structure |
578
- | `architecture-advisor` | DB choice, rendering strategy, testing approach — by project scale |
579
- | `frontend-design` | UI components, styling, layout decisions |
580
- | `mcp` | MCP server recommendations based on stack |
581
-
582
- ### Architecture Advisor — 8 Decision Matrices
583
-
584
- Not "which is popular" — which is right for **your project's scale**:
585
-
586
- | Decision | SMALL | MEDIUM | LARGE |
587
- |----------|-------|--------|-------|
588
- | Architecture | Flat modules | Modular monolith | Monolith + targeted microservices |
589
- | Database | SQLite | PostgreSQL | PostgreSQL + Redis + search |
590
- | Testing | Test-after critical paths | TDD for business logic | Full TDD |
591
- | API | tRPC (internal) | REST | REST + GraphQL (mobile) |
592
- | Auth | Clerk / Supabase | Auth0 | Keycloak (self-hosted) |
593
- | Deploy | Vercel / Railway | Managed containers | AWS/GCP with IaC |
594
-
595
- Every recommendation includes the **threshold where it changes** and the **anti-pattern** to avoid.
596
-
597
- ### Domain Advisors — Auto-Generated for 7 Domains
598
-
599
- When `/dream` or `/setup` detects a non-developer domain, a domain-specific advisor skill is generated automatically:
600
-
601
- | Domain | What gets generated |
602
- |--------|-------------------|
603
- | Compliance | Regulation mapping, evidence strategy, article-level traceability |
604
- | Finance | Event-sourced data model, integer-cents precision, reconciliation |
605
- | Medical | FHIR vs HL7, HIPAA vs GDPR, clinical workflow |
606
- | Marketing | Channel strategy, funnel design, pricing model |
607
- | Research | Literature scope, methodology, statistical rigor |
608
- | Legal | Contract structure, clause tracking, risk classification |
609
- | Logistics | Routing, inventory model, tracking granularity |
610
-
611
- ### Reflexes — Learned Behavioral Patterns
612
-
613
- Every tool use is observed. Patterns that repeat become reflexes:
614
-
615
- ```yaml
616
- id: i18n-all-6-locales
617
- trigger: "any src/messages/*.json file is edited"
618
- action: "edit all 6 locale files in the same operation — never fewer"
619
- confidence: 0.85 # 0.3 tentative → 0.9 certain
620
- evidence_count: 6
621
- ```
397
+ ## Security
398
+
399
+ Zero dependencies in `package.json`. No supply-chain risk.
400
+
401
+ | Layer | Where | What it blocks |
402
+ |-------|-------|----------------|
403
+ | Secret blocking | `pre-tool-use.js` | `AKIA*`, `sk-*`, `ghp_*`, private keys |
404
+ | Prompt injection | `user-prompt.js` | `curl\|bash`, `ignore previous instructions` |
405
+ | Pre-ship scan | `/ship` | Secrets in staged files, failing tests |
406
+ | Agent scoping | Review agents | Read-only — no Write/Edit permissions |
622
407
 
623
- - 3+ occurrences creates a reflex at confidence 0.3
624
- - Confidence rises with confirming observations, decays -0.02/week without use
625
- - Strong clusters evolve into skills or agents via `/evolve`
626
- - Global promotion when seen in 2+ projects at confidence ≥ 0.8
408
+ `/sentinel` — on-demand 5-layer, 111-rule security scan, scored 0–100 (grade A–F).
627
409
 
628
410
  ---
629
411
 
630
412
  ## All 39 Commands
631
413
 
632
- ### Build and Ship
414
+ AZCLAUDE commands are divided into four tiers of intelligence.
633
415
 
634
- | Command | What it does |
635
- |---------|-------------|
636
- | `/copilot` | Autonomous milestone execution. Delegates to orchestrator team. Zero human input. |
637
- | `/dream` | Idea full project scaffold. CLAUDE.md, memory, skills, agents — level by level. |
638
- | `/setup` | Analyze existing project. Detect domain + stack + scale. Build environment. |
639
- | `/add` | Add a feature. Pre-analyzes scope via intelligent-dispatch before touching code. |
640
- | `/fix` | REPRODUCE → INVESTIGATE → HYPOTHESIZE → FIX. Show passing tests. |
641
- | `/audit` | Spec-first code review (read-only). Ghost milestone check. |
642
- | `/test` | Framework detection, exit-code gate, failure classification. |
643
- | `/blueprint` | Read-only analysis structured plan.md. Task classifier + parallel optimization. |
644
- | `/ship` | Ghost check risk scan tests → secrets scan → commit → push → deploy. |
645
- | `/refactor` | Safe restructuring. Constitution pre-flight. Tests before + after. |
646
- | `/doc` | Generate docs from code. Matches existing style. |
647
- | `/migrate` | Upgrade deps/frameworks. Researches breaking changes. |
648
- | `/deps` | Audit: outdated, vulnerable, unused packages. |
649
-
650
- ### Spec-Driven Development
651
-
652
- | Command | What it does |
653
- |---------|-------------|
654
- | `/constitute` | Define ground rules. Non-negotiables, required patterns, definition of done. |
655
- | `/spec` | Structured spec: user stories, acceptance criteria, out-of-scope, failure modes. |
656
- | `/clarify` | Resolve open questions in a spec (max 5 questions). |
657
- | `/analyze` | Cross-artifact consistency. Ghost milestones, spec drift, plan gaps. |
658
- | `/tasks` | Dependency graph from plan.md. Wave groups + critical path. |
659
- | `/issues` | Convert milestones to GitHub Issues with traceability. |
660
- | `/parallel` | Run multiple milestones simultaneously. Worktree isolation + auto-merge. |
661
- | `/mcp` | Recommend and install MCP servers for your stack. |
662
- | `/driven` | Generate code-rules.md — DO/DO NOT coding contract. |
663
- | `/verify` | Audit code against code-rules.md. Reports violations at `file:line`. |
664
- | `/inoculate` | Scan agents/skills for context inoculation coverage. Based on Anthropic's misalignment paper. |
665
- | `/ghost-test` | Detect reward hacking in test suites (AlwaysEqual, sys.exit bypass, framework patching). |
666
-
667
- ### Think and Improve
668
-
669
- | Command | What it does |
670
- |---------|-------------|
671
- | `/debate` | Adversarial debate with evidence scoring (AceMAD protocol). |
672
- | `/evolve` | Detect gaps → generate fixes → quality-gate → create agents from evidence. |
673
- | `/sentinel` | Security scan — 5 layers, 102 rules, scored 0–100 (grade A–F). |
674
- | `/reflexes` | View, analyze, promote learned behavioral patterns. |
675
- | `/reflect` | Self-improve CLAUDE.md from friction logs + session history. |
676
- | `/level-up` | Show current level (0–10), build the next one. |
677
- | `/find` | Search across commands, `~/shared-skills/`, capabilities. |
678
- | `/create` | Build a new command with frontmatter and tests. |
679
- | `/hookify` | Generate hooks from friction patterns. 5 hook types. |
680
-
681
- ### Memory and Session
682
-
683
- | Command | What it does |
684
- |---------|-------------|
685
- | `/snapshot` | Save WHY you made decisions. Auto-injected next session. |
686
- | `/persist` | End-of-session: update goals.md, write session narrative. |
687
- | `/pulse` | Health check — recent changes, level, reflexes, blockers. |
688
- | `/explain` | Code or error to plain language. |
689
- | `/loop` | Repeat any command on an interval via CronCreate. |
416
+ ### 1. Build & Core
417
+ | Command | Purpose |
418
+ |---------|---------|
419
+ | `/copilot` | **Autonomous Mode.** Zero-human-input milestone execution. |
420
+ | `/dream` | **Greenfield.** High-fidelity project generation from a single idea. |
421
+ | `/setup` | **Environment Scan.** Detects stack/domain and builds the initial agent team. |
422
+ | `/add` | **Feature Addition.** Intelligent pre-flight + implementation of new logic. |
423
+ | `/fix` | **Bug Resolution.** 4-phase mandatory-repro/hypothesize/verify loop. |
424
+ | `/ship` | **Release Gate.** Ghost check → security scan → tests → commit → push. |
425
+ | `/refactor` | **Structural Shift.** Dependency-aware code restructuring. |
426
+ | `/test` | **Smart Testing.** Framework detection + failure classification. |
427
+ | `/blueprint` | **Strategic Planning.** Read-only analysis → multi-milestone path. |
428
+ | `/migrate` | **Upgrades.** Safe dependency/framework version transitions. |
429
+ | `/doc` | **Documentation.** Code-to-markdown generation with signature detection. |
430
+
431
+ ### 2. Spec-Driven Tier
432
+ | Command | Purpose |
433
+ |---------|---------|
434
+ | `/constitute` | **Ground Rules.** Define non-negotiables before planning. |
435
+ | `/spec` | **Requirements.** Structured goals/user-stories/ACs before code. |
436
+ | `/clarify` | **Interrogation.** 5-question loop to resolve vague requirements. |
437
+ | `/analyze` | **Consistency.** Detects ghost milestones and plan drift. |
438
+ | `/tasks` | **Wave Groups.** Builds parallelizable dependency graphs. |
439
+ | `/issues` | **GitHub sync.** Converts plan milestones to tracked issues. |
440
+ | `/parallel` | **Concurrent Exec.** Runs milestones in isolated worktrees. |
441
+ | `/driven` | **Coding Contract.** Stack-specific DO/DO NOT rules. |
442
+ | `/verify` | **Compliance.** Audits code against the coding contract. |
443
+ | `/sentinel` | **Security.** 111-rule, 5-layer deep environment scan. |
444
+
445
+ ### 3. Intelligence & Evolution
446
+ | Command | Purpose |
447
+ |---------|---------|
448
+ | `/debate` | **Decision Protocol.** Evidence-tagged adversarial reasoning. |
449
+ | `/evolve` | **Self-Improvement.** Scans for gaps → fixes them → quality-gates. |
450
+ | `/reflexes` | **Behavioral Learning.** Manages confidence-scored tool patterns. |
451
+ | `/reflect` | **Metacognition.** Re-writes its own rules from friction logs. |
452
+ | `/level-up` | **Capabilities.** Visual checklist to build the next capability level. |
453
+
454
+ ### 4. Memory & Utilities
455
+ | Command | Purpose |
456
+ |---------|---------|
457
+ | `/snapshot` | **Reasoning Checkpoint.** Saves tech-lead context mid-session. |
458
+ | `/persist` | **Session Closure.** Goals + friction log → session archive. |
459
+ | `/pulse` | **Health Check.** Quick overview of git, health, and next steps. |
460
+ | `/explain` | **Plain Language.** Step-by-step logic breakdown. |
461
+ | `/loop` | **Automation.** Schedule commands on a cron interval. |
462
+ | `/mcp` | **Stack Scaling.** Tailored external MCP recommendations. |
690
463
 
691
464
  ---
692
465
 
693
- ## Skills vs Agents — The Right Tool
466
+ ## The 15 Expert Agents
467
+
468
+ AZCLAUDE dispatches specialists. Every agent has a 5-layer definition (Persona, Scope, Tools, Constraints, Domain).
469
+
470
+ | Agent | Purpose |
471
+ |-------|---------|
472
+ | **orchestrator** | Tech Lead. Reads constitution, manages milestone dispatch. |
473
+ | **problem-architect** | Strategy. Analyzes milestones, returns Team Spec + risks. |
474
+ | **milestone-builder** | Implementation. Reads non-negotiables, builds, verifies. |
475
+ | **orchestrator-init** | Initialization. Fills CLAUDE.md and goals.md on session 0. |
476
+ | **spec-reviewer** | Gatekeeper. Validates spec quality before planning begins. |
477
+ | **constitution-guard** | Compliance. Blocks milestones that violate project rules. |
478
+ | **code-reviewer** | Quality. Spec-first review with Distrust-in-Review logic. |
479
+ | **test-writer** | Verification. Matches framework/style to write robust tests. |
480
+ | **security-auditor** | Hardening. 111-rule scan for exfiltration and secrets. |
481
+ | **devops-engineer** | Infrastructure. CI/CD, Docker, deployment configuration. |
482
+ | **qa-engineer** | Quality Assurance. E2E tests, release readiness, risk coverage. |
483
+ | **loop-controller** | Level 10. Autonomous environment evolution. |
484
+ | **cc-template-author** | Maintenance. Core AZCLAUDE template development. |
485
+ | **cc-cli-integrator** | CLI Routing. Wires commands, agents, and skills into bin/cli.js. |
486
+ | **cc-test-maintainer** | Test Suite. Keeps test-features.sh in sync with all templates. |
694
487
 
695
- ### Skills: project-specific guidance
488
+ ---
696
489
 
697
- A skill fires automatically when Claude needs context it can't derive from code alone. The best skill answers: **"In this project, when doing X, what do you need to know that you can't read from the files?"**
490
+ ## Auto-Invoked Skills
698
491
 
699
- Skills are NOT generic instructions Claude already knows ("write clean code"). Skills ARE project-specific knowledge: "Our auth uses RS256 not HS256 — here's why" or "The 6 locale files must always be edited atomically."
492
+ Skills fire automatically based on context—no commands needed.
700
493
 
701
- ### Agents: only for parallelism and isolation
494
+ | Skill | Triggers on | Purpose |
495
+ |-------|-------------|---------|
496
+ | **session-guard** | Session Start | Context reset and idle detection. |
497
+ | **test-first** | Implementation | Enforces TDD in designated projects. |
498
+ | **env-scanner** | Startup | Infrastructure and stack auto-analysis. |
499
+ | **debate** | Decisions | Triggers adversarial reasoning for trade-offs. |
500
+ | **security** | Sensitive code | Flags credentials, auth, and secret handling. |
501
+ | **skill-creator** | New patterns | Generates new skills for repeated workflows. |
502
+ | **agent-creator** | Scaling | Builds new agents from co-change evidence. |
503
+ | **architecture-advisor** | Big decisions | Pattern selection by project scale. |
504
+ | **frontend-design** | UI work | 12 aesthetic directions + premium design system. |
702
505
 
703
- An agent is a sub-process. Use one when work must happen **in parallel** or **in a separate context**. Not for organizing knowledge — skills do that cheaper.
506
+ ---
704
507
 
705
- **The test:** Would removing this agent and writing a skill produce worse results? If no — use a skill.
508
+ ## Capability Manifest (48 Modules)
706
509
 
707
- ```
708
- 1. Craft a skill for the project-specific context Claude is missing
709
- 2. Watch if the same workflow keeps recurring (/reflexes will detect it)
710
- 3. If work can be parallelized or isolated → promote to an agent
711
- 4. Let /evolve make the call from git evidence
712
- ```
510
+ AZCLAUDE is a lazy-loaded environment of 48 capability modules. It only loads what the task needs, keeping context costs at ~380 tokens.
713
511
 
714
- ---
512
+ - **Shared Intelligence:** `debate.md`, `evidence.md`, `decision-log.md`
513
+ - **Execution:** `parallel-coordination.md`, `worktree-isolation.md`, `merge-protocol.md`
514
+ - **Evolution:** `environment-growth.md`, `topology-optimization.md`, `reflex-analysis.md`
515
+ - **Security:** `sentinel-layers.md`, `exfiltration-blocking.md`, `secret-patterns.md`
715
516
 
716
- ## Progressive Levels (0–10)
717
-
718
- | Level | What gets built | Trigger |
719
- |-------|----------------|---------|
720
- | 0 | Nothing yet | Fresh project |
721
- | 1 | CLAUDE.md — project rules + dispatch | `/setup` or `/dream` |
722
- | 2 | MCP config | `/level-up` |
723
- | 3 | Skills — project-specific commands | `/setup` generates ≥ 2 |
724
- | 4 | Memory — goals.md, patterns, antipatterns | `/setup` |
725
- | 5 | Agents — from git co-change analysis | `/evolve` after 5+ commits |
726
- | 6 | Hooks — stateful session tracking | `npx azclaude-copilot` |
727
- | 7 | External MCP servers | `/level-up` |
728
- | 8 | Orchestrated pipeline — multi-agent | `/level-up` |
729
- | 9 | Intelligence — debate, OPRO, ELO | `npx azclaude-copilot` |
730
- | 10 | Self-evolving — loop-controller | `/evolve` sustained |
517
+ [Full technical documentation →](DOCS.md)
731
518
 
732
519
  ---
733
520
 
@@ -735,34 +522,31 @@ An agent is a sub-process. Use one when work must happen **in parallel** or **in
735
522
 
736
523
  | | Claude Code alone | AZCLAUDE |
737
524
  |---|---|---|
525
+ | Architecture | Generic chat + tools | Native Markdown + hooks — zero MCP tax, zero IPC, zero token overhead |
738
526
  | Project memory | Starts fresh every session | goals.md + checkpoints injected automatically |
739
527
  | Conventions | Re-explained each time | CLAUDE.md — loaded before every task |
740
528
  | Mid-session reasoning | Lost on compaction | /snapshot saves WHY — auto-injected next session |
741
529
  | Learned behavior | None | Reflexes from tool-use, confidence-scored |
742
- | CLAUDE.md quality | Drifts, never updated | /reflect finds and fixes stale rules |
743
530
  | Architecture decisions | Re-debated every time | decisions.md — logged once, referenced forever |
744
531
  | Failed approaches | Repeated | antipatterns.md — agents read before implementing |
745
532
  | Security | Manual | 4-layer enforcement: write-time blocking + audit + pre-ship |
746
533
  | Agent specialization | None | Project agents emerge from git evidence |
747
534
  | Autonomous building | Not possible | /copilot — three-tier intelligent team |
535
+ | Parallel execution | Raw worktree primitive | Four-layer classifier + safety model |
748
536
  | Self-improvement | Not possible | /evolve + /reflect + /reflexes loop |
749
- | Requirements traceability | None | /spec → acceptance criteria → every milestone |
750
537
  | Governance | None | constitution-guard blocks non-compliant milestones |
751
- | Plan drift | Invisible | /analyze catches ghost milestones before they ship |
752
- | Parallel safety | Raw worktree primitive | Four-layer classifier + safety model |
753
538
  | Any stack | Yes | Yes |
754
- | You own the code | Yes | Yes |
755
539
  | Zero dependencies | — | Yes (0 in package.json) |
756
540
 
757
541
  ---
758
542
 
759
543
  ## Verified
760
544
 
761
- 1788 tests. Every template, command, capability, agent, hook, and CLI feature verified.
545
+ 1794 tests. Every template, command, capability, agent, hook, and CLI feature verified.
762
546
 
763
547
  ```bash
764
548
  bash tests/test-features.sh
765
- # Results: 1788 passed, 0 failed, 1788 total
549
+ # Results: 1794 passed, 0 failed, 1794 total
766
550
  ```
767
551
 
768
552
  ---
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "azclaude-copilot",
3
- "version": "0.5.5",
3
+ "version": "0.5.6",
4
4
  "description": "AI coding environment — 39 commands, 10 skills, 15 agents, memory, reflexes, evolution. Install: npx azclaude-copilot@latest, then open Claude Code.",
5
5
  "bin": {
6
6
  "azclaude": "bin/cli.js",
@@ -71,12 +71,26 @@ try {
71
71
  } else {
72
72
  const p = promptText.toLowerCase();
73
73
 
74
- // ── Detect if this is a QUESTION-ONLY message (no action needed) ──
75
- // Only skip if the message is PURELY a question with no action verb
74
+ // ── Tier classification — 3 levels of routing ───────────────────────────
75
+ // TIER 0: Pure question — skip pipeline entirely (explain, define, show me)
76
+ // TIER 1: Analysis/discussion — load skills only, skip problem-architect
77
+ // ("is this good?", "do we need to?", "should we?", "verify this")
78
+ // TIER 2: Implementation — full pipeline with problem-architect blocking
79
+ // ("build X", "fix X", "create X", "add X", "deploy X")
76
80
  const isQuestionOnly = /^(what|how|why|where|when|who|can you explain|show me|tell me|do you know)\b/.test(p.trim())
77
- && !/\b(build|add|create|implement|fix|refactor|deploy|test|review|write|make|change|update|modify|remove|delete|move|rename|install|setup|configure|migrate)\b/.test(p);
81
+ && !/\b(build|add|create|implement|fix|refactor|deploy|write|make|change|update|modify|remove|delete|move|rename|install|configure|migrate)\b/.test(p);
78
82
 
79
- if (!isQuestionOnly) {
83
+ // Discussion framing — "do we need to implement X" is NOT the same as "implement X"
84
+ const isDiscussion = /\b(do we|should we|would we|need to implement|want to|could we|thinking about|wondering if|considering|is this|verify|is it|does it|did you|did we)\b/.test(p)
85
+ && !/^(yes|ok|sure|go ahead|let'?s|actually implement|actually build|actually fix)\b/.test(p.trim());
86
+
87
+ // Concrete implementation signals — requires file writes, not just reasoning
88
+ const isImplementation = /\b(build|add|create|implement|fix|deploy|migrate|refactor|write|make|change|update|modify|remove|delete|install|configure|rename|setup)\b/.test(p)
89
+ && !isDiscussion;
90
+
91
+ const tier = isQuestionOnly ? 0 : isImplementation ? 2 : 1;
92
+
93
+ if (tier > 0) {
80
94
  const agentsDir = path.join(cfg, 'agents');
81
95
  const skillsDir = path.join(cfg, 'skills');
82
96
  const hasAgents = fs.existsSync(agentsDir);
@@ -100,15 +114,16 @@ try {
100
114
  if (intents.length === 0) intents.push('CODE');
101
115
 
102
116
  // ── Build the MANDATORY pipeline ──
117
+ const tierLabel = tier === 2 ? 'IMPLEMENT' : 'ANALYZE';
103
118
  console.log('');
104
119
  console.log('--- AZCLAUDE PIPELINE (MANDATORY) ---');
105
- console.log('Detected: ' + intents.join(' + '));
120
+ console.log('Detected: ' + intents.join(' + ') + ' | Tier: ' + tierLabel + (tier === 1 ? ' (skip problem-architect — load skills + reason directly)' : ''));
106
121
  console.log('');
107
122
 
108
- // ── STEP 1: problem-architect ALWAYS runs first ──
109
- // This is the AZCLAUDE brain — it decides which agents, skills, files to use.
110
- // NO exceptions. NO "skip if small task". ALWAYS run pre-flight.
111
- if (agentExists('problem-architect')) {
123
+ // ── STEP 1: problem-architect — TIER 2 only (concrete implementation tasks) ──
124
+ // Tier 1 (analysis/discussion) skips this — Claude reasons directly with loaded skills.
125
+ // Tier 2 (build/fix/create/deploy) always runs pre-flight — no exceptions.
126
+ if (tier === 2 && agentExists('problem-architect')) {
112
127
  console.log('STEP 1 — PRE-FLIGHT (BLOCKING):');
113
128
  console.log(' Spawn Agent(subagent_type="problem-architect") with this prompt:');
114
129
  console.log(' "Task: [user\'s request]');