@wazir-dev/cli 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +73 -4
- package/README.md +6 -6
- package/docs/concepts/architecture.md +1 -1
- package/docs/concepts/roles-and-workflows.md +2 -0
- package/docs/concepts/why-wazir.md +59 -0
- package/docs/decisions/2026-03-19-deferred-items.md +564 -0
- package/docs/decisions/2026-03-19-enhancement-decisions.md +300 -0
- package/docs/readmes/INDEX.md +21 -5
- package/docs/readmes/features/expertise/README.md +2 -2
- package/docs/readmes/features/exports/README.md +2 -2
- package/docs/readmes/features/schemas/README.md +3 -0
- package/docs/readmes/features/skills/README.md +17 -0
- package/docs/readmes/features/skills/clarifier.md +5 -0
- package/docs/readmes/features/skills/claude-cli.md +5 -0
- package/docs/readmes/features/skills/codex-cli.md +5 -0
- package/docs/readmes/features/skills/dispatching-parallel-agents.md +5 -0
- package/docs/readmes/features/skills/executing-plans.md +5 -0
- package/docs/readmes/features/skills/executor.md +5 -0
- package/docs/readmes/features/skills/finishing-a-development-branch.md +5 -0
- package/docs/readmes/features/skills/gemini-cli.md +5 -0
- package/docs/readmes/features/skills/humanize.md +5 -0
- package/docs/readmes/features/skills/init-pipeline.md +5 -0
- package/docs/readmes/features/skills/receiving-code-review.md +5 -0
- package/docs/readmes/features/skills/requesting-code-review.md +5 -0
- package/docs/readmes/features/skills/reviewer.md +5 -0
- package/docs/readmes/features/skills/subagent-driven-development.md +5 -0
- package/docs/readmes/features/skills/using-git-worktrees.md +5 -0
- package/docs/readmes/features/skills/wazir.md +5 -0
- package/docs/readmes/features/skills/writing-skills.md +5 -0
- package/docs/readmes/features/workflows/prepare-next.md +1 -1
- package/docs/reference/configuration-reference.md +47 -6
- package/docs/reference/launch-checklist.md +4 -4
- package/docs/reference/review-loop-pattern.md +117 -8
- package/docs/reference/roles-reference.md +1 -0
- package/docs/reference/skill-tiers.md +147 -0
- package/docs/reference/tooling-cli.md +3 -1
- package/docs/truth-claims.yaml +12 -0
- package/expertise/antipatterns/process/ai-coding-antipatterns.md +97 -1
- package/exports/hosts/claude/.claude/settings.json +9 -0
- package/exports/hosts/claude/CLAUDE.md +1 -1
- package/exports/hosts/claude/export.manifest.json +4 -2
- package/exports/hosts/claude/host-package.json +3 -1
- package/exports/hosts/codex/AGENTS.md +1 -1
- package/exports/hosts/codex/export.manifest.json +4 -2
- package/exports/hosts/codex/host-package.json +3 -1
- package/exports/hosts/cursor/.cursor/hooks.json +4 -0
- package/exports/hosts/cursor/.cursor/rules/wazir-core.mdc +1 -1
- package/exports/hosts/cursor/export.manifest.json +4 -2
- package/exports/hosts/cursor/host-package.json +3 -1
- package/exports/hosts/gemini/GEMINI.md +1 -1
- package/exports/hosts/gemini/export.manifest.json +4 -2
- package/exports/hosts/gemini/host-package.json +3 -1
- package/hooks/context-mode-router +191 -0
- package/hooks/definitions/context_mode_router.yaml +19 -0
- package/hooks/hooks.json +31 -6
- package/hooks/protected-path-write-guard +8 -0
- package/hooks/routing-matrix.json +45 -0
- package/hooks/session-start +62 -1
- package/llms-full.txt +905 -132
- package/package.json +2 -3
- package/schemas/hook.schema.json +2 -1
- package/schemas/phase-report.schema.json +80 -0
- package/schemas/usage.schema.json +25 -1
- package/schemas/wazir-manifest.schema.json +19 -0
- package/skills/brainstorming/SKILL.md +18 -155
- package/skills/clarifier/SKILL.md +122 -98
- package/skills/claude-cli/SKILL.md +320 -0
- package/skills/codex-cli/SKILL.md +260 -0
- package/skills/debugging/SKILL.md +13 -0
- package/skills/design/SKILL.md +13 -0
- package/skills/dispatching-parallel-agents/SKILL.md +13 -0
- package/skills/executing-plans/SKILL.md +13 -0
- package/skills/executor/SKILL.md +72 -19
- package/skills/finishing-a-development-branch/SKILL.md +13 -0
- package/skills/gemini-cli/SKILL.md +260 -0
- package/skills/humanize/SKILL.md +13 -0
- package/skills/init-pipeline/SKILL.md +73 -164
- package/skills/prepare-next/SKILL.md +81 -10
- package/skills/receiving-code-review/SKILL.md +13 -0
- package/skills/requesting-code-review/SKILL.md +13 -0
- package/skills/reviewer/SKILL.md +287 -15
- package/skills/run-audit/SKILL.md +13 -0
- package/skills/scan-project/SKILL.md +13 -0
- package/skills/self-audit/SKILL.md +197 -16
- package/skills/subagent-driven-development/SKILL.md +13 -0
- package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +2 -0
- package/skills/subagent-driven-development/implementer-prompt.md +8 -0
- package/skills/subagent-driven-development/spec-reviewer-prompt.md +7 -0
- package/skills/tdd/SKILL.md +13 -0
- package/skills/using-git-worktrees/SKILL.md +13 -0
- package/skills/using-skills/SKILL.md +13 -0
- package/skills/verification/SKILL.md +13 -0
- package/skills/wazir/SKILL.md +194 -377
- package/skills/writing-plans/SKILL.md +14 -1
- package/skills/writing-skills/SKILL.md +13 -0
- package/templates/artifacts/implementation-plan.md +3 -0
- package/templates/artifacts/tasks-template.md +133 -0
- package/templates/examples/phase-report.example.json +48 -0
- package/tooling/src/adapters/composition-engine.js +256 -0
- package/tooling/src/adapters/model-router.js +84 -0
- package/tooling/src/capture/command.js +24 -1
- package/tooling/src/capture/run-config.js +3 -1
- package/tooling/src/capture/store.js +24 -0
- package/tooling/src/capture/usage.js +106 -0
- package/tooling/src/checks/ac-matrix.js +256 -0
- package/tooling/src/checks/command-registry.js +12 -0
- package/tooling/src/checks/docs-truth.js +1 -1
- package/tooling/src/checks/skills.js +111 -0
- package/tooling/src/cli.js +9 -0
- package/tooling/src/commands/stats.js +161 -0
- package/tooling/src/commands/validate.js +5 -1
- package/tooling/src/export/compiler.js +33 -37
- package/tooling/src/gating/agent.js +145 -0
- package/tooling/src/guards/phase-prerequisite-guard.js +127 -0
- package/tooling/src/hooks/routing-logic.js +69 -0
- package/tooling/src/init/auto-detect.js +260 -0
- package/tooling/src/init/command.js +95 -135
- package/tooling/src/input/scanner.js +46 -0
- package/tooling/src/reports/command.js +103 -0
- package/tooling/src/reports/phase-report.js +323 -0
- package/tooling/src/state/command.js +160 -0
- package/tooling/src/state/db.js +287 -0
- package/tooling/src/status/command.js +53 -1
- package/wazir.manifest.yaml +26 -14
|
@@ -26,7 +26,7 @@ Submit pull requests to these curated lists (one PR per list, follow each repo's
|
|
|
26
26
|
### awesome-claude-code
|
|
27
27
|
- **Repo:** `github.com/anthropics/awesome-claude-code` (or the most-starred community fork)
|
|
28
28
|
- **Section:** Tools / Plugins / Extensions
|
|
29
|
-
- **Entry format:** `[Wazir](https://github.com/MohamedAbdallah-14/Wazir) - Host-native engineering OS kit with 10 roles,
|
|
29
|
+
- **Entry format:** `[Wazir](https://github.com/MohamedAbdallah-14/Wazir) - Host-native engineering OS kit with 10 roles, 4 phases (15 workflows), and 268 expertise modules.`
|
|
30
30
|
- **Tips:** Keep the description under 120 characters. Link directly to the repo.
|
|
31
31
|
|
|
32
32
|
### awesome-ai-agents
|
|
@@ -56,7 +56,7 @@ Show HN: Wazir – Engineering OS kit for AI coding agents (Claude, Codex, Gemin
|
|
|
56
56
|
### First comment
|
|
57
57
|
Post a comment immediately after submission explaining:
|
|
58
58
|
1. What problem Wazir solves (AI agents lack structured engineering workflows)
|
|
59
|
-
2. How it works (10 canonical roles, 14-phase pipeline,
|
|
59
|
+
2. How it works (10 canonical roles, 4 phases (15 workflows), 268 expertise modules)
|
|
60
60
|
3. What makes it different (host-native, works across Claude/Codex/Gemini/Cursor)
|
|
61
61
|
4. Quick install: `npx @wazir-dev/cli init`
|
|
62
62
|
5. Invite feedback -- HN readers appreciate genuine requests for input
|
|
@@ -75,7 +75,7 @@ Post a comment immediately after submission explaining:
|
|
|
75
75
|
**Title:** "How I Built an Engineering OS for AI Coding Agents"
|
|
76
76
|
|
|
77
77
|
1. **Hook** -- The problem: AI agents write code but lack engineering discipline.
|
|
78
|
-
2. **Architecture overview** -- 10 roles,
|
|
78
|
+
2. **Architecture overview** -- 10 roles, 4 phases (15 workflows), expertise modules, quality gates.
|
|
79
79
|
3. **Code walkthrough** -- Show a real workflow: how a feature moves from requirements through TDD to deployment.
|
|
80
80
|
4. **Host-native approach** -- Explain why one kit works across Claude, Codex, Gemini, and Cursor.
|
|
81
81
|
5. **Results** -- Concrete metrics or before/after comparisons.
|
|
@@ -100,7 +100,7 @@ Structure as a 5-7 tweet thread:
|
|
|
100
100
|
|
|
101
101
|
1. **Hook tweet:** One-liner about the problem + link to repo.
|
|
102
102
|
2. **What it is:** Brief description of Wazir.
|
|
103
|
-
3. **Architecture:** 10 roles,
|
|
103
|
+
3. **Architecture:** 10 roles, 4 phases (15 workflows), 268 expertise modules (include a diagram image).
|
|
104
104
|
4. **Demo:** Short GIF or screenshot of a workflow in action.
|
|
105
105
|
5. **Multi-host:** Works with Claude, Codex, Gemini, and Cursor.
|
|
106
106
|
6. **Install:** `npx @wazir-dev/cli init`
|
|
@@ -134,10 +134,25 @@ review_loop(artifact_path, phase, dimensions[], depth, config, options={}):
|
|
|
134
134
|
log(pass_number+1, dimension, findings) -> log_path
|
|
135
135
|
|
|
136
136
|
if findings.has_issues:
|
|
137
|
-
# --- Fix
|
|
137
|
+
# --- Fix and re-submit (MANDATORY) ---
|
|
138
|
+
# The producer MUST fix findings and the reviewer MUST re-review.
|
|
139
|
+
# "Fix and continue without re-review" is EXPLICITLY PROHIBITED.
|
|
138
140
|
producer_fix(artifact_path, findings)
|
|
139
141
|
# Continue to next pass -- the fix will be re-reviewed
|
|
140
142
|
|
|
143
|
+
# --- Post-loop: escalation if issues remain ---
|
|
144
|
+
if remaining.has_issues:
|
|
145
|
+
# Cap reached with unresolved findings. Present to user:
|
|
146
|
+
# 1. Approve with known issues (Recommended if non-blocking)
|
|
147
|
+
# 2. Fix manually and re-run
|
|
148
|
+
# 3. Abort
|
|
149
|
+
escalate_to_user(remaining, options=[
|
|
150
|
+
"approve-with-issues",
|
|
151
|
+
"fix-manually-and-rerun",
|
|
152
|
+
"abort"
|
|
153
|
+
])
|
|
154
|
+
# User decides. If approved, log "user-approved-with-issues" in final pass file.
|
|
155
|
+
|
|
141
156
|
return { pass_count: total_passes, issues_found, issues_fixed, remaining, attributions }
|
|
142
157
|
```
|
|
143
158
|
|
|
@@ -328,10 +343,11 @@ Pass counts are FIXED per depth. Quick = 3 passes, standard = 5 passes, deep = 7
|
|
|
328
343
|
|
|
329
344
|
## Loop Cap Configuration
|
|
330
345
|
|
|
331
|
-
The `
|
|
346
|
+
The `workflow_policy` section of `run-config.yaml` (legacy: `phase_policy`) controls which workflows are enabled and sets an absolute safety ceiling per workflow. Only two fields exist: `enabled` and `loop_cap`. There is no `passes` field -- depth determines pass counts (3/5/7), not workflow policy.
|
|
332
347
|
|
|
333
348
|
```yaml
|
|
334
|
-
|
|
349
|
+
workflow_policy:
|
|
350
|
+
# Clarifier phase workflows
|
|
335
351
|
discover: { enabled: true, loop_cap: 10 }
|
|
336
352
|
clarify: { enabled: true, loop_cap: 10 }
|
|
337
353
|
specify: { enabled: true, loop_cap: 10 }
|
|
@@ -341,21 +357,24 @@ phase_policy:
|
|
|
341
357
|
design-review: { enabled: true, loop_cap: 10 }
|
|
342
358
|
plan: { enabled: true, loop_cap: 10 }
|
|
343
359
|
plan-review: { enabled: true, loop_cap: 10 }
|
|
360
|
+
# Executor phase workflows
|
|
344
361
|
execute: { enabled: true, loop_cap: 10 }
|
|
345
362
|
verify: { enabled: true, loop_cap: 5 }
|
|
346
363
|
review: { enabled: true, loop_cap: 10 }
|
|
347
|
-
learn: { enabled:
|
|
348
|
-
prepare_next: { enabled:
|
|
364
|
+
learn: { enabled: true, loop_cap: 5 }
|
|
365
|
+
prepare_next: { enabled: true, loop_cap: 5 }
|
|
349
366
|
run_audit: { enabled: false, loop_cap: 10 }
|
|
350
367
|
```
|
|
351
368
|
|
|
352
369
|
**`loop_cap`** is an absolute safety ceiling that prevents runaway loops regardless of depth. It is checked by `wazir capture loop-check` in pipeline mode. It is NOT the same as pass count (which is determined by depth: 3/5/7). Example: depth=deep gives 7 passes, but if `loop_cap: 5`, the cap guard fires at pass 5 and escalates. This is intentional -- the operator can constrain expensive phases.
|
|
353
370
|
|
|
354
|
-
**Adaptive
|
|
371
|
+
**Adaptive workflows** (`author`, `run_audit`) default to `enabled: false`. They are activated by explicit operator config or intent detection.
|
|
372
|
+
|
|
373
|
+
**Post-run workflows** (`learn`, `prepare_next`) default to `enabled: true`. They run as part of the Final Review phase:
|
|
355
374
|
|
|
375
|
+
- `learn` extracts durable learnings from review findings -- recurring findings become accepted learnings.
|
|
376
|
+
- `prepare_next` prepares context and handoff for the next run.
|
|
356
377
|
- `author` has a human approval gate, not an iterative review loop.
|
|
357
|
-
- `learn` extracts learnings from the completed run -- it is post-execution housekeeping.
|
|
358
|
-
- `prepare_next` prepares context for the next run -- it is a handoff phase.
|
|
359
378
|
- `run_audit` is an on-demand standalone audit, not part of the main pipeline flow.
|
|
360
379
|
|
|
361
380
|
---
|
|
@@ -427,3 +446,93 @@ Do NOT load or invoke any skills."
|
|
|
427
446
|
|
|
428
447
|
For committed changes, replace `--uncommitted` with `--base <sha>`.
|
|
429
448
|
Replace `[DIMENSION]`, `[dimension description]`, and `[criteria]` with the task-specific values from the execution plan and spec.
|
|
449
|
+
|
|
450
|
+
---
|
|
451
|
+
|
|
452
|
+
## Codex Output Context Protection
|
|
453
|
+
|
|
454
|
+
Codex CLI output includes internal traces (file reads, tool calls, reasoning) that are NOT useful for the review — only the final findings matter. To prevent context flooding:
|
|
455
|
+
|
|
456
|
+
### Tee + Extract Pattern
|
|
457
|
+
|
|
458
|
+
1. **Always tee** Codex output to a file:
|
|
459
|
+
```bash
|
|
460
|
+
codex exec ... 2>&1 | tee .wazir/runs/latest/reviews/<phase>-review-pass-<N>.md
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
2. **Extract findings** after the last `codex` marker using `execute_file`:
|
|
464
|
+
```bash
|
|
465
|
+
# If context-mode available (has_execute_file: true):
|
|
466
|
+
mcp__plugin_context-mode_context-mode__execute_file(
|
|
467
|
+
path: ".wazir/runs/latest/reviews/<phase>-review-pass-<N>.md",
|
|
468
|
+
language: "shell",
|
|
469
|
+
code: "tac $FILE | sed '/^codex$/q' | tac | tail -n +2"
|
|
470
|
+
)
|
|
471
|
+
```
|
|
472
|
+
|
|
473
|
+
3. **Present extracted findings only** — the raw trace stays in the file for debugging but never enters the main context window.
|
|
474
|
+
|
|
475
|
+
### Fallback (no context-mode)
|
|
476
|
+
|
|
477
|
+
If `context_mode.has_execute_file` is false, extract using shell directly:
|
|
478
|
+
|
|
479
|
+
```bash
|
|
480
|
+
tac <file> | sed '/^codex$/q' | tac | tail -n +2
|
|
481
|
+
```
|
|
482
|
+
|
|
483
|
+
This reverses the file, finds the first (= last original) `codex` marker, reverses back, and skips the marker line.
|
|
484
|
+
|
|
485
|
+
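**If no marker found:** fail closed — log "codex marker not found in output, cannot extract findings" and present a warning to the user with 0 findings extracted. The raw file is preserved for manual review. Do NOT fall back to `tail` or any best-effort extraction that could leak traces into context.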
|
|
486
|
+
|
|
487
|
+
---
|
|
488
|
+
|
|
489
|
+
## Phase Scoring: First vs Final Artifact Comparison
|
|
490
|
+
|
|
491
|
+
At the start of each review loop (pass 1), score the artifact on its phase's canonical dimension set (1-10 per dimension). At the end of the loop (final pass), score again using the **same canonical dimensions**. Present the delta in the end-of-phase report.
|
|
492
|
+
|
|
493
|
+
### Canonical Dimension Sets Per Phase
|
|
494
|
+
|
|
495
|
+
These are the fixed rubrics — no ad-hoc dimension selection:
|
|
496
|
+
|
|
497
|
+
| Phase | Canonical Dimensions |
|
|
498
|
+
|-------|---------------------|
|
|
499
|
+
| research-review | Coverage, Source quality, Relevance, Gaps identified, Actionability |
|
|
500
|
+
| clarification-review / spec-challenge | Completeness, Testability, Ambiguity, Assumptions, Scope creep |
|
|
501
|
+
| design-review | Spec coverage, Design-spec consistency, Accessibility, Visual consistency, Exported-code fidelity |
|
|
502
|
+
| plan-review | Completeness, Testability, Task granularity, Dependency correctness, Phase structure, File coverage, Estimation accuracy |
|
|
503
|
+
| task-review | Correctness, Tests, Wiring, Drift, Quality |
|
|
504
|
+
| final | Correctness, Completeness, Wiring, Verification, Drift, Quality, Documentation |
|
|
505
|
+
|
|
506
|
+
### Scoring Rules
|
|
507
|
+
|
|
508
|
+
1. Initial and final scores MUST use the **same dimension set** — the delta is only meaningful on the same rubric.
|
|
509
|
+
2. The reviewer records which dimension set was used in each pass file.
|
|
510
|
+
3. Delta format: `Dimension: X/10 → Y/10 (+Z)`.
|
|
511
|
+
|
|
512
|
+
### Quality Delta Report Section
|
|
513
|
+
|
|
514
|
+
The end-of-phase report (see "End-of-Phase Report" below) includes a **Quality Delta** section:
|
|
515
|
+
|
|
516
|
+
```markdown
|
|
517
|
+
## Quality Delta
|
|
518
|
+
|
|
519
|
+
| Dimension | Initial | Final | Delta |
|
|
520
|
+
|-----------|---------|-------|-------|
|
|
521
|
+
| Completeness | 4/10 | 9/10 | +5 |
|
|
522
|
+
| Testability | 3/10 | 8/10 | +5 |
|
|
523
|
+
| Ambiguity | 5/10 | 9/10 | +4 |
|
|
524
|
+
```
|
|
525
|
+
|
|
526
|
+
---
|
|
527
|
+
|
|
528
|
+
## End-of-Phase Report
|
|
529
|
+
|
|
530
|
+
Every phase exit produces a report saved to `.wazir/runs/latest/reviews/<phase>-report.md` containing:
|
|
531
|
+
|
|
532
|
+
1. **Summary** — what the phase produced
|
|
533
|
+
2. **Key Changes** — first-version vs final-version highlights (not full diff — what improved)
|
|
534
|
+
3. **Quality Delta** — per-dimension before/after scores (see Phase Scoring above)
|
|
535
|
+
4. **Findings Log** — per-pass finding counts by severity (e.g., "Pass 1: 6 findings (3 blocking, 2 warning, 1 note). Pass 7: 0 findings. All resolved.")
|
|
536
|
+
5. **Usage** — token usage from `wazir capture usage` (runs before report generation)
|
|
537
|
+
6. **Context Savings** — context-mode stats if available, omit section if not
|
|
538
|
+
7. **Time Spent** — wall-clock elapsed time from phase start to end
|
|
@@ -35,6 +35,7 @@ This is the lookup reference for canonical roles, workflows, and their contracts
|
|
|
35
35
|
| `review` | `verify` | Adversarial quality review |
|
|
36
36
|
| `learn` | `review` | Capture scoped learnings |
|
|
37
37
|
| `prepare-next` | `learn` | Produce clean next-run handoff |
|
|
38
|
+
| `run-audit` | (standalone) | Structured codebase audit with source-backed findings |
|
|
38
39
|
|
|
39
40
|
## Role routing valid values
|
|
40
41
|
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# Skill Tier Classification
|
|
2
|
+
|
|
3
|
+
Audit of Wazir skills against Superpowers v4.3.1 skills.
|
|
4
|
+
Each skill is classified into one of three tiers:
|
|
5
|
+
|
|
6
|
+
- **Delegate** -- use superpowers skill as-is, delete Wazir fork
|
|
7
|
+
- **Augment** -- use superpowers skill + inject Wazir context addendum (strictly additive, no overrides). **NOTE:** R2 validation found this tier is not implementable -- see [Augment Mechanism](#augment-mechanism) below.
|
|
8
|
+
- **Own** -- Wazir-original or structurally rewritten skill, rename to `wz:` prefix
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## Classification Table
|
|
13
|
+
|
|
14
|
+
| Wazir Skill | Superpowers Equivalent | Tier | Rationale | Risk Notes |
|
|
15
|
+
|---|---|---|---|---|
|
|
16
|
+
| brainstorming | brainstorming | **Own** | Structurally rewritten. Superpowers version is a linear checklist (explore context, ask questions, propose approaches, present design, write doc, invoke writing-plans). Wazir replaces the entire process: adds Command Routing and Codebase Exploration preambles, replaces the design-doc step with a design-review loop (`--mode design-review` with canonical dimensions), outputs to `.wazir/runs/latest/clarified/design.md` instead of `docs/plans/`, and adds a complete Agent Teams multi-agent brainstorming mode (Free Thinker / Grounder / Synthesizer / Arbiter pattern using TeamCreate/SendMessage). None of the superpowers process steps survive intact. | Dropping the Agent Teams mode would lose Wazir's most differentiated brainstorming capability. |
|
|
17
|
+
| clarifier | _(none)_ | **Own** | Wazir-original. No superpowers counterpart exists. | -- |
|
|
18
|
+
| debugging | systematic-debugging | **Own** | Structurally rewritten. Superpowers has a 4-phase process (Root Cause Investigation with 5 substeps, Pattern Analysis, Hypothesis and Testing, Implementation) totaling ~300 lines with detailed examples, rationalization tables, and supporting technique references. Wazir condenses this to a 4-step observe-hypothesize-test-fix loop (~75 lines), replaces all codebase exploration with Wazir CLI symbol-first exploration (`wazir index search-symbols`, `wazir recall symbol` and `wazir recall file`), adds loop cap awareness (pipeline mode with `wazir capture loop-check` vs. standalone mode), and removes all superpowers examples, rationalization tables, and red-flag lists. The methodology is fundamentally different in structure despite sharing the spirit of "root cause first." | Delegating would lose Wazir CLI integration and loop cap awareness. Superpowers version is far more detailed on anti-patterns and may be worth referencing separately. |
|
|
19
|
+
| design | _(none)_ | **Own** | Wazir-original. No superpowers counterpart exists. | -- |
|
|
20
|
+
| dispatching-parallel-agents | dispatching-parallel-agents | **Own** | Reclassified from Augment to Own (R2). Skill shadowing is full-override, so Augment tier is not implementable via `~/.claude/skills/`. Wazir already carries the full content: superpowers core (When to Use decision tree, The Pattern with 4 steps, Agent Prompt Structure, Common Mistakes section) plus Wazir additions (Command Routing preamble, Codebase Exploration preamble, philosophical paragraph in Overview, Problem/Fix format for Common Mistakes). Drops superpowers-only sections: "When NOT to Use," "Real Example from Session," "Key Benefits," "Verification," "Real-World Impact." | Superpowers informational sections (Real Example, Key Benefits, Verification, Real-World Impact) not carried forward. Low risk -- these are teaching content, not behavioral. |
|
|
21
|
+
| executing-plans | executing-plans | **Own** | Structurally rewritten. Superpowers uses batch execution (default first 3 tasks) with report-and-wait checkpoints and explicit batch feedback loops. Wazir replaces batching with per-task execution, adds a per-task review loop (`--mode task-review` with 5 task-execution dimensions, Codex integration, review log filenames, loop cap tracking via `wazir capture loop-check`), adds standalone vs. pipeline mode detection, and adds a note recommending wz:subagent-driven-development when subagents are available. The batch-vs-per-task change is a core behavioral difference. All integration references point to `wz:` skills. | Delegating would lose per-task review loops and pipeline mode integration. |
|
|
22
|
+
| executor | _(none)_ | **Own** | Wazir-original. No superpowers counterpart exists. | -- |
|
|
23
|
+
| finishing-a-development-branch | finishing-a-development-branch | **Own** | Reclassified from Augment to Own (R2). Skill shadowing is full-override, so Augment tier is not implementable via `~/.claude/skills/`. Wazir already carries the full content: superpowers process (5 steps: verify tests, determine base branch, present 4 options, execute choice, cleanup worktree) preserved with identical structure and identical option semantics. Wazir adds Command Routing and Codebase Exploration preambles. Minor cosmetic changes: `<N>` removed from failure template, `<base-branch>` shortened to `<base>`, emoji checkmarks replaced with Y/-, `<commit-list>` changed to `<count>`, PR body simplified. Red Flags and Integration sections trimmed but no behavioral contradiction. | Low risk. The superpowers version has more detailed Red Flags and Integration sections not carried forward. |
|
|
24
|
+
| humanize | _(none)_ | **Own** | Wazir-original. No superpowers counterpart exists. | -- |
|
|
25
|
+
| init-pipeline | _(none)_ | **Own** | Wazir-original. No superpowers counterpart exists. | -- |
|
|
26
|
+
| prepare-next | _(none)_ | **Own** | Wazir-original. No superpowers counterpart exists. | -- |
|
|
27
|
+
| receiving-code-review | receiving-code-review | **Own** | Structurally rewritten. Superpowers has extensive sections: Forbidden Responses, Source-Specific Handling, YAGNI Check, Implementation Order, When To Push Back, Acknowledging Correct Feedback (with detailed anti-patterns for gratitude), Gracefully Correcting Pushback, Common Mistakes table, Real Examples, and GitHub Thread Replies. Wazir preserves the core Response Pattern and Forbidden Responses but: (1) adds Loop Tracking section (pipeline mode with `wazir capture loop-check` and standalone pass counts), (2) restructures Implementation Order to a 4-tier priority (blocking, functional, quality, nice-to-have) instead of 3-tier, (3) adds a Quick Reference decision table, (4) removes the entire "Acknowledging Correct Feedback" anti-gratitude section, the "Gracefully Correcting Pushback" section, the Common Mistakes table, all Real Examples, the "When To Push Back" enumeration, and the GitHub Thread Replies section. The Loop Tracking addition and structural deletions make this a substantive rewrite. | Delegating would lose loop tracking. The removed anti-gratitude and pushback sections from superpowers are valuable behavioral guardrails worth preserving. |
|
|
28
|
+
| requesting-code-review | requesting-code-review | **Own** | Structurally rewritten. Both skills share the same When to Request triggers and Example structure. But Wazir: (1) replaces `superpowers:code-reviewer` with `wz:code-reviewer`, (2) adds explicit review loop parameters (`--mode`, depth-aware dimensions, pass number), (3) adds `codex review --uncommitted` and `codex review --base` commands, (4) adds Codex Error Handling section, (5) adds `{REVIEW_MODE}` placeholder, (6) changes Integration section to reference per-task review checkpoints instead of batch review, (7) adds "Dispatch review without explicit `--mode`" to Red Flags. The Codex integration and review loop parameter system are structural additions that change how reviews are dispatched. | Delegating would lose Codex integration and review loop protocol. |
|
|
29
|
+
| reviewer | _(none)_ | **Own** | Wazir-original. No superpowers counterpart exists. | -- |
|
|
30
|
+
| run-audit | _(none)_ | **Own** | Wazir-original. No superpowers counterpart exists. | -- |
|
|
31
|
+
| scan-project | _(none)_ | **Own** | Wazir-original. No superpowers counterpart exists. | -- |
|
|
32
|
+
| self-audit | _(none)_ | **Own** | Wazir-original. No superpowers counterpart exists. | -- |
|
|
33
|
+
| subagent-driven-development | subagent-driven-development | **Own** | Structurally rewritten. Both share the same high-level process (fresh subagent per task, two-stage review, spec then quality). But Wazir: (1) adds `Capture PRE_TASK_SHA` step to the process flowchart for diff scoping, (2) adds Code Review Scoping section (`codex review --base <pre-task-sha>`), (3) adds Review Loop Alignment section (explicit `--mode task-review`, task-scoped log filenames, loop cap via `wazir capture loop-check`), (4) adds Codex Error Handling section, (5) adds standalone mode fallback, (6) changes all skill references from `superpowers:` to `wz:`, (7) adds "Review the wrong diff" to Red Flags, (8) removes the Example Workflow, Advantages detail, and Cost breakdown from superpowers. The diff-scoping and review-loop integration are structural process changes. | Delegating would lose diff-scoped reviews and Codex integration. The removed Example Workflow from superpowers is a useful teaching tool. |
|
|
34
|
+
| tdd | test-driven-development | **Own** | Structurally rewritten. Superpowers has an exhaustive treatment (~370 lines): detailed Red-Green-Refactor with Good/Bad code examples, Iron Law with explicit "delete and start over" rules, a Verification Checklist, extensive Why Order Matters section, Common Rationalizations table, When Stuck guide, Testing Anti-Patterns reference, and Debugging Integration. Wazir condenses to ~45 lines with 3 steps (RED, GREEN, REFACTOR), adds a single-pass test quality check in RED phase ("Are these tests testing the right behavior? Are they real assertions?"), and removes all examples, rationalization tables, and elaboration. Different description and name (`wz:tdd` vs `test-driven-development`). | Delegating would lose the test quality check. The superpowers version's extensive rationalization prevention and examples are valuable for discipline enforcement but costly in tokens. |
|
|
35
|
+
| using-git-worktrees | using-git-worktrees | **Own** | Reclassified from Augment to Own (R2). Skill shadowing is full-override, so Augment tier is not implementable via `~/.claude/skills/`. Wazir already carries the full content: superpowers core process (directory selection priority, safety verification with `git check-ignore`, creation steps, project setup auto-detection, clean baseline verification) preserved structurally intact. Wazir adds: Command Routing preamble, Codebase Exploration preamble, global directory changed from `~/.config/superpowers/worktrees/` to `~/.wazir/worktrees/`, Cleanup and Common Issues sections (submodules, lock files, stale worktrees). Drops superpowers-only sections: Example Workflow, Quick Reference table, Common Mistakes, Red Flags, Integration. | Dropped superpowers sections (Quick Reference, Common Mistakes, Red Flags, Integration) reduce operational guardrails. Could be recovered into the Own skill. |
|
|
36
|
+
| using-skills | using-superpowers | **Own** | Structurally rewritten. Both enforce the same core rule (invoke skills before any response, even at 1% chance). But Wazir: (1) renames from `using-superpowers` to `using-skills`, (2) changes all internal skill references from `superpowers:` to `wz:` throughout flowchart and examples, (3) removes the Skill Types section detail about "Rigid vs Flexible" elaboration, (4) removes User Instructions elaboration. The name change and systematic `wz:` prefix replacement throughout the flowchart make this a namespace-level rewrite. | Could potentially be Augment if namespace mapping were handled at a routing layer rather than in-skill. |
|
|
37
|
+
| verification | verification-before-completion | **Own** | Structurally rewritten. Superpowers has an exhaustive treatment (~140 lines): Iron Law, Gate Function (5-step IDENTIFY/RUN/READ/VERIFY/CLAIM), Common Failures table, Red Flags list, Rationalization Prevention table, Key Patterns (tests, regression, build, requirements, agent delegation), Why This Matters section with 24 failure memories, and When To Apply section. Wazir condenses to ~35 lines with 3 bullet requirements (what was verified, exact command, actual result), a minimum rule, and a brief "when verification fails" section. Different name (`wz:verification` vs `verification-before-completion`). | Delegating would lose the concise Wazir format. The superpowers version's extensive rationalization prevention is valuable for discipline but token-expensive. The Wazir version may be too terse to enforce the discipline effectively. |
|
|
38
|
+
| wazir | _(none)_ | **Own** | Wazir-original. No superpowers counterpart exists. | -- |
|
|
39
|
+
| writing-plans | writing-plans | **Own** | Structurally rewritten. Superpowers focuses on plan document format (header template, task structure with bite-sized steps, code examples in plan, execution handoff to subagent-driven or parallel session). Wazir: (1) changes inputs to "approved design or approved clarified direction" instead of "spec or requirements", (2) adds pipeline-aware output paths (`.wazir/runs/latest/clarified/execution-plan.md` and `.wazir/runs/latest/tasks/task-NNN/spec.md` vs. standalone `docs/plans/`), (3) removes the plan document format template entirely (no header template, no task structure template, no code examples), (4) adds Plan Review Loop section with `wz:reviewer --mode plan-review`, Codex integration via stdin pipe, Codex error handling, depth-aware pass counts, and standalone fallback. The plan review loop and pipeline path system are structural additions; the removal of the format template is a structural deletion. | Delegating would lose pipeline integration and plan review loop. The removed format template from superpowers is valuable for plan quality and could be worth recovering. |
|
|
40
|
+
| writing-skills | writing-skills | **Own** | Structurally rewritten. Both share the TDD-for-skills philosophy and RED-GREEN-REFACTOR mapping. But Wazir: (1) condenses from ~650 lines to ~170 lines, (2) removes the extensive SKILL.md Structure template, CSO (Claude Search Optimization) section, Flowchart Usage guidelines, Code Examples guidelines, Token Efficiency section, File Organization examples, Testing All Skill Types section (discipline/technique/pattern/reference), Common Rationalizations for Skipping Testing table, Bulletproofing Skills Against Rationalization section (with Cialdini psychology reference), Skill Creation Checklist, Discovery Workflow, Anti-Patterns section, and STOP deployment gate, (3) adds "Be Prescriptive, Not Descriptive" guidance, "Use Rationalization Prevention" example, "Include Decision Trees" guidance, and skill reference syntax. The massive content reduction and different teaching approach make this a structural rewrite. | Delegating would lose the concise prescriptive format. The superpowers version's CSO guidelines, testing methodology, and anti-pattern catalog are extremely valuable reference material. |
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Superpowers Skills with No Wazir Counterpart
|
|
45
|
+
|
|
46
|
+
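This section tracks superpowers skills that have no Wazir fork and could therefore be used as-is via the superpowers plugin. Currently there are none: the table below records a renamed replacement rather than a missing counterpart.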
|
|
47
|
+
|
|
48
|
+
| Superpowers Skill | Status | Notes |
|
|
49
|
+
|---|---|---|
|
|
50
|
+
| using-superpowers | Replaced by `wz:using-skills` | See using-skills row above. |
|
|
51
|
+
|
|
52
|
+
All 14 superpowers skills have a Wazir counterpart (using-superpowers maps to using-skills, systematic-debugging maps to debugging, test-driven-development maps to tdd, verification-before-completion maps to verification).
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Summary by Tier
|
|
57
|
+
|
|
58
|
+
| Tier | Count | Skills |
|
|
59
|
+
|---|---|---|
|
|
60
|
+
| **Own** | 25 | brainstorming, clarifier, debugging, design, dispatching-parallel-agents, executing-plans, executor, finishing-a-development-branch, humanize, init-pipeline, prepare-next, receiving-code-review, requesting-code-review, reviewer, run-audit, scan-project, self-audit, subagent-driven-development, tdd, using-git-worktrees, using-skills, verification, wazir, writing-plans, writing-skills |
|
|
61
|
+
| **Augment** | 0 | _(none -- tier not implementable, see [Augment Mechanism](#augment-mechanism))_ |
|
|
62
|
+
| **Delegate** | 0 | _(none)_ |
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
## Common Wazir Additions (Appear in All Forked Skills)
|
|
67
|
+
|
|
68
|
+
Every Wazir fork of a superpowers skill adds these two preamble sections:
|
|
69
|
+
|
|
70
|
+
1. **Command Routing** -- routes large commands to context-mode tools and small commands to native Bash, following `hooks/routing-matrix.json`.
|
|
71
|
+
2. **Codebase Exploration** -- prescribes symbol-first exploration via `wazir index search-symbols` and `wazir recall`, with fallback to direct file reads.
|
|
72
|
+
|
|
73
|
+
These preambles alone would have justified the **Augment** tier for any skill with no other structural changes, had that tier been implementable (see [Augment Mechanism](#augment-mechanism)).
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## Augment Mechanism
|
|
78
|
+
|
|
79
|
+
**Research date:** 2026-03-19 (R2: Composition Infrastructure Validation)
|
|
80
|
+
|
|
81
|
+
### Finding: Augment tier is not implementable
|
|
82
|
+
|
|
83
|
+
The Augment tier assumed that placing a Wazir addendum at `~/.claude/skills/<skill-name>/SKILL.md` would layer Wazir context on top of the superpowers base skill. This assumption is wrong. **Skill shadowing is full-override, not merge/append.**
|
|
84
|
+
|
|
85
|
+
### Evidence
|
|
86
|
+
|
|
87
|
+
**1. `skills-core.js` `resolveSkillPath()` (superpowers v4.3.1)**
|
|
88
|
+
|
|
89
|
+
The function at `lib/skills-core.js:108-140` checks the personal skills directory first. If `~/.claude/skills/<name>/SKILL.md` exists, it returns that file immediately and never reads the superpowers version. There is no content merging.
|
|
90
|
+
|
|
91
|
+
```
|
|
92
|
+
// Try personal skills first (unless explicitly superpowers:)
|
|
93
|
+
if (!forceSuperpowers && personalDir) {
|
|
94
|
+
const personalSkillFile = path.join(personalDir, actualSkillName, 'SKILL.md');
|
|
95
|
+
if (fs.existsSync(personalSkillFile)) {
|
|
96
|
+
return { skillFile: personalSkillFile, sourceType: 'personal', ... };
|
|
97
|
+
// ^^^ returns here -- superpowers version never consulted
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
**2. Superpowers test suite confirms override behavior**
|
|
103
|
+
|
|
104
|
+
`tests/opencode/test-skills-core.sh` line 336 asserts:
|
|
105
|
+
```
|
|
106
|
+
[PASS] Personal skills shadow superpowers skills
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
The test creates `personal-skills/shared-skill/SKILL.md` and `superpowers-skills/shared-skill/SKILL.md`, resolves `shared-skill`, and verifies `sourceType` is `"personal"` -- the superpowers version is invisible.
|
|
110
|
+
|
|
111
|
+
**3. Superpowers RELEASE-NOTES.md v3.3.0**
|
|
112
|
+
|
|
113
|
+
Line 385 documents the behavior explicitly: "Personal skills override superpowers skills when names match."
|
|
114
|
+
|
|
115
|
+
**4. The `superpowers:` prefix bypass is not available in Claude Code**
|
|
116
|
+
|
|
117
|
+
`skills-core.js` supports `superpowers:skill-name` syntax to force resolution to the superpowers version even when a personal skill shadows it. However, `skills-core.js` is only used by the OpenCode plugin (`/.opencode/plugins/superpowers.js`). Claude Code's native `Skill` tool has its own built-in resolution logic that does not expose this prefix bypass.
|
|
118
|
+
|
|
119
|
+
### Alternatives Considered
|
|
120
|
+
|
|
121
|
+
| Approach | Viable? | Why |
|
|
122
|
+
|---|---|---|
|
|
123
|
+
| Place addendum in `~/.claude/skills/<name>/` | No | Full override -- base skill content lost |
|
|
124
|
+
| Merge base + addendum in SKILL.md at install time | Partial | Would work but creates a maintenance coupling: every superpowers update requires re-merging. This is functionally identical to Own tier. |
|
|
125
|
+
| Inject Wazir context via CLAUDE.md | No | CLAUDE.md is project-scoped; skill behavior should be global across all projects |
|
|
126
|
+
| Use `superpowers:` prefix to load base, then append | No | Prefix only works in OpenCode's `skills-core.js`, not in Claude Code's native Skill tool |
|
|
127
|
+
| Propose upstream merge/append feature | Future | Would require a superpowers or Claude Code platform change |
|
|
128
|
+
|
|
129
|
+
### Conclusion
|
|
130
|
+
|
|
131
|
+
The Augment tier is architecturally impossible with the current skill discovery mechanism. All three former Augment skills (dispatching-parallel-agents, finishing-a-development-branch, using-git-worktrees) are reclassified to **Own** tier. Since the Wazir versions already carry the full superpowers base content plus Wazir additions, no content is lost -- the skills simply cannot delegate to a shared base.
|
|
132
|
+
|
|
133
|
+
If superpowers or Claude Code introduces a composition/layering mechanism in the future (e.g., `extends: superpowers:dispatching-parallel-agents` in frontmatter), the Augment tier could be revisited.
|
|
134
|
+
|
|
135
|
+
---
|
|
136
|
+
|
|
137
|
+
## Observations
|
|
138
|
+
|
|
139
|
+
1. **No Delegate candidates exist.** Every Wazir fork adds at minimum the Command Routing and Codebase Exploration preambles, which prevents pure delegation.
|
|
140
|
+
|
|
141
|
+
2. **Augment tier is not implementable.** R2 validation (2026-03-19) found that skill shadowing in both superpowers `skills-core.js` and Claude Code's native Skill tool is full-override: placing a SKILL.md in `~/.claude/skills/<name>/` completely replaces the superpowers skill with the same name. There is no merge or append mechanism. The three former Augment candidates (dispatching-parallel-agents, finishing-a-development-branch, using-git-worktrees) have been reclassified to Own. See [Augment Mechanism](#augment-mechanism) for full analysis.
|
|
142
|
+
|
|
143
|
+
3. **All 14 forked skills are Own** because either (a) they introduce structural process changes (review loops, pipeline mode, Codex integration, Agent Teams, content restructuring) or (b) the Augment composition mechanism does not exist in the platform.
|
|
144
|
+
|
|
145
|
+
4. **Token cost tradeoff is significant.** Several Wazir Own skills (tdd, verification, debugging, writing-skills) are dramatically shorter than their superpowers counterparts. The superpowers versions contain valuable rationalization prevention tables, detailed examples, and anti-pattern catalogs that enforce discipline. The Wazir versions trade this for token efficiency. This tradeoff should be revisited -- some of the removed discipline content may be worth recovering as separate reference files.
|
|
146
|
+
|
|
147
|
+
5. **The `wz:` prefix is already applied** in skill names within the Wazir SKILL.md frontmatter for all forked skills, consistent with the Own tier convention.
|
|
@@ -15,6 +15,7 @@ The `wazir` CLI is minimal on purpose. It exists to validate and export the host
|
|
|
15
15
|
| `wazir validate commits` | implemented | Validates conventional commit format for commits in the range `--base..--head` (or auto-detected base to HEAD). |
|
|
16
16
|
| `wazir validate changelog` | implemented | Validates `CHANGELOG.md` structure; with `--require-entries` and `--base`, enforces new entries since the base. |
|
|
17
17
|
| `wazir validate docs-drift` | implemented | Detects when source files (roles, workflows, skills, hooks) change without corresponding documentation updates. Advisory by default; `--strict` exits non-zero on drift. |
|
|
18
|
+
| `wazir validate skills` | implemented | Validates skill frontmatter and checks for name conflicts with superpowers skills (requires `wz:` prefix). Rejects any `CONTEXT.md` files, because the Augment tier was found not implementable in R2. |
|
|
18
19
|
| `wazir validate artifacts` | reserved | Exits `2` until artifact-template and example validation expands. |
|
|
19
20
|
| `wazir export build` | implemented | Generates host packages under `exports/hosts/*` from canonical sources. |
|
|
20
21
|
| `wazir export --check` | implemented | Verifies generated host packages still match current canonical source hashes. |
|
|
@@ -28,7 +29,8 @@ The `wazir` CLI is minimal on purpose. It exists to validate and export the host
|
|
|
28
29
|
| `wazir recall file` | implemented | Returns an exact line-bounded slice from an indexed file. Supports `--tier L0\|L1` for summary recall. |
|
|
29
30
|
| `wazir recall symbol` | implemented | Returns an exact slice for an indexed symbol match. Supports `--tier L0\|L1` for summary recall. |
|
|
30
31
|
| `wazir doctor` | implemented | Validates the active repo surface for manifest, hooks, state-root policy, and host export directory presence. |
|
|
31
|
-
| `wazir status` | implemented | Reads run status directly from `<state-root>/runs/<run-id>/status.json`. |
|
|
32
|
+
| `wazir status` | implemented | Reads run status directly from `<state-root>/runs/<run-id>/status.json`. Includes a one-line context savings summary when usage data is available. |
|
|
33
|
+
| `wazir stats` | implemented | Shows token savings statistics for a run, including total queries, estimated tokens saved, bytes avoided, per-tool breakdown, and overall savings ratio. |
|
|
32
34
|
| `wazir capture init` | implemented | Creates a run ledger with `status.json`, `events.ndjson`, and a captures directory under the configured state root. |
|
|
33
35
|
| `wazir capture event` | implemented | Appends a run event and can update phase, status, and loop counts in `status.json`. |
|
|
34
36
|
| `wazir capture route` | implemented | Reserves a run-local capture file path for large tool output. |
|
package/docs/truth-claims.yaml
CHANGED
|
@@ -130,6 +130,12 @@
|
|
|
130
130
|
subject: wazir status
|
|
131
131
|
verifier: command_registry
|
|
132
132
|
required: true
|
|
133
|
+
- id: command-stats
|
|
134
|
+
file: docs/reference/tooling-cli.md
|
|
135
|
+
claim_type: command
|
|
136
|
+
subject: wazir stats
|
|
137
|
+
verifier: command_registry
|
|
138
|
+
required: true
|
|
133
139
|
- id: command-capture-family
|
|
134
140
|
file: docs/reference/tooling-cli.md
|
|
135
141
|
claim_type: command
|
|
@@ -202,6 +208,12 @@
|
|
|
202
208
|
subject: wazir validate docs-drift
|
|
203
209
|
verifier: command_registry
|
|
204
210
|
required: true
|
|
211
|
+
- id: command-validate-skills
|
|
212
|
+
file: docs/reference/tooling-cli.md
|
|
213
|
+
claim_type: command
|
|
214
|
+
subject: wazir validate skills
|
|
215
|
+
verifier: command_registry
|
|
216
|
+
required: true
|
|
205
217
|
- id: generated-claude-package
|
|
206
218
|
file: docs/reference/host-exports.md
|
|
207
219
|
claim_type: generated_file
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
> AI coding agents produce code that compiles, passes superficial review, and reads authoritatively -- yet harbors systematic defects that human-written code rarely exhibits. These anti-patterns arise from the fundamental mechanics of next-token prediction operating without ground truth, persistent memory, or genuine understanding. A 2026 CodeRabbit analysis of 470 open-source repositories found AI-generated code contains 1.7x more bugs than human code, with 75% more logic errors and 57% more security findings per pull request. A USENIX Security 2025 study of 576,000 code samples found 20% of AI-recommended packages do not exist. This module catalogs the 22 most damaging patterns, grounded in documented incidents and empirical research.
|
|
4
4
|
|
|
5
5
|
> **Domain:** Process -- AI-Assisted Development
|
|
6
|
-
> **Anti-patterns covered:**
|
|
6
|
+
> **Anti-patterns covered:** 22
|
|
7
7
|
> **Highest severity:** Critical
|
|
8
8
|
> **Primary audience:** AI agents performing self-evaluation; human reviewers auditing AI output
|
|
9
9
|
|
|
@@ -823,6 +823,100 @@ An AI coding agent should ask itself these questions before submitting generated
|
|
|
823
823
|
|
|
824
824
|
15. **Continuity check:** After a session break, have I reviewed existing code for conventions before generating new code?
|
|
825
825
|
|
|
826
|
+
### AP-21: Pipeline Phase Skipping
|
|
827
|
+
|
|
828
|
+
**Also known as:** Rationalized Bypass, "The Spec Is Clear Enough", Shortcut Execution
|
|
829
|
+
**Frequency:** Common
|
|
830
|
+
**Severity:** Critical
|
|
831
|
+
**Detection difficulty:** Low
|
|
832
|
+
|
|
833
|
+
**What it looks like:**
|
|
834
|
+
|
|
835
|
+
The agent receives a detailed briefing or spec and jumps directly to implementation, skipping the pipeline's clarification, specification, design, and planning phases. Typical rationalization: "The input is already detailed enough — I don't need to clarify further."
|
|
836
|
+
|
|
837
|
+
```
|
|
838
|
+
User: /wazir Build a caching layer for the API
|
|
839
|
+
Agent: [reads detailed input] This is clear. Let me start implementing...
|
|
840
|
+
[spawns parallel agents for implementation]
|
|
841
|
+
[skips clarify → specify → design → plan entirely]
|
|
842
|
+
```
|
|
843
|
+
|
|
844
|
+
**Why AI agents do it:**
|
|
845
|
+
|
|
846
|
+
When the input appears complete, the agent's next-token prediction favors the most "productive" action: writing code. The pipeline phases (clarify, specify, design, plan) feel redundant when the input already describes what to build. The agent lacks the meta-awareness that the pipeline exists precisely to catch what the input does NOT say — unstated assumptions, missing edge cases, architectural trade-offs, and scope boundaries. Skipping phases is the single most damaging process failure because it invalidates every downstream quality gate.
|
|
847
|
+
|
|
848
|
+
**Detection signals:**
|
|
849
|
+
|
|
850
|
+
- Implementation starts without `clarification.md`, `spec-hardened.md`, `design.md`, or `execution-plan.md` artifacts in the run directory
|
|
851
|
+
- Agent jumps from input scanning to code writing without user checkpoints
|
|
852
|
+
- Rationalization language in conversation: "this is already clear", "the spec is detailed enough", "we can skip clarification"
|
|
853
|
+
- `wazir capture event --phase executor` returns exit 44 (phase prerequisite gate failed)
|
|
854
|
+
- No `phase_exit` events for clarifier phase in `events.ndjson`
|
|
855
|
+
|
|
856
|
+
**Root cause:**
|
|
857
|
+
|
|
858
|
+
No enforcement mechanism between pipeline phases. The agent can read the pipeline skill and choose to interpret it loosely. Without hard gates (file-existence checks, CLI validation), the pipeline is advisory, not mandatory.
|
|
859
|
+
|
|
860
|
+
**Remediation:**
|
|
861
|
+
|
|
862
|
+
1. **Skill-level hard gates** — each phase skill contains a prerequisite check section that lists required artifacts and instructs the agent to STOP if any are missing
|
|
863
|
+
2. **CLI-level validation** — `wazir capture event --phase executor` validates that prior phases completed before allowing `phase_enter`
|
|
864
|
+
3. **Anti-rationalization instruction** — skill text explicitly names and blocks the rationalization pattern: "Do NOT skip phases because the input looks clear enough"
|
|
865
|
+
|
|
866
|
+
**Related:** Wazir pipeline enforcement (item #18), `skills/executor/SKILL.md` Phase Prerequisites section, `tooling/src/guards/phase-prerequisite-guard.js`
|
|
867
|
+
|
|
868
|
+
---
|
|
869
|
+
|
|
870
|
+
### AP-22: Autonomous Scope Reduction
|
|
871
|
+
|
|
872
|
+
**Also known as:** Silent Tiering, Unilateral Deferral, Scope Halving
|
|
873
|
+
**Frequency:** Common (observed in real pipeline runs)
|
|
874
|
+
**Severity:** Critical
|
|
875
|
+
**Detection difficulty:** Moderate
|
|
876
|
+
|
|
877
|
+
**What it looks like:**
|
|
878
|
+
|
|
879
|
+
The AI agent autonomously reduces the user's requested scope by tiering, deferring, or deprioritizing items without explicit user approval. The user asks for 10 items; the agent delivers 5 and calls the rest "future work."
|
|
880
|
+
|
|
881
|
+
**Why AI agents do it:**
|
|
882
|
+
|
|
883
|
+
Agent optimizes for completion over coverage. Large input overwhelms the context, and the agent triages by perceived difficulty. The agent confuses "prioritization suggestion" with "scope decision." No hard gate prevents the reduction.
|
|
884
|
+
|
|
885
|
+
**What goes wrong:**
|
|
886
|
+
|
|
887
|
+
User loses trust — they asked for X, got X/2. Repeated runs required to cover what should have been one run. Agent appears to make product decisions above its authority.
|
|
888
|
+
|
|
889
|
+
**Detection signals:**
|
|
890
|
+
|
|
891
|
+
- Input has N items, execution plan has fewer than N tasks
|
|
892
|
+
- Words like "deferred", "future tier", "out of scope for this run" appear without user approval
|
|
893
|
+
- Post-run review reveals missing deliverables
|
|
894
|
+
|
|
895
|
+
**The fix:**
|
|
896
|
+
|
|
897
|
+
1. **Hard gate:** `items_in_plan >= items_in_input` enforced by scope coverage guard
|
|
898
|
+
2. **Clarifier check:** Count input items vs plan items before presenting plan
|
|
899
|
+
3. **Explicit approval required:** Agent can SUGGEST prioritization but CANNOT decide it
|
|
900
|
+
4. **Anti-rationalization language:** "The input looks detailed enough to skip some items" is NOT valid reasoning
|
|
901
|
+
|
|
902
|
+
**Example:**
|
|
903
|
+
|
|
904
|
+
Bad:
|
|
905
|
+
```
|
|
906
|
+
Input: "Implement items 1-10"
|
|
907
|
+
Plan: "Tier 1 (this run): items 1-5. Tier 2 (future): items 6-10."
|
|
908
|
+
```
|
|
909
|
+
|
|
910
|
+
Good:
|
|
911
|
+
```
|
|
912
|
+
Input: "Implement items 1-10"
|
|
913
|
+
Plan: "10 tasks covering all 10 items. Suggested order: [...]"
|
|
914
|
+
```
|
|
915
|
+
|
|
916
|
+
**Related:** CrewAI Task Guardrails (mandatory task completion enforcement), AP-21 (Pipeline Phase Skipping — related pattern of skipping required steps), `tooling/src/guards/phase-prerequisite-guard.js` (`evaluateScopeCoverageGuard`)
|
|
917
|
+
|
|
918
|
+
---
|
|
919
|
+
|
|
826
920
|
## Code Smell Quick Reference
|
|
827
921
|
|
|
828
922
|
| Anti-Pattern | Severity | Frequency | Key Signal | First Action |
|
|
@@ -847,6 +941,8 @@ An AI coding agent should ask itself these questions before submitting generated
|
|
|
847
941
|
| AP-18 Fake Progress | High | Common | Hardcoded return values | Ban pass/TODO in production |
|
|
848
942
|
| AP-19 Over-Mocking | High | Common | More mocks than assertions | Require integration tests |
|
|
849
943
|
| AP-20 Resumption Errors | High | Common | Mixed ID types across files | Architecture file in every session |
|
|
944
|
+
| AP-21 Pipeline Phase Skipping | Critical | Common | Missing clarified/* artifacts | Enforce hard gates in skills + CLI |
|
|
945
|
+
| AP-22 Autonomous Scope Reduction | Critical | Common | Plan has fewer tasks than input items | Scope coverage guard + user approval |
|
|
850
946
|
|
|
851
947
|
---
|
|
852
948
|
|
|
@@ -6,7 +6,7 @@ This host package is generated from the canonical Wazir sources.
|
|
|
6
6
|
|
|
7
7
|
- project: Wazir
|
|
8
8
|
- hosts: claude, codex, gemini, cursor
|
|
9
|
-
- phases:
|
|
9
|
+
- phases: init, clarifier, executor, final_review
|
|
10
10
|
- roles: clarifier, researcher, specifier, content-author, designer, planner, executor, verifier, reviewer, learner
|
|
11
11
|
- protected paths: input, roles, workflows, schemas, exports/hosts
|
|
12
12
|
- state root default: ~/.wazir/projects/{project_slug}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"host": "claude",
|
|
3
3
|
"source_hashes": {
|
|
4
|
-
"wazir.manifest.yaml": "
|
|
4
|
+
"wazir.manifest.yaml": "f00776eb08ed3332b8f855001a2fd0b866cd0b81d009c6fbb316149c398c51ca",
|
|
5
5
|
"roles/clarifier.md": "1e1b8a2c05f1070fdcef485963cfcbffff62c4b2703a8d73fe51ac52d056e573",
|
|
6
6
|
"roles/content-author.md": "cc20b80bd70ab68b3239a9cf56bf1ffc2c06843d38afc6b190844b35a1d73c3e",
|
|
7
7
|
"roles/designer.md": "76cff5bda82975cfb4074de71681e7c8ba284e2e49d0cc98f90208642fef74fc",
|
|
@@ -27,12 +27,14 @@
|
|
|
27
27
|
"workflows/spec-challenge.md": "dc99137c28c49a6f8312924709afb6077754d128e90466dc911150ce15737897",
|
|
28
28
|
"workflows/specify.md": "53b84e74871f6dbd93cae22a881cc5907e398b29501d0a1fa08c7ed69df705cb",
|
|
29
29
|
"workflows/verify.md": "45f9c189520dfe9d24c0bc340a15e6a80c988fca1b84dc187627032a6dbaee16",
|
|
30
|
+
"hooks/definitions/context_mode_router.yaml": "a10dc927418bc130b447eb33faf0f45669ecd9c7917f56947ddd74850a4e0e37",
|
|
30
31
|
"hooks/definitions/loop_cap_guard.yaml": "f0fd220e028ab6fad3d8fd650602884fe500ca4899eff6e428cf217af058618d",
|
|
31
32
|
"hooks/definitions/post_tool_capture.yaml": "a773cd6e18972dee8eef3b7cb06fd1d319a71de4588897cebfbe643f6781a3b2",
|
|
32
33
|
"hooks/definitions/pre_compact_summary.yaml": "daa0175d79f3e0127c5ce86a7a2f8df0be3f58b5c94fe749da715a17c7b2d04e",
|
|
33
34
|
"hooks/definitions/pre_tool_capture_route.yaml": "3c2663380ff3cd09f09de5b96bcf6123266fa74d8a03dfb2d6fbe40a43fb13cf",
|
|
34
35
|
"hooks/definitions/protected_path_write_guard.yaml": "6683d41778b823e2a4e606065597569aa04363f091e135e165de9732f1fc2171",
|
|
35
36
|
"hooks/definitions/session_start.yaml": "9383fcf1f8304c87e57726478a461706c0fc73dc62bcc4d8661f2eeffa43a82d",
|
|
36
|
-
"hooks/definitions/stop_handoff_harvest.yaml": "67a3c0a8bb7cb66b88e77dc79e748082e964d278c47935662c453922a846482b"
|
|
37
|
+
"hooks/definitions/stop_handoff_harvest.yaml": "67a3c0a8bb7cb66b88e77dc79e748082e964d278c47935662c453922a846482b",
|
|
38
|
+
"hooks/hooks.json": "f255345793951b5cf6f6d8c9a8b6a6ad2d3140023453410127a6f70d8e110c26"
|
|
37
39
|
}
|
|
38
40
|
}
|
|
@@ -27,13 +27,15 @@
|
|
|
27
27
|
"workflows/spec-challenge.md",
|
|
28
28
|
"workflows/specify.md",
|
|
29
29
|
"workflows/verify.md",
|
|
30
|
+
"hooks/definitions/context_mode_router.yaml",
|
|
30
31
|
"hooks/definitions/loop_cap_guard.yaml",
|
|
31
32
|
"hooks/definitions/post_tool_capture.yaml",
|
|
32
33
|
"hooks/definitions/pre_compact_summary.yaml",
|
|
33
34
|
"hooks/definitions/pre_tool_capture_route.yaml",
|
|
34
35
|
"hooks/definitions/protected_path_write_guard.yaml",
|
|
35
36
|
"hooks/definitions/session_start.yaml",
|
|
36
|
-
"hooks/definitions/stop_handoff_harvest.yaml"
|
|
37
|
+
"hooks/definitions/stop_handoff_harvest.yaml",
|
|
38
|
+
"hooks/hooks.json"
|
|
37
39
|
],
|
|
38
40
|
"files": [
|
|
39
41
|
".claude/agents/clarifier.md",
|
|
@@ -6,7 +6,7 @@ This host package is generated from the canonical Wazir sources.
|
|
|
6
6
|
|
|
7
7
|
- project: Wazir
|
|
8
8
|
- hosts: claude, codex, gemini, cursor
|
|
9
|
-
- phases:
|
|
9
|
+
- phases: init, clarifier, executor, final_review
|
|
10
10
|
- roles: clarifier, researcher, specifier, content-author, designer, planner, executor, verifier, reviewer, learner
|
|
11
11
|
- protected paths: input, roles, workflows, schemas, exports/hosts
|
|
12
12
|
- state root default: ~/.wazir/projects/{project_slug}
|