@wazir-dev/cli 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. package/CHANGELOG.md +31 -2
  2. package/docs/plans/2026-03-15-cli-pipeline-integration-plan.md +1 -1
  3. package/docs/reference/review-loop-pattern.md +429 -0
  4. package/docs/reference/tooling-cli.md +2 -0
  5. package/docs/truth-claims.yaml +6 -0
  6. package/exports/hosts/claude/.claude/agents/clarifier.md +3 -0
  7. package/exports/hosts/claude/.claude/agents/designer.md +3 -0
  8. package/exports/hosts/claude/.claude/agents/executor.md +2 -0
  9. package/exports/hosts/claude/.claude/agents/planner.md +3 -0
  10. package/exports/hosts/claude/.claude/agents/researcher.md +2 -0
  11. package/exports/hosts/claude/.claude/agents/reviewer.md +5 -1
  12. package/exports/hosts/claude/.claude/agents/specifier.md +3 -0
  13. package/exports/hosts/claude/.claude/commands/clarify.md +4 -0
  14. package/exports/hosts/claude/.claude/commands/design-review.md +4 -0
  15. package/exports/hosts/claude/.claude/commands/design.md +4 -0
  16. package/exports/hosts/claude/.claude/commands/discover.md +4 -0
  17. package/exports/hosts/claude/.claude/commands/execute.md +4 -0
  18. package/exports/hosts/claude/.claude/commands/plan-review.md +4 -0
  19. package/exports/hosts/claude/.claude/commands/plan.md +4 -0
  20. package/exports/hosts/claude/.claude/commands/spec-challenge.md +4 -0
  21. package/exports/hosts/claude/.claude/commands/specify.md +4 -0
  22. package/exports/hosts/claude/.claude/commands/verify.md +4 -0
  23. package/exports/hosts/claude/export.manifest.json +19 -19
  24. package/exports/hosts/codex/export.manifest.json +19 -19
  25. package/exports/hosts/cursor/export.manifest.json +19 -19
  26. package/exports/hosts/gemini/export.manifest.json +19 -19
  27. package/hooks/definitions/loop_cap_guard.yaml +1 -1
  28. package/hooks/hooks.json +18 -0
  29. package/package.json +3 -2
  30. package/roles/clarifier.md +3 -0
  31. package/roles/designer.md +3 -0
  32. package/roles/executor.md +2 -0
  33. package/roles/planner.md +3 -0
  34. package/roles/researcher.md +2 -0
  35. package/roles/reviewer.md +5 -1
  36. package/roles/specifier.md +3 -0
  37. package/skills/brainstorming/SKILL.md +139 -38
  38. package/skills/clarifier/SKILL.md +219 -0
  39. package/skills/debugging/SKILL.md +11 -1
  40. package/skills/executing-plans/SKILL.md +15 -2
  41. package/skills/executor/SKILL.md +76 -0
  42. package/skills/init-pipeline/SKILL.md +106 -17
  43. package/skills/receiving-code-review/SKILL.md +8 -0
  44. package/skills/requesting-code-review/SKILL.md +25 -5
  45. package/skills/reviewer/SKILL.md +151 -0
  46. package/skills/subagent-driven-development/SKILL.md +25 -2
  47. package/skills/tdd/SKILL.md +8 -0
  48. package/skills/wazir/SKILL.md +250 -43
  49. package/skills/writing-plans/SKILL.md +31 -4
  50. package/templates/examples/wazir-manifest.example.yaml +1 -1
  51. package/tooling/src/capture/command.js +87 -1
  52. package/tooling/src/capture/run-config.js +21 -0
  53. package/tooling/src/checks/brand-truth.js +3 -6
  54. package/tooling/src/checks/command-registry.js +1 -0
  55. package/tooling/src/checks/docs-truth.js +1 -1
  56. package/tooling/src/checks/runtime-surface.js +3 -7
  57. package/tooling/src/cli.js +8 -3
  58. package/tooling/src/init/command.js +201 -0
  59. package/wazir.manifest.yaml +0 -3
  60. package/workflows/clarify.md +4 -0
  61. package/workflows/design-review.md +4 -0
  62. package/workflows/design.md +4 -0
  63. package/workflows/discover.md +4 -0
  64. package/workflows/execute.md +4 -0
  65. package/workflows/plan-review.md +4 -0
  66. package/workflows/plan.md +4 -0
  67. package/workflows/spec-challenge.md +4 -0
  68. package/workflows/specify.md +4 -0
  69. package/workflows/verify.md +4 -0
@@ -61,16 +61,48 @@ Run `which wazir` to check if the CLI is installed.
61
61
  >
62
62
  > **How would you like to install it?**
63
63
  >
64
- > 1. **npm** (Recommended) — `npm install -g wazir`
64
+ > 1. **npm** (Recommended) — `npm install -g @wazir-dev/cli`
65
65
  > 2. **Local link** — `npm link` from the Wazir project root
66
- > 3. **Skip** — Continue without the CLI (some features will be unavailable)
67
66
 
68
- If the user picks 1, run `npm install -g wazir` and verify with `wazir --version`.
67
+ If the user picks 1, run `npm install -g @wazir-dev/cli` and verify with `wazir --version`.
69
68
  If the user picks 2, run `npm link` from the project root and verify.
70
- If the user picks 3, warn that `wazir capture`, `wazir validate`, and `wazir index` commands will not work, then continue.
69
+
70
+ The CLI is **required** — the pipeline uses `wazir capture`, `wazir validate`, `wazir index`, and `wazir doctor` throughout execution. There is no skip option.
71
71
 
72
72
  **If installed**, run `wazir doctor --json` to verify repo health.
73
73
 
74
+ If doctor reports unhealthy:
75
+ > **Repo health check failed:** [details from doctor output]
76
+ > Fix issues before running the pipeline.
77
+
78
+ Stop. Do NOT continue the pipeline until the health check passes.
79
+
80
+ ### Branch Check
81
+
82
+ Run `wazir validate branches` to check the current git branch.
83
+
84
+ - If on `main` or `develop`:
85
+ > You're on **[branch]**. The pipeline requires a feature branch.
86
+ >
87
+ > 1. **Create feat/<slug>** (Recommended) — branch from current
88
+ > 2. **Continue on [branch]** — not recommended for feature/refactor work
89
+
90
+ Wait for the user to answer before continuing.
91
+
92
+ - If the branch name is invalid (does not start with `feat/`, `fix/`, `chore/`, etc.): warn but continue.
93
+
94
+ ### Index Check
95
+
96
+ ```bash
97
+ INDEX_STATS=$(wazir index stats --json 2>/dev/null)
98
+ FILE_COUNT=$(echo "$INDEX_STATS" | jq -r '.file_count // 0')
99
+ if [ "$FILE_COUNT" -eq 0 ]; then
100
+ wazir index build && wazir index summarize --tier all
101
+ else
102
+ wazir index refresh
103
+ fi
104
+ ```
105
+
74
106
  ### Pipeline Init Check
75
107
 
76
108
  Check if `.wazir/state/config.json` exists.
@@ -89,6 +121,12 @@ ln -sfn run-YYYYMMDD-HHMMSS .wazir/runs/latest
89
121
 
90
122
  If a previous completed run exists (check for a `completed_at` field in the previous `latest` run's `run-config.yaml`), record its `run_id` as `parent_run_id` in the new run's config.
91
123
 
124
+ After creating the run directory, initialize event capture:
125
+
126
+ ```bash
127
+ wazir capture init --run <run-id> --phase clarify --status starting
128
+ ```
129
+
92
130
  ## Step 3: Pre-Flight Configuration
93
131
 
94
132
  Build the run configuration. Skip questions that were answered via inline modifiers.
@@ -150,11 +188,21 @@ parallel_backend: none # none | claude_teams (future: subagents,
150
188
 
151
189
  # Phase policy (system-decided, not user-facing)
152
190
  phase_policy:
153
- discover: { enabled: true, reason: "feature intent requires research" }
154
- design: { enabled: true, reason: "new UI component" }
155
- spec-challenge: { enabled: true, passes: 2, reason: "standard depth" }
156
- author: { enabled: false, reason: "no i18n or seed data needed" }
157
- plan-review: { enabled: true, passes: 1 }
191
+ discover: { enabled: true, loop_cap: 10 }
192
+ clarify: { enabled: true, loop_cap: 10 }
193
+ specify: { enabled: true, loop_cap: 10 }
194
+ spec-challenge: { enabled: true, loop_cap: 10 }
195
+ author: { enabled: false, loop_cap: 10 }
196
+ design: { enabled: true, loop_cap: 10 }
197
+ design-review: { enabled: true, loop_cap: 10 }
198
+ plan: { enabled: true, loop_cap: 10 }
199
+ plan-review: { enabled: true, loop_cap: 10 }
200
+ execute: { enabled: true, loop_cap: 10 }
201
+ verify: { enabled: true, loop_cap: 5 }
202
+ review: { enabled: true, loop_cap: 10 }
203
+ learn: { enabled: false, loop_cap: 5 }
204
+ prepare_next: { enabled: false, loop_cap: 5 }
205
+ run_audit: { enabled: false, loop_cap: 10 }
158
206
 
159
207
  # Research
160
208
  research_topics: [] # populated by researcher phase
@@ -176,15 +224,15 @@ Map intent + depth to applicable phases. The system decides — the user does NO
176
224
  |-------|--------|-------|
177
225
  | **Core** (always run) | `clarify`, `verify`, `review` | Never skipped |
178
226
  | **Adaptive** (run when evidence says so) | `discover`, `design`, `author`, `specify` | Skipped for bugfix/docs/spike at quick depth |
179
- | **Scale** (intensity varies) | `spec-challenge`, `plan-review`, `design-review` | Single-pass at quick, multi-pass at deep |
227
+ | **Scale** (intensity varies) | `spec-challenge`, `plan-review`, `design-review` | Loop cap controls iteration depth |
180
228
 
181
229
  Log skip decisions to the run's `run-config.yaml` with reasons:
182
230
 
183
231
  ```yaml
184
232
  phase_policy:
185
- discover: { enabled: true }
186
- design: { enabled: false, reason: "bugfix intent — no design needed" }
187
- spec-challenge: { enabled: true, passes: 1, reason: "quick depth" }
233
+ discover: { enabled: true, loop_cap: 10 }
234
+ design: { enabled: false, loop_cap: 10, reason: "bugfix intent — no design needed" }
235
+ spec-challenge: { enabled: true, loop_cap: 10 }
188
236
  ```
189
237
 
190
238
  ### Confidence Gate
@@ -192,7 +240,7 @@ phase_policy:
192
240
  After building the run config, evaluate confidence:
193
241
 
194
242
  - **High confidence** (clear intent, depth set, no ambiguity) — show a one-line summary and proceed:
195
- > **Running: standard depth, feature, sequential. 11 of 14 phases. Proceeding...**
243
+ > **Running: standard depth, feature, sequential. 11 of 15 phases. Proceeding...**
196
244
 
197
245
  - **Low confidence** (ambiguous intent, unclear scope) — show the full plan and ask:
198
246
  > **Here's the run plan:**
@@ -205,11 +253,13 @@ After building the run config, evaluate confidence:
205
253
  > 1. **Yes, proceed** (Recommended)
206
254
  > 2. **No, let me adjust**
207
255
 
208
- ## Step 4: Run Clarifier
256
+ ## Step 4: Run Pipeline Phases
257
+
258
+ The full pipeline runs these phases in order. Each phase produces an artifact that must pass its review loop before flowing to the next phase. Review mode is always passed explicitly (`--mode`) -- no auto-detection.
209
259
 
210
- ### Source Capture
260
+ ### 4a: Source Capture
211
261
 
212
- Before invoking the clarifier, instruct the researcher to capture all referenced sources locally:
262
+ Before invoking the clarifier, capture all referenced sources locally:
213
263
 
214
264
  - Fetch all URLs referenced in `.wazir/input/` briefing files
215
265
  - Save fetched content to `.wazir/runs/<run-id>/sources/`
@@ -238,46 +288,194 @@ Before invoking the clarifier, instruct the researcher to capture all referenced
238
288
 
239
289
  Research briefs produced by the researcher must reference local paths (`sources/src-001-...`) instead of live URLs. The original URL is preserved in the manifest for provenance. Failures are recorded explicitly — never silently skipped.
240
290
 
241
- ### Clarifier Invocation
291
+ ### 4b: Clarify (clarifier role)
292
+
293
+ ```bash
294
+ wazir capture event --run <run-id> --event phase_enter --phase clarify --status in_progress
295
+ ```
242
296
 
243
- Invoke the `clarifier` skill.
297
+ Invoke the clarifier skill for Phase 1A.
298
+ Produces clarification artifact.
299
+ Review: clarification-review loop (`--mode clarification-review`, spec/clarification dimensions).
300
+ Pass count: quick=3, standard=5, deep=7. No extension.
301
+ Checkpoint: user approves clarification.
244
302
 
245
- This runs the full Phase 0 + Phase 1 pipeline:
246
- - Phase 0: Research (autonomous skipped if depth=quick and intent=bugfix)
247
- - Phase 1A: Clarify (autonomous)
248
- - Phase 1A+: Spec Harden (passes determined by depth)
249
- - Phase 1B: Brainstorm (interactive — **will pause for user approval**. If `team_mode: parallel`, uses structured dialogue with Free Thinker + Grounder + Synthesizer agents)
250
- - Phase 1C: Plan (task generation)
303
+ ```bash
304
+ wazir capture event --run <run-id> --event phase_exit --phase clarify --status completed
305
+ ```
251
306
 
252
- **Resume detection:** If `.wazir/runs/latest/clarified/` already has task specs and an execution plan, ask:
307
+ ### 4c: Research (researcher role via discover workflow)
253
308
 
254
- > **Clarification was already completed. What would you like to do?**
255
- >
256
- > 1. **Skip to execution** (Recommended) — Use existing task specs
257
- > 2. **Re-run clarifier** — Start fresh
309
+ ```bash
310
+ wazir capture event --run <run-id> --event phase_enter --phase discover --status in_progress
311
+ ```
312
+
313
+ Clarifier delegates to discover workflow (researcher role).
314
+ Produces research artifact.
315
+ Review: research-review loop (`--mode research-review`, research dimensions).
316
+ Pass count: quick=3, standard=5, deep=7. No extension.
317
+ Skip condition: depth=quick AND intent=bugfix.
318
+
319
+ ```bash
320
+ wazir capture event --run <run-id> --event phase_exit --phase discover --status completed
321
+ ```
322
+
323
+ ### 4d: Specify (specifier role)
324
+
325
+ ```bash
326
+ wazir capture event --run <run-id> --event phase_enter --phase specify --status in_progress
327
+ ```
328
+
329
+ Delegate to specify workflow.
330
+ Specifier produces measurable spec from clarification + research.
331
+ Review: spec-challenge loop (`--mode spec-challenge`, spec/clarification dimensions).
332
+ Pass count: quick=3, standard=5, deep=7. No extension.
333
+ Checkpoint: user approves spec.
334
+
335
+ ```bash
336
+ wazir capture event --run <run-id> --event phase_exit --phase specify --status completed
337
+ ```
338
+
339
+ ### 4d.5: Author (content-author role) [ADAPTIVE]
340
+
341
+ ```bash
342
+ wazir capture event --run <run-id> --event phase_enter --phase author --status in_progress
343
+ ```
344
+
345
+ Enabled when `phase_policy.author.enabled = true` (default: false).
346
+ Content-author writes non-code content artifacts.
347
+ Approval gate: human approval required (not a review loop).
348
+ Skip condition: disabled by default. Enable for content-heavy projects.
349
+
350
+ ```bash
351
+ wazir capture event --run <run-id> --event phase_exit --phase author --status completed
352
+ ```
353
+
354
+ ### 4e: Brainstorm (designer role)
355
+
356
+ ```bash
357
+ wazir capture event --run <run-id> --event phase_enter --phase design --status in_progress
358
+ ```
258
359
 
259
- ## Step 5: Run Executor
360
+ Invoke brainstorming skill for Phase 1B.
361
+ Interactive -- pauses for user approval of design concept.
362
+ After user approval: design-review loop (`--mode design-review`,
363
+ canonical design-review dimensions: spec coverage, design-spec consistency,
364
+ accessibility, visual consistency, exported-code fidelity).
365
+ Pass count: quick=3, standard=5, deep=7. No extension.
366
+ Skip condition: intent=bugfix/docs.
260
367
 
261
- Invoke the `executor` skill.
368
+ ```bash
369
+ wazir capture event --run <run-id> --event phase_exit --phase design --status completed
370
+ ```
371
+
372
+ ### 4f: Plan (planner role via wz:writing-plans)
373
+
374
+ ```bash
375
+ wazir capture event --run <run-id> --event phase_enter --phase plan --status in_progress
376
+ ```
377
+
378
+ Delegate to `wz:writing-plans`.
379
+ Planner produces execution plan and task specs.
380
+ Review: plan-review loop (`--mode plan-review`, plan dimensions).
381
+ Pass count: quick=3, standard=5, deep=7. No extension.
382
+ Checkpoint: user approves plan.
262
383
 
263
- This runs Phase 2: autonomous execution with the composition engine, TDD, and quality gates.
384
+ ```bash
385
+ wazir capture event --run <run-id> --event phase_exit --phase plan --status completed
386
+ ```
387
+
388
+ ### 4g: Execute (executor role)
389
+
390
+ ```bash
391
+ wazir capture event --run <run-id> --event phase_enter --phase execute --status in_progress
392
+ ```
393
+
394
+ **Pre-execution gate** — run before the first task:
395
+
396
+ ```bash
397
+ wazir validate manifest && wazir validate hooks
398
+ # If either fails, stop and report the failure. Do NOT proceed to task execution.
399
+ ```
400
+
401
+ Invoke executor skill for Phase 2.
402
+ Per-task review: task-review loop (`--mode task-review --task-id <NNN>`,
403
+ 5 task-execution dimensions) before each commit.
404
+ Review logs: `execute-task-<NNN>-review-pass-<N>.md`
405
+ Cap tracking: `wazir capture loop-check --run <run-id> --phase execute --loop-count <N> --task-id <NNN>`
406
+ Codex error handling: non-zero exit -> codex-unavailable, self-review only.
407
+ NOTE: per-task review is NOT the final review.
264
408
 
265
409
  If `team_mode: parallel` in run-config, the executor spawns Agent Teams for independent tasks. Otherwise, tasks run sequentially.
266
410
 
267
- **Resume detection:** If `.wazir/runs/latest/artifacts/` has completed artifacts, ask:
411
+ ```bash
412
+ wazir capture event --run <run-id> --event phase_exit --phase execute --status completed
413
+ ```
414
+
415
+ ### 4h: Verify (verifier role)
268
416
 
269
- > **Some tasks are already completed. What would you like to do?**
270
- >
271
- > 1. **Resume** (Recommended) — Continue from where it left off
272
- > 2. **Start fresh** — Re-run all tasks from scratch
417
+ ```bash
418
+ wazir capture event --run <run-id> --event phase_enter --phase verify --status in_progress
419
+ ```
420
+
421
+ Deterministic verification of execution claims.
422
+ Not a review loop -- produces proof, not findings.
273
423
 
274
- ## Step 6: Run Reviewer
424
+ ```bash
425
+ wazir capture event --run <run-id> --event phase_exit --phase verify --status completed
426
+ ```
427
+
428
+ ### 4i: Final Review (reviewer role in final mode)
429
+
430
+ ```bash
431
+ wazir capture event --run <run-id> --event phase_enter --phase review --status in_progress
432
+ ```
433
+
434
+ Invoke reviewer skill with `--mode final`.
435
+ 7-dimension scored review (correctness, completeness, wiring, verification,
436
+ drift, quality, documentation). Score 0-70.
437
+ This IS the scored final review gate.
438
+
439
+ ```bash
440
+ wazir capture event --run <run-id> --event phase_exit --phase review --status completed
441
+ ```
442
+
443
+ ### 4j: Learn (learner role) [ADAPTIVE]
444
+
445
+ Enabled when `phase_policy.learn.enabled = true` (default: false).
446
+ Extract durable learnings from the completed run.
447
+ No review loop. Learnings require explicit scope tags.
448
+ Skip condition: disabled by default. Enable for retrospective runs.
449
+
450
+ ### 4k: Prepare Next (planner role) [ADAPTIVE]
275
451
 
276
- Invoke the `reviewer` skill.
452
+ Enabled when `phase_policy.prepare_next.enabled = true` (default: false).
453
+ Prepare context and handoff for the next run.
454
+ No review loop. No implicit carry-forward of unapproved learnings.
455
+ Skip condition: disabled by default.
277
456
 
278
- This runs Phase 3: final scoring across 7 dimensions, produces a verdict.
457
+ `run_audit` is NOT part of the pipeline flow -- it is an on-demand standalone phase invoked separately.
279
458
 
280
- ## Step 7: Present Results
459
+ ### Resume Detection
460
+
461
+ If the run has partial progress, detect the latest completed phase and resume:
462
+
463
+ - If clarification exists but no spec: resume at 4d (specify)
464
+ - If spec exists but no design: resume at 4e (brainstorm)
465
+ - If design exists but no plan: resume at 4f (plan)
466
+ - If plan exists but no task artifacts: resume at 4g (execute)
467
+ - If task artifacts exist but no verification: resume at 4h (verify)
468
+ - If verification exists: resume at 4i (final review)
469
+
470
+ Present resume options:
471
+
472
+ > **Previous progress detected (completed through [phase]).**
473
+ >
474
+ > **What would you like to do?**
475
+ > 1. **Resume from [next phase]** (Recommended)
476
+ > 2. **Start fresh** — Re-run all phases from scratch
477
+
478
+ ## Step 5: Present Results
281
479
 
282
480
  After the reviewer completes, present the verdict and offer next steps with numbered options:
283
481
 
@@ -322,6 +520,15 @@ After the reviewer completes, present the verdict and offer next steps with numb
322
520
  >
323
521
  > Something fundamental went wrong. Review the findings above and decide how to proceed.
324
522
 
523
+ ### Run Summary
524
+
525
+ After presenting results (regardless of verdict), capture the run summary:
526
+
527
+ ```bash
528
+ wazir capture summary --run <run-id>
529
+ wazir status --run <run-id> --json
530
+ ```
531
+
325
532
  ## Error Handling
326
533
 
327
534
  If any phase fails or the user cancels:
@@ -364,7 +571,7 @@ After the audit completes:
364
571
  > 2. **Generate a fix plan** — turn findings into implementation tasks
365
572
  > 3. **Run the pipeline on the fix plan** — generate plan, then execute and review fixes
366
573
 
367
- If the user picks option 3, save the findings as the briefing and run the normal pipeline (Steps 3-7) with intent = `bugfix`.
574
+ If the user picks option 3, save the findings as the briefing and run the normal pipeline (Steps 3-5) with intent = `bugfix`.
368
575
 
369
576
  ---
370
577
 
@@ -11,9 +11,12 @@ Inputs:
11
11
  - current repo state
12
12
  - relevant research findings
13
13
 
14
- Output:
14
+ Output path:
15
15
 
16
- - one implementation plan in `docs/plans/YYYY-MM-DD-<topic>-implementation.md`
16
+ - **Inside a pipeline run** (`.wazir/runs/latest/` exists): write to `.wazir/runs/latest/clarified/execution-plan.md` and task specs to `.wazir/runs/latest/tasks/task-NNN/spec.md`
17
+ - **Standalone** (no active run): write to `docs/plans/YYYY-MM-DD-<topic>-implementation.md`
18
+
19
+ To detect which mode applies: check whether `.wazir/runs/latest/clarified/` exists. If it does, use the run paths; otherwise use the standalone path.
17
20
 
18
21
  The plan must include:
19
22
 
@@ -25,6 +28,30 @@ The plan must include:
25
28
 
26
29
  Rules:
27
30
 
28
- - do not write repo-local task files outside the plan directory
29
- - do not rely on retired `run-*` workflow wrappers
31
+ - do not write implementation code during planning
30
32
  - make the plan detailed enough that another weak model can execute it without inventing missing steps
33
+ - each task spec must have testable acceptance criteria, not vague descriptions
34
+
35
+ ## Plan Review Loop
36
+
37
+ After writing the plan, the reviewer role runs the plan-review loop with `--mode plan-review` using plan dimensions (see `workflows/plan-review.md` and `docs/reference/review-loop-pattern.md`).
38
+
39
+ The planner resolves findings from each pass. The loop runs for `pass_counts[depth]` passes (quick=3, standard=5, deep=7). No extension.
40
+
41
+ For non-code artifacts (the plan itself), Codex review uses stdin pipe:
42
+
43
+ ```bash
44
+ CODEX_MODEL=$(jq -r '.multi_tool.codex.model // empty' .wazir/state/config.json 2>/dev/null)
45
+ CODEX_MODEL=${CODEX_MODEL:-gpt-5.4}
46
+ cat <plan-path> | codex exec -c model="$CODEX_MODEL" "Review this implementation plan focusing on [dimension]..."
47
+ ```
48
+
49
+ `codex review -c model="$CODEX_MODEL"` is used only for code artifacts, not plans.
50
+
51
+ Codex error handling: if `codex` exits non-zero, log the error, mark the pass as `codex-unavailable`, and use self-review findings only. Never treat a Codex failure as a clean pass.
52
+
53
+ Loop depth follows the project's depth config (quick/standard/deep).
54
+
55
+ Standalone mode: if no `.wazir/runs/latest/` exists, artifacts go to `docs/plans/` and review logs go alongside (`docs/plans/YYYY-MM-DD-<topic>-review-pass-N.md`). Loop cap guard is not invoked in standalone mode.
56
+
57
+ After the loop completes, present findings summary and wait for user approval before completing.
@@ -49,7 +49,7 @@ protected_paths:
49
49
  - input
50
50
  - exports/hosts
51
51
  prohibited_terms:
52
- - agent-os
52
+ - legacy-name
53
53
  adapters:
54
54
  context_mode:
55
55
  enabled_by_default: false
@@ -16,7 +16,9 @@ import {
16
16
  writeStatus,
17
17
  writeSummary,
18
18
  } from './store.js';
19
+ import { readRunConfig, getPhaseLoopCap } from './run-config.js';
19
20
  import { readUsage, generateReport, initUsage, recordCaptureSavings, recordPhaseUsage } from './usage.js';
21
+ import { evaluateLoopCapGuard } from '../guards/loop-cap-guard.js';
20
22
 
21
23
  function formatResult(payload, options = {}) {
22
24
  if (options.json) {
@@ -68,6 +70,7 @@ function resolveCaptureContext(parsed, context = {}) {
68
70
  'capture-path',
69
71
  'command',
70
72
  'exit-code',
73
+ 'task-id',
71
74
  ],
72
75
  });
73
76
  const stateRoot = resolveStateRoot(projectRoot, manifest, {
@@ -346,6 +349,87 @@ function handleUsage(parsed, context = {}) {
346
349
  };
347
350
  }
348
351
 
352
+ function handleLoopCheck(parsed, context = {}) {
353
+ const { stateRoot, options } = resolveCaptureContext(parsed, context);
354
+
355
+ requireOption(options, 'run', 'Usage: wazir capture loop-check --run <id> --phase <phase> --loop-count <n> [--task-id <id>] [--state-root <path>] [--json]');
356
+ requireOption(options, 'phase', 'Usage: wazir capture loop-check --run <id> --phase <phase> --loop-count <n> [--task-id <id>] [--state-root <path>] [--json]');
357
+ requireOption(options, 'loopCount', 'Usage: wazir capture loop-check --run <id> --phase <phase> --loop-count <n> [--task-id <id>] [--state-root <path>] [--json]');
358
+
359
+ const runPaths = getRunPaths(stateRoot, options.run);
360
+
361
+ // Standalone mode: if status.json doesn't exist, allow (exit 0)
362
+ if (!fs.existsSync(runPaths.statusPath)) {
363
+ const notice = 'loop-check: standalone mode (no status.json), allowing.\n';
364
+ return {
365
+ exitCode: 0,
366
+ stdout: options.json ? `${JSON.stringify({ allowed: true, reason: 'standalone mode' }, null, 2)}\n` : '',
367
+ stderr: options.json ? '' : notice,
368
+ };
369
+ }
370
+
371
+ // Record the event and update loop count in status.json
372
+ const status = readStatus(runPaths);
373
+ const loopCount = parsePositiveInteger(options.loopCount, '--loop-count');
374
+ const loopPhase = options.phase;
375
+ const loopKey = options.taskId ? `${loopPhase}:${options.taskId}` : loopPhase;
376
+
377
+ status.phase_loop_counts = {
378
+ ...(status.phase_loop_counts ?? {}),
379
+ [loopKey]: loopCount,
380
+ };
381
+
382
+ const event = createBaseEvent('loop_iteration', {
383
+ run_id: options.run,
384
+ phase: loopPhase,
385
+ status: status.status,
386
+ loop_count: loopCount,
387
+ loop_key: loopKey,
388
+ });
389
+
390
+ if (options.taskId) {
391
+ event.task_id = options.taskId;
392
+ }
393
+
394
+ status.updated_at = event.created_at;
395
+ status.last_event = 'loop_iteration';
396
+
397
+ appendEvent(runPaths, event);
398
+ writeStatus(runPaths, status);
399
+
400
+ // Read run-config for loop_cap
401
+ const runConfig = readRunConfig(runPaths);
402
+ const loopCap = getPhaseLoopCap(runConfig, loopPhase);
403
+
404
+ // Evaluate the guard using loopKey (task-scoped or phase-scoped).
405
+ // Cap is per-phase but counts are per-task — each task gets its own
406
+ // budget up to the phase cap. This is intentional: task-scoped tracking
407
+ // prevents parallel tasks from sharing a single counter.
408
+ const guardResult = evaluateLoopCapGuard({
409
+ run_id: options.run,
410
+ phase: loopKey,
411
+ state_root: stateRoot,
412
+ loop_cap: loopCap,
413
+ });
414
+
415
+ if (!guardResult.allowed) {
416
+ return {
417
+ exitCode: 43,
418
+ stderr: `${guardResult.reason}\n`,
419
+ stdout: options.json ? `${JSON.stringify(guardResult, null, 2)}\n` : '',
420
+ };
421
+ }
422
+
423
+ return formatResult({
424
+ run_id: options.run,
425
+ phase: loopPhase,
426
+ loop_key: loopKey,
427
+ loop_count: loopCount,
428
+ loop_cap: loopCap,
429
+ allowed: true,
430
+ }, { json: options.json });
431
+ }
432
+
349
433
  export function runCaptureCommand(parsed, context = {}) {
350
434
  try {
351
435
  switch (parsed.subcommand) {
@@ -361,10 +445,12 @@ export function runCaptureCommand(parsed, context = {}) {
361
445
  return handleSummary(parsed, context);
362
446
  case 'usage':
363
447
  return handleUsage(parsed, context);
448
+ case 'loop-check':
449
+ return handleLoopCheck(parsed, context);
364
450
  default:
365
451
  return {
366
452
  exitCode: 1,
367
- stderr: 'Usage: wazir capture <init|event|route|output|summary|usage> ...\n',
453
+ stderr: 'Usage: wazir capture <init|event|route|output|summary|usage|loop-check> ...\n',
368
454
  };
369
455
  }
370
456
  } catch (error) {
@@ -0,0 +1,21 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import { readYamlFile } from '../loaders.js';
4
+
5
+ const DEFAULT_PHASE_POLICY = {
6
+ loop_cap: 10,
7
+ enabled: true,
8
+ };
9
+
10
+ export function readRunConfig(runPaths) {
11
+ const configPath = path.join(runPaths.runRoot, 'run-config.yaml');
12
+ if (!fs.existsSync(configPath)) {
13
+ return { phase_policy: {} };
14
+ }
15
+ return readYamlFile(configPath);
16
+ }
17
+
18
+ export function getPhaseLoopCap(runConfig, phase) {
19
+ const policy = runConfig?.phase_policy?.[phase] ?? DEFAULT_PHASE_POLICY;
20
+ return policy.loop_cap ?? DEFAULT_PHASE_POLICY.loop_cap;
21
+ }
@@ -7,17 +7,14 @@ const EXCLUDED_DOC_FILES = new Set([
7
7
  ]);
8
8
 
9
9
  const BRAND_PATTERNS = [
10
- { label: 'Agent OS', regex: /\bAgent OS\b/g },
11
- { label: 'agent-os', regex: /\bagent-os\b/g },
12
- { label: 'Symphony', regex: /\bSymphony\b/g },
13
10
  { label: 'Wazir OS', regex: /\bWazir OS\b/g },
14
11
  ];
15
12
 
16
13
  function normalizeAllowedLegacyReferences(content) {
17
14
  return content
18
- .replace(/archive\/legacy-agent-os\/[^\s)`]*/g, 'archive/<legacy>')
19
- .replace(/archive\/v5\.1-agent-os-daemon\/[^\s)`]*/g, 'archive/<legacy>')
20
- .replace(/migration\/v5\.1-agent-os-to-wazir\.md/g, 'migration/<legacy>');
15
+ .replace(/archive\/legacy-wazir\/[^\s)`]*/g, 'archive/<legacy>')
16
+ .replace(/archive\/v5\.1-wazir-daemon\/[^\s)`]*/g, 'archive/<legacy>')
17
+ .replace(/migration\/v5\.1-wazir-rename\.md/g, 'migration/<legacy>');
21
18
  }
22
19
 
23
20
  function walkMarkdownFiles(dirPath, files = []) {
@@ -33,4 +33,5 @@ export const SUPPORTED_COMMAND_SUBJECTS = new Set([
33
33
  'wazir capture output',
34
34
  'wazir capture summary',
35
35
  'wazir capture usage',
36
+ 'wazir capture loop-check',
36
37
  ]);
@@ -5,7 +5,7 @@ import { readJsonFile, readYamlFile } from '../loaders.js';
5
5
  import { validateAgainstSchema } from '../schema-validator.js';
6
6
  import { SUPPORTED_COMMAND_SUBJECTS } from './command-registry.js';
7
7
 
8
- const EXCLUDED_DOC_DIRS = new Set(['daemon', 'plans', 'research', 'audit']);
8
+ const EXCLUDED_DOC_DIRS = new Set(['plans', 'research', 'audit']);
9
9
 
10
10
  function walkMarkdownFiles(dirPath, files = []) {
11
11
  for (const entry of fs.readdirSync(dirPath, { withFileTypes: true })) {
@@ -9,13 +9,9 @@ const EXCLUDED_DOC_FILES = new Set([
9
9
  ]);
10
10
 
11
11
  const FORBIDDEN_TEXT_PATTERNS = [
12
- { label: '.agent-os path', regex: /\.agent-os\//g },
13
12
  { label: 'tasks/input path', regex: /\btasks\/input\//g },
14
13
  { label: 'tasks/clarified path', regex: /\btasks\/clarified\//g },
15
14
  { label: 'legacy run wrapper', regex: /\/run-(clarifier|orchestrator|opus-reviewer)\b/g },
16
- { label: 'legacy daemon binary', regex: /\bagent-os-(daemon|run|review|orchestrate)\b/g },
17
- { label: 'legacy npx invocation', regex: /\bnpx agent-os-[a-z-]+\b/g },
18
- { label: 'daemon workflow config', regex: /daemon\/WORKFLOW\.md/g },
19
15
  ];
20
16
 
21
17
  const FORBIDDEN_DEPENDENCIES = new Set(['express', 'fastify', 'koa', 'socket.io']);
@@ -93,9 +89,9 @@ function collectRuntimeSurfaceFiles(projectRoot) {
93
89
 
94
90
  function normalizeAllowedLegacyReferences(content) {
95
91
  return content
96
- .replace(/archive\/legacy-agent-os\/[^\s)`]*/g, 'archive/<legacy>')
97
- .replace(/archive\/v5\.1-agent-os-daemon\/[^\s)`]*/g, 'archive/<legacy>')
98
- .replace(/migration\/v5\.1-agent-os-to-wazir\.md/g, 'migration/<legacy>');
92
+ .replace(/archive\/legacy-wazir\/[^\s)`]*/g, 'archive/<legacy>')
93
+ .replace(/archive\/v5\.1-wazir-daemon\/[^\s)`]*/g, 'archive/<legacy>')
94
+ .replace(/migration\/v5\.1-wazir-rename\.md/g, 'migration/<legacy>');
99
95
  }
100
96
 
101
97
  function assertGlobalPatternConfiguration() {