@wazir-dev/cli 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +31 -2
- package/docs/plans/2026-03-15-cli-pipeline-integration-plan.md +1 -1
- package/docs/reference/review-loop-pattern.md +429 -0
- package/docs/reference/tooling-cli.md +2 -0
- package/docs/truth-claims.yaml +6 -0
- package/exports/hosts/claude/.claude/agents/clarifier.md +3 -0
- package/exports/hosts/claude/.claude/agents/designer.md +3 -0
- package/exports/hosts/claude/.claude/agents/executor.md +2 -0
- package/exports/hosts/claude/.claude/agents/planner.md +3 -0
- package/exports/hosts/claude/.claude/agents/researcher.md +2 -0
- package/exports/hosts/claude/.claude/agents/reviewer.md +5 -1
- package/exports/hosts/claude/.claude/agents/specifier.md +3 -0
- package/exports/hosts/claude/.claude/commands/clarify.md +4 -0
- package/exports/hosts/claude/.claude/commands/design-review.md +4 -0
- package/exports/hosts/claude/.claude/commands/design.md +4 -0
- package/exports/hosts/claude/.claude/commands/discover.md +4 -0
- package/exports/hosts/claude/.claude/commands/execute.md +4 -0
- package/exports/hosts/claude/.claude/commands/plan-review.md +4 -0
- package/exports/hosts/claude/.claude/commands/plan.md +4 -0
- package/exports/hosts/claude/.claude/commands/spec-challenge.md +4 -0
- package/exports/hosts/claude/.claude/commands/specify.md +4 -0
- package/exports/hosts/claude/.claude/commands/verify.md +4 -0
- package/exports/hosts/claude/export.manifest.json +19 -19
- package/exports/hosts/codex/export.manifest.json +19 -19
- package/exports/hosts/cursor/export.manifest.json +19 -19
- package/exports/hosts/gemini/export.manifest.json +19 -19
- package/hooks/definitions/loop_cap_guard.yaml +1 -1
- package/hooks/hooks.json +18 -0
- package/package.json +3 -2
- package/roles/clarifier.md +3 -0
- package/roles/designer.md +3 -0
- package/roles/executor.md +2 -0
- package/roles/planner.md +3 -0
- package/roles/researcher.md +2 -0
- package/roles/reviewer.md +5 -1
- package/roles/specifier.md +3 -0
- package/skills/brainstorming/SKILL.md +139 -38
- package/skills/clarifier/SKILL.md +219 -0
- package/skills/debugging/SKILL.md +11 -1
- package/skills/executing-plans/SKILL.md +15 -2
- package/skills/executor/SKILL.md +76 -0
- package/skills/init-pipeline/SKILL.md +106 -17
- package/skills/receiving-code-review/SKILL.md +8 -0
- package/skills/requesting-code-review/SKILL.md +25 -5
- package/skills/reviewer/SKILL.md +151 -0
- package/skills/subagent-driven-development/SKILL.md +25 -2
- package/skills/tdd/SKILL.md +8 -0
- package/skills/wazir/SKILL.md +250 -43
- package/skills/writing-plans/SKILL.md +31 -4
- package/templates/examples/wazir-manifest.example.yaml +1 -1
- package/tooling/src/capture/command.js +87 -1
- package/tooling/src/capture/run-config.js +21 -0
- package/tooling/src/checks/brand-truth.js +3 -6
- package/tooling/src/checks/command-registry.js +1 -0
- package/tooling/src/checks/docs-truth.js +1 -1
- package/tooling/src/checks/runtime-surface.js +3 -7
- package/tooling/src/cli.js +8 -3
- package/tooling/src/init/command.js +201 -0
- package/wazir.manifest.yaml +0 -3
- package/workflows/clarify.md +4 -0
- package/workflows/design-review.md +4 -0
- package/workflows/design.md +4 -0
- package/workflows/discover.md +4 -0
- package/workflows/execute.md +4 -0
- package/workflows/plan-review.md +4 -0
- package/workflows/plan.md +4 -0
- package/workflows/spec-challenge.md +4 -0
- package/workflows/specify.md +4 -0
- package/workflows/verify.md +4 -0
package/skills/wazir/SKILL.md
CHANGED
|
@@ -61,16 +61,48 @@ Run `which wazir` to check if the CLI is installed.
|
|
|
61
61
|
>
|
|
62
62
|
> **How would you like to install it?**
|
|
63
63
|
>
|
|
64
|
-
> 1. **npm** (Recommended) — `npm install -g wazir`
|
|
64
|
+
> 1. **npm** (Recommended) — `npm install -g @wazir-dev/cli`
|
|
65
65
|
> 2. **Local link** — `npm link` from the Wazir project root
|
|
66
|
-
> 3. **Skip** — Continue without the CLI (some features will be unavailable)
|
|
67
66
|
|
|
68
|
-
If the user picks 1, run `npm install -g wazir` and verify with `wazir --version`.
|
|
67
|
+
If the user picks 1, run `npm install -g @wazir-dev/cli` and verify with `wazir --version`.
|
|
69
68
|
If the user picks 2, run `npm link` from the project root and verify.
|
|
70
|
-
|
|
69
|
+
|
|
70
|
+
The CLI is **required** — the pipeline uses `wazir capture`, `wazir validate`, `wazir index`, and `wazir doctor` throughout execution. There is no skip option.
|
|
71
71
|
|
|
72
72
|
**If installed**, run `wazir doctor --json` to verify repo health.
|
|
73
73
|
|
|
74
|
+
If doctor reports unhealthy:
|
|
75
|
+
> **Repo health check failed:** [details from doctor output]
|
|
76
|
+
> Fix issues before running the pipeline.
|
|
77
|
+
|
|
78
|
+
Stop. Do NOT continue the pipeline until the health check passes.
|
|
79
|
+
|
|
80
|
+
### Branch Check
|
|
81
|
+
|
|
82
|
+
Run `wazir validate branches` to check the current git branch.
|
|
83
|
+
|
|
84
|
+
- If on `main` or `develop`:
|
|
85
|
+
> You're on **[branch]**. The pipeline requires a feature branch.
|
|
86
|
+
>
|
|
87
|
+
> 1. **Create feat/<slug>** (Recommended) — branch from current
|
|
88
|
+
> 2. **Continue on [branch]** — not recommended for feature/refactor work
|
|
89
|
+
|
|
90
|
+
Wait for the user to answer before continuing.
|
|
91
|
+
|
|
92
|
+
- If branch name is invalid (not `feat/`, `fix/`, `chore/`, etc.): warn but continue.
|
|
93
|
+
|
|
94
|
+
### Index Check
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
# Ask the CLI how many files are indexed. A missing index or a failing
# `wazir index stats` leaves INDEX_STATS empty rather than aborting.
INDEX_STATS=$(wazir index stats --json 2>/dev/null)
FILE_COUNT=$(echo "$INDEX_STATS" | jq -r '.file_count // 0' 2>/dev/null)
# jq prints nothing for empty input, so default to 0; otherwise the numeric
# test errors out and the script wrongly falls through to `index refresh`.
if [ "${FILE_COUNT:-0}" -eq 0 ]; then
  # No index yet: build it from scratch and generate summaries for every tier.
  wazir index build && wazir index summarize --tier all
else
  # Index already populated: refresh it.
  wazir index refresh
fi
|
|
104
|
+
```
|
|
105
|
+
|
|
74
106
|
### Pipeline Init Check
|
|
75
107
|
|
|
76
108
|
Check if `.wazir/state/config.json` exists.
|
|
@@ -89,6 +121,12 @@ ln -sfn run-YYYYMMDD-HHMMSS .wazir/runs/latest
|
|
|
89
121
|
|
|
90
122
|
If a previous completed run exists (check for a `completed_at` field in the previous `latest` run's `run-config.yaml`), record its `run_id` as `parent_run_id` in the new run's config.
|
|
91
123
|
|
|
124
|
+
After creating the run directory, initialize event capture:
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
wazir capture init --run <run-id> --phase clarify --status starting
|
|
128
|
+
```
|
|
129
|
+
|
|
92
130
|
## Step 3: Pre-Flight Configuration
|
|
93
131
|
|
|
94
132
|
Build the run configuration. Skip questions that were answered via inline modifiers.
|
|
@@ -150,11 +188,21 @@ parallel_backend: none # none | claude_teams (future: subagents,
|
|
|
150
188
|
|
|
151
189
|
# Phase policy (system-decided, not user-facing)
|
|
152
190
|
phase_policy:
|
|
153
|
-
discover:
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
191
|
+
discover: { enabled: true, loop_cap: 10 }
|
|
192
|
+
clarify: { enabled: true, loop_cap: 10 }
|
|
193
|
+
specify: { enabled: true, loop_cap: 10 }
|
|
194
|
+
spec-challenge: { enabled: true, loop_cap: 10 }
|
|
195
|
+
author: { enabled: false, loop_cap: 10 }
|
|
196
|
+
design: { enabled: true, loop_cap: 10 }
|
|
197
|
+
design-review: { enabled: true, loop_cap: 10 }
|
|
198
|
+
plan: { enabled: true, loop_cap: 10 }
|
|
199
|
+
plan-review: { enabled: true, loop_cap: 10 }
|
|
200
|
+
execute: { enabled: true, loop_cap: 10 }
|
|
201
|
+
verify: { enabled: true, loop_cap: 5 }
|
|
202
|
+
review: { enabled: true, loop_cap: 10 }
|
|
203
|
+
learn: { enabled: false, loop_cap: 5 }
|
|
204
|
+
prepare_next: { enabled: false, loop_cap: 5 }
|
|
205
|
+
run_audit: { enabled: false, loop_cap: 10 }
|
|
158
206
|
|
|
159
207
|
# Research
|
|
160
208
|
research_topics: [] # populated by researcher phase
|
|
@@ -176,15 +224,15 @@ Map intent + depth to applicable phases. The system decides — the user does NO
|
|
|
176
224
|
|-------|--------|-------|
|
|
177
225
|
| **Core** (always run) | `clarify`, `verify`, `review` | Never skipped |
|
|
178
226
|
| **Adaptive** (run when evidence says so) | `discover`, `design`, `author`, `specify` | Skipped for bugfix/docs/spike at quick depth |
|
|
179
|
-
| **Scale** (intensity varies) | `spec-challenge`, `plan-review`, `design-review` |
|
|
227
|
+
| **Scale** (intensity varies) | `spec-challenge`, `plan-review`, `design-review` | Loop cap controls iteration depth |
|
|
180
228
|
|
|
181
229
|
Log skip decisions to the run's `run-config.yaml` with reasons:
|
|
182
230
|
|
|
183
231
|
```yaml
|
|
184
232
|
phase_policy:
|
|
185
|
-
discover:
|
|
186
|
-
design:
|
|
187
|
-
spec-challenge: { enabled: true,
|
|
233
|
+
discover: { enabled: true, loop_cap: 10 }
|
|
234
|
+
design: { enabled: false, loop_cap: 10, reason: "bugfix intent — no design needed" }
|
|
235
|
+
spec-challenge: { enabled: true, loop_cap: 10 }
|
|
188
236
|
```
|
|
189
237
|
|
|
190
238
|
### Confidence Gate
|
|
@@ -192,7 +240,7 @@ phase_policy:
|
|
|
192
240
|
After building the run config, evaluate confidence:
|
|
193
241
|
|
|
194
242
|
- **High confidence** (clear intent, depth set, no ambiguity) — show a one-line summary and proceed:
|
|
195
|
-
> **Running: standard depth, feature, sequential. 11 of
|
|
243
|
+
> **Running: standard depth, feature, sequential. 11 of 15 phases. Proceeding...**
|
|
196
244
|
|
|
197
245
|
- **Low confidence** (ambiguous intent, unclear scope) — show the full plan and ask:
|
|
198
246
|
> **Here's the run plan:**
|
|
@@ -205,11 +253,13 @@ After building the run config, evaluate confidence:
|
|
|
205
253
|
> 1. **Yes, proceed** (Recommended)
|
|
206
254
|
> 2. **No, let me adjust**
|
|
207
255
|
|
|
208
|
-
## Step 4: Run
|
|
256
|
+
## Step 4: Run Pipeline Phases
|
|
257
|
+
|
|
258
|
+
The full pipeline runs these phases in order. Each phase produces an artifact that must pass its review loop before flowing to the next phase. Review mode is always passed explicitly (`--mode`) -- no auto-detection.
|
|
209
259
|
|
|
210
|
-
### Source Capture
|
|
260
|
+
### 4a: Source Capture
|
|
211
261
|
|
|
212
|
-
Before invoking the clarifier,
|
|
262
|
+
Before invoking the clarifier, capture all referenced sources locally:
|
|
213
263
|
|
|
214
264
|
- Fetch all URLs referenced in `.wazir/input/` briefing files
|
|
215
265
|
- Save fetched content to `.wazir/runs/<run-id>/sources/`
|
|
@@ -238,46 +288,194 @@ Before invoking the clarifier, instruct the researcher to capture all referenced
|
|
|
238
288
|
|
|
239
289
|
Research briefs produced by the researcher must reference local paths (`sources/src-001-...`) instead of live URLs. The original URL is preserved in the manifest for provenance. Failures are recorded explicitly — never silently skipped.
|
|
240
290
|
|
|
241
|
-
###
|
|
291
|
+
### 4b: Clarify (clarifier role)
|
|
292
|
+
|
|
293
|
+
```bash
|
|
294
|
+
wazir capture event --run <run-id> --event phase_enter --phase clarify --status in_progress
|
|
295
|
+
```
|
|
242
296
|
|
|
243
|
-
Invoke the
|
|
297
|
+
Invoke the clarifier skill for Phase 1A.
|
|
298
|
+
Produces clarification artifact.
|
|
299
|
+
Review: clarification-review loop (`--mode clarification-review`, spec/clarification dimensions).
|
|
300
|
+
Pass count: quick=3, standard=5, deep=7. No extension.
|
|
301
|
+
Checkpoint: user approves clarification.
|
|
244
302
|
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
- Phase 1A+: Spec Harden (passes determined by depth)
|
|
249
|
-
- Phase 1B: Brainstorm (interactive — **will pause for user approval**. If `team_mode: parallel`, uses structured dialogue with Free Thinker + Grounder + Synthesizer agents)
|
|
250
|
-
- Phase 1C: Plan (task generation)
|
|
303
|
+
```bash
|
|
304
|
+
wazir capture event --run <run-id> --event phase_exit --phase clarify --status completed
|
|
305
|
+
```
|
|
251
306
|
|
|
252
|
-
|
|
307
|
+
### 4c: Research (researcher role via discover workflow)
|
|
253
308
|
|
|
254
|
-
|
|
255
|
-
>
|
|
256
|
-
|
|
257
|
-
|
|
309
|
+
```bash
|
|
310
|
+
wazir capture event --run <run-id> --event phase_enter --phase discover --status in_progress
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
Clarifier delegates to discover workflow (researcher role).
|
|
314
|
+
Produces research artifact.
|
|
315
|
+
Review: research-review loop (`--mode research-review`, research dimensions).
|
|
316
|
+
Pass count: quick=3, standard=5, deep=7. No extension.
|
|
317
|
+
Skip condition: depth=quick AND intent=bugfix.
|
|
318
|
+
|
|
319
|
+
```bash
|
|
320
|
+
wazir capture event --run <run-id> --event phase_exit --phase discover --status completed
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
### 4d: Specify (specifier role)
|
|
324
|
+
|
|
325
|
+
```bash
|
|
326
|
+
wazir capture event --run <run-id> --event phase_enter --phase specify --status in_progress
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
Delegate to specify workflow.
|
|
330
|
+
Specifier produces measurable spec from clarification + research.
|
|
331
|
+
Review: spec-challenge loop (`--mode spec-challenge`, spec/clarification dimensions).
|
|
332
|
+
Pass count: quick=3, standard=5, deep=7. No extension.
|
|
333
|
+
Checkpoint: user approves spec.
|
|
334
|
+
|
|
335
|
+
```bash
|
|
336
|
+
wazir capture event --run <run-id> --event phase_exit --phase specify --status completed
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
### 4d.5: Author (content-author role) [ADAPTIVE]
|
|
340
|
+
|
|
341
|
+
```bash
|
|
342
|
+
wazir capture event --run <run-id> --event phase_enter --phase author --status in_progress
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
Enabled when `phase_policy.author.enabled = true` (default: false).
|
|
346
|
+
Content-author writes non-code content artifacts.
|
|
347
|
+
Approval gate: human approval required (not a review loop).
|
|
348
|
+
Skip condition: disabled by default. Enable for content-heavy projects.
|
|
349
|
+
|
|
350
|
+
```bash
|
|
351
|
+
wazir capture event --run <run-id> --event phase_exit --phase author --status completed
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
### 4e: Brainstorm (designer role)
|
|
355
|
+
|
|
356
|
+
```bash
|
|
357
|
+
wazir capture event --run <run-id> --event phase_enter --phase design --status in_progress
|
|
358
|
+
```
|
|
258
359
|
|
|
259
|
-
|
|
360
|
+
Invoke brainstorming skill for Phase 1B.
|
|
361
|
+
Interactive -- pauses for user approval of design concept.
|
|
362
|
+
After user approval: design-review loop (`--mode design-review`,
|
|
363
|
+
canonical design-review dimensions: spec coverage, design-spec consistency,
|
|
364
|
+
accessibility, visual consistency, exported-code fidelity).
|
|
365
|
+
Pass count: quick=3, standard=5, deep=7. No extension.
|
|
366
|
+
Skip condition: intent=bugfix/docs.
|
|
260
367
|
|
|
261
|
-
|
|
368
|
+
```bash
|
|
369
|
+
wazir capture event --run <run-id> --event phase_exit --phase design --status completed
|
|
370
|
+
```
|
|
371
|
+
|
|
372
|
+
### 4f: Plan (planner role via wz:writing-plans)
|
|
373
|
+
|
|
374
|
+
```bash
|
|
375
|
+
wazir capture event --run <run-id> --event phase_enter --phase plan --status in_progress
|
|
376
|
+
```
|
|
377
|
+
|
|
378
|
+
Delegate to `wz:writing-plans`.
|
|
379
|
+
Planner produces execution plan and task specs.
|
|
380
|
+
Review: plan-review loop (`--mode plan-review`, plan dimensions).
|
|
381
|
+
Pass count: quick=3, standard=5, deep=7. No extension.
|
|
382
|
+
Checkpoint: user approves plan.
|
|
262
383
|
|
|
263
|
-
|
|
384
|
+
```bash
|
|
385
|
+
wazir capture event --run <run-id> --event phase_exit --phase plan --status completed
|
|
386
|
+
```
|
|
387
|
+
|
|
388
|
+
### 4g: Execute (executor role)
|
|
389
|
+
|
|
390
|
+
```bash
|
|
391
|
+
wazir capture event --run <run-id> --event phase_enter --phase execute --status in_progress
|
|
392
|
+
```
|
|
393
|
+
|
|
394
|
+
**Pre-execution gate** — run before the first task:
|
|
395
|
+
|
|
396
|
+
```bash
|
|
397
|
+
wazir validate manifest && wazir validate hooks
|
|
398
|
+
# If either fails, stop and report the failure. Do NOT proceed to task execution.
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
Invoke executor skill for Phase 2.
|
|
402
|
+
Per-task review: task-review loop (`--mode task-review --task-id <NNN>`,
|
|
403
|
+
5 task-execution dimensions) before each commit.
|
|
404
|
+
Review logs: `execute-task-<NNN>-review-pass-<N>.md`
|
|
405
|
+
Cap tracking: `wazir capture loop-check --run <run-id> --phase execute --loop-count <N> --task-id <NNN>`
|
|
406
|
+
Codex error handling: non-zero exit -> codex-unavailable, self-review only.
|
|
407
|
+
NOTE: per-task review is NOT the final review.
|
|
264
408
|
|
|
265
409
|
If `team_mode: parallel` in run-config, the executor spawns Agent Teams for independent tasks. Otherwise, tasks run sequentially.
|
|
266
410
|
|
|
267
|
-
|
|
411
|
+
```bash
|
|
412
|
+
wazir capture event --run <run-id> --event phase_exit --phase execute --status completed
|
|
413
|
+
```
|
|
414
|
+
|
|
415
|
+
### 4h: Verify (verifier role)
|
|
268
416
|
|
|
269
|
-
|
|
270
|
-
>
|
|
271
|
-
|
|
272
|
-
|
|
417
|
+
```bash
|
|
418
|
+
wazir capture event --run <run-id> --event phase_enter --phase verify --status in_progress
|
|
419
|
+
```
|
|
420
|
+
|
|
421
|
+
Deterministic verification of execution claims.
|
|
422
|
+
Not a review loop -- produces proof, not findings.
|
|
273
423
|
|
|
274
|
-
|
|
424
|
+
```bash
|
|
425
|
+
wazir capture event --run <run-id> --event phase_exit --phase verify --status completed
|
|
426
|
+
```
|
|
427
|
+
|
|
428
|
+
### 4i: Final Review (reviewer role in final mode)
|
|
429
|
+
|
|
430
|
+
```bash
|
|
431
|
+
wazir capture event --run <run-id> --event phase_enter --phase review --status in_progress
|
|
432
|
+
```
|
|
433
|
+
|
|
434
|
+
Invoke reviewer skill with `--mode final`.
|
|
435
|
+
7-dimension scored review (correctness, completeness, wiring, verification,
|
|
436
|
+
drift, quality, documentation). Score 0-70.
|
|
437
|
+
This IS the scored final review gate.
|
|
438
|
+
|
|
439
|
+
```bash
|
|
440
|
+
wazir capture event --run <run-id> --event phase_exit --phase review --status completed
|
|
441
|
+
```
|
|
442
|
+
|
|
443
|
+
### 4j: Learn (learner role) [ADAPTIVE]
|
|
444
|
+
|
|
445
|
+
Enabled when `phase_policy.learn.enabled = true` (default: false).
|
|
446
|
+
Extract durable learnings from the completed run.
|
|
447
|
+
No review loop. Learnings require explicit scope tags.
|
|
448
|
+
Skip condition: disabled by default. Enable for retrospective runs.
|
|
449
|
+
|
|
450
|
+
### 4k: Prepare Next (planner role) [ADAPTIVE]
|
|
275
451
|
|
|
276
|
-
|
|
452
|
+
Enabled when `phase_policy.prepare_next.enabled = true` (default: false).
|
|
453
|
+
Prepare context and handoff for the next run.
|
|
454
|
+
No review loop. No implicit carry-forward of unapproved learnings.
|
|
455
|
+
Skip condition: disabled by default.
|
|
277
456
|
|
|
278
|
-
|
|
457
|
+
`run_audit` is NOT part of the pipeline flow -- it is an on-demand standalone phase invoked separately.
|
|
279
458
|
|
|
280
|
-
|
|
459
|
+
### Resume Detection
|
|
460
|
+
|
|
461
|
+
If the run has partial progress, detect the latest completed phase and resume:
|
|
462
|
+
|
|
463
|
+
- If clarification exists but no spec: resume at 4d (specify)
|
|
464
|
+
- If spec exists but no design: resume at 4e (brainstorm)
|
|
465
|
+
- If design exists but no plan: resume at 4f (plan)
|
|
466
|
+
- If plan exists but no task artifacts: resume at 4g (execute)
|
|
467
|
+
- If task artifacts exist but no verification: resume at 4h (verify)
|
|
468
|
+
- If verification exists: resume at 4i (final review)
|
|
469
|
+
|
|
470
|
+
Present resume options:
|
|
471
|
+
|
|
472
|
+
> **Previous progress detected (completed through [phase]).**
|
|
473
|
+
>
|
|
474
|
+
> **What would you like to do?**
|
|
475
|
+
> 1. **Resume from [next phase]** (Recommended)
|
|
476
|
+
> 2. **Start fresh** — Re-run all phases from scratch
|
|
477
|
+
|
|
478
|
+
## Step 5: Present Results
|
|
281
479
|
|
|
282
480
|
After the reviewer completes, present the verdict and offer next steps with numbered options:
|
|
283
481
|
|
|
@@ -322,6 +520,15 @@ After the reviewer completes, present the verdict and offer next steps with numb
|
|
|
322
520
|
>
|
|
323
521
|
> Something fundamental went wrong. Review the findings above and decide how to proceed.
|
|
324
522
|
|
|
523
|
+
### Run Summary
|
|
524
|
+
|
|
525
|
+
After presenting results (regardless of verdict), capture the run summary:
|
|
526
|
+
|
|
527
|
+
```bash
|
|
528
|
+
wazir capture summary --run <run-id>
|
|
529
|
+
wazir status --run <run-id> --json
|
|
530
|
+
```
|
|
531
|
+
|
|
325
532
|
## Error Handling
|
|
326
533
|
|
|
327
534
|
If any phase fails or the user cancels:
|
|
@@ -364,7 +571,7 @@ After the audit completes:
|
|
|
364
571
|
> 2. **Generate a fix plan** — turn findings into implementation tasks
|
|
365
572
|
> 3. **Run the pipeline on the fix plan** — generate plan, then execute and review fixes
|
|
366
573
|
|
|
367
|
-
If the user picks option 3, save the findings as the briefing and run the normal pipeline (Steps 3-
|
|
574
|
+
If the user picks option 3, save the findings as the briefing and run the normal pipeline (Steps 3-5) with intent = `bugfix`.
|
|
368
575
|
|
|
369
576
|
---
|
|
370
577
|
|
|
@@ -11,9 +11,12 @@ Inputs:
|
|
|
11
11
|
- current repo state
|
|
12
12
|
- relevant research findings
|
|
13
13
|
|
|
14
|
-
Output:
|
|
14
|
+
Output path:
|
|
15
15
|
|
|
16
|
-
-
|
|
16
|
+
- **Inside a pipeline run** (`.wazir/runs/latest/` exists): write to `.wazir/runs/latest/clarified/execution-plan.md` and task specs to `.wazir/runs/latest/tasks/task-NNN/spec.md`
|
|
17
|
+
- **Standalone** (no active run): write to `docs/plans/YYYY-MM-DD-<topic>-implementation.md`
|
|
18
|
+
|
|
19
|
+
To detect: check if `.wazir/runs/latest/clarified/` exists. If yes, use run paths.
|
|
17
20
|
|
|
18
21
|
The plan must include:
|
|
19
22
|
|
|
@@ -25,6 +28,30 @@ The plan must include:
|
|
|
25
28
|
|
|
26
29
|
Rules:
|
|
27
30
|
|
|
28
|
-
- do not write
|
|
29
|
-
- do not rely on retired `run-*` workflow wrappers
|
|
31
|
+
- do not write implementation code during planning
|
|
30
32
|
- make the plan detailed enough that another weak model can execute it without inventing missing steps
|
|
33
|
+
- each task spec must have testable acceptance criteria, not vague descriptions
|
|
34
|
+
|
|
35
|
+
## Plan Review Loop
|
|
36
|
+
|
|
37
|
+
After writing the plan, the reviewer role runs the plan-review loop with `--mode plan-review` using plan dimensions (see `workflows/plan-review.md` and `docs/reference/review-loop-pattern.md`).
|
|
38
|
+
|
|
39
|
+
The planner resolves findings from each pass. The loop runs for `pass_counts[depth]` passes (quick=3, standard=5, deep=7). No extension.
|
|
40
|
+
|
|
41
|
+
For non-code artifacts (the plan itself), Codex review uses stdin pipe:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
CODEX_MODEL=$(jq -r '.multi_tool.codex.model // empty' .wazir/state/config.json 2>/dev/null)
|
|
45
|
+
CODEX_MODEL=${CODEX_MODEL:-gpt-5.4}
|
|
46
|
+
cat <plan-path> | codex exec -c model="$CODEX_MODEL" "Review this implementation plan focusing on [dimension]..."
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
`codex review -c model="$CODEX_MODEL"` is used only for code artifacts, not plans.
|
|
50
|
+
|
|
51
|
+
Codex error handling: if `codex` exits non-zero, log the error, mark the pass as `codex-unavailable`, and use self-review findings only. Never treat a Codex failure as a clean pass.
|
|
52
|
+
|
|
53
|
+
Loop depth follows the project's depth config (quick/standard/deep).
|
|
54
|
+
|
|
55
|
+
Standalone mode: if no `.wazir/runs/latest/` exists, artifacts go to `docs/plans/` and review logs go alongside (`docs/plans/YYYY-MM-DD-<topic>-review-pass-N.md`). Loop cap guard is not invoked in standalone mode.
|
|
56
|
+
|
|
57
|
+
After the loop completes, present findings summary and wait for user approval before completing.
|
|
@@ -16,7 +16,9 @@ import {
|
|
|
16
16
|
writeStatus,
|
|
17
17
|
writeSummary,
|
|
18
18
|
} from './store.js';
|
|
19
|
+
import { readRunConfig, getPhaseLoopCap } from './run-config.js';
|
|
19
20
|
import { readUsage, generateReport, initUsage, recordCaptureSavings, recordPhaseUsage } from './usage.js';
|
|
21
|
+
import { evaluateLoopCapGuard } from '../guards/loop-cap-guard.js';
|
|
20
22
|
|
|
21
23
|
function formatResult(payload, options = {}) {
|
|
22
24
|
if (options.json) {
|
|
@@ -68,6 +70,7 @@ function resolveCaptureContext(parsed, context = {}) {
|
|
|
68
70
|
'capture-path',
|
|
69
71
|
'command',
|
|
70
72
|
'exit-code',
|
|
73
|
+
'task-id',
|
|
71
74
|
],
|
|
72
75
|
});
|
|
73
76
|
const stateRoot = resolveStateRoot(projectRoot, manifest, {
|
|
@@ -346,6 +349,87 @@ function handleUsage(parsed, context = {}) {
|
|
|
346
349
|
};
|
|
347
350
|
}
|
|
348
351
|
|
|
352
|
+
/**
 * Handle `wazir capture loop-check`.
 *
 * Stores the reported loop count in the run status under the phase name (or
 * under `<phase>:<task-id>` when `--task-id` is given), appends a
 * `loop_iteration` event, and then asks the loop-cap guard whether another
 * iteration is allowed.
 *
 * @param {object} parsed - Parsed CLI arguments, forwarded to `resolveCaptureContext`.
 * @param {object} [context={}] - Invocation context, forwarded to `resolveCaptureContext`.
 * @returns {object} Command result. Exit code 0 in standalone mode (no status
 *   file for the run); exit code 43 with the guard's reason on stderr when the
 *   guard disallows the iteration; otherwise the `formatResult` output.
 */
function handleLoopCheck(parsed, context = {}) {
  const usage = 'Usage: wazir capture loop-check --run <id> --phase <phase> --loop-count <n> [--task-id <id>] [--state-root <path>] [--json]';
  const { stateRoot, options } = resolveCaptureContext(parsed, context);

  // All three options are mandatory and share the same usage message.
  for (const requiredName of ['run', 'phase', 'loopCount']) {
    requireOption(options, requiredName, usage);
  }

  const runPaths = getRunPaths(stateRoot, options.run);

  // Standalone mode: without a status file there is nothing to track, so allow.
  if (!fs.existsSync(runPaths.statusPath)) {
    if (options.json) {
      return {
        exitCode: 0,
        stdout: `${JSON.stringify({ allowed: true, reason: 'standalone mode' }, null, 2)}\n`,
        stderr: '',
      };
    }
    return {
      exitCode: 0,
      stdout: '',
      stderr: 'loop-check: standalone mode (no status.json), allowing.\n',
    };
  }

  const status = readStatus(runPaths);
  const loopCount = parsePositiveInteger(options.loopCount, '--loop-count');
  const { phase: loopPhase, taskId } = options;
  // Task-scoped key keeps each task's counter separate within the same phase.
  const loopKey = taskId ? `${loopPhase}:${taskId}` : loopPhase;

  const event = createBaseEvent('loop_iteration', {
    run_id: options.run,
    phase: loopPhase,
    status: status.status,
    loop_count: loopCount,
    loop_key: loopKey,
  });
  if (taskId) {
    event.task_id = taskId;
  }

  // Record the new count and event metadata on the status before persisting.
  Object.assign(status, {
    phase_loop_counts: {
      ...(status.phase_loop_counts ?? {}),
      [loopKey]: loopCount,
    },
    updated_at: event.created_at,
    last_event: 'loop_iteration',
  });

  appendEvent(runPaths, event);
  writeStatus(runPaths, status);

  // The cap is looked up per phase, while the guard is evaluated per loop key,
  // so every task gets its own budget up to the phase cap.
  const loopCap = getPhaseLoopCap(readRunConfig(runPaths), loopPhase);
  const verdict = evaluateLoopCapGuard({
    run_id: options.run,
    phase: loopKey,
    state_root: stateRoot,
    loop_cap: loopCap,
  });

  if (verdict.allowed) {
    return formatResult({
      run_id: options.run,
      phase: loopPhase,
      loop_key: loopKey,
      loop_count: loopCount,
      loop_cap: loopCap,
      allowed: true,
    }, { json: options.json });
  }

  return {
    exitCode: 43,
    stderr: `${verdict.reason}\n`,
    stdout: options.json ? `${JSON.stringify(verdict, null, 2)}\n` : '',
  };
}
|
|
432
|
+
|
|
349
433
|
export function runCaptureCommand(parsed, context = {}) {
|
|
350
434
|
try {
|
|
351
435
|
switch (parsed.subcommand) {
|
|
@@ -361,10 +445,12 @@ export function runCaptureCommand(parsed, context = {}) {
|
|
|
361
445
|
return handleSummary(parsed, context);
|
|
362
446
|
case 'usage':
|
|
363
447
|
return handleUsage(parsed, context);
|
|
448
|
+
case 'loop-check':
|
|
449
|
+
return handleLoopCheck(parsed, context);
|
|
364
450
|
default:
|
|
365
451
|
return {
|
|
366
452
|
exitCode: 1,
|
|
367
|
-
stderr: 'Usage: wazir capture <init|event|route|output|summary|usage> ...\n',
|
|
453
|
+
stderr: 'Usage: wazir capture <init|event|route|output|summary|usage|loop-check> ...\n',
|
|
368
454
|
};
|
|
369
455
|
}
|
|
370
456
|
} catch (error) {
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { readYamlFile } from '../loaders.js';
|
|
4
|
+
|
|
5
|
+
// Policy applied to any phase that has no entry in the run config.
const DEFAULT_PHASE_POLICY = {
  loop_cap: 10,
  enabled: true,
};

/**
 * Load `run-config.yaml` from a run directory.
 *
 * @param {{ runRoot: string }} runPaths - Run paths; only `runRoot` is read here.
 * @returns {object} The parsed config, or `{ phase_policy: {} }` when the file
 *   is missing or parses to null/undefined.
 */
export function readRunConfig(runPaths) {
  const configPath = path.join(runPaths.runRoot, 'run-config.yaml');
  if (!fs.existsSync(configPath)) {
    return { phase_policy: {} };
  }
  // NOTE(review): readYamlFile presumably yields null/undefined for an empty
  // document; treat that like a missing file so callers always get an object.
  return readYamlFile(configPath) ?? { phase_policy: {} };
}

/**
 * Resolve the loop cap for a phase.
 *
 * @param {object|null|undefined} runConfig - Config as returned by `readRunConfig`.
 * @param {string} phase - Phase name used as the key into `phase_policy`.
 * @returns {number} The phase's `loop_cap` when it is a finite number (numeric
 *   strings are converted); otherwise the default cap.
 */
export function getPhaseLoopCap(runConfig, phase) {
  const policy = runConfig?.phase_policy?.[phase] ?? DEFAULT_PHASE_POLICY;
  const rawCap = policy.loop_cap ?? DEFAULT_PHASE_POLICY.loop_cap;
  // Hand-edited YAML may quote the number; accept "7" but not "ten".
  const cap = typeof rawCap === 'string' && rawCap.trim() !== '' ? Number(rawCap) : rawCap;
  // A non-numeric cap would make numeric comparisons against it meaningless,
  // so fall back to the default instead of returning it.
  return typeof cap === 'number' && Number.isFinite(cap) ? cap : DEFAULT_PHASE_POLICY.loop_cap;
}
|
|
@@ -7,17 +7,14 @@ const EXCLUDED_DOC_FILES = new Set([
|
|
|
7
7
|
]);
|
|
8
8
|
|
|
9
9
|
const BRAND_PATTERNS = [
|
|
10
|
-
{ label: 'Agent OS', regex: /\bAgent OS\b/g },
|
|
11
|
-
{ label: 'agent-os', regex: /\bagent-os\b/g },
|
|
12
|
-
{ label: 'Symphony', regex: /\bSymphony\b/g },
|
|
13
10
|
{ label: 'Wazir OS', regex: /\bWazir OS\b/g },
|
|
14
11
|
];
|
|
15
12
|
|
|
16
13
|
function normalizeAllowedLegacyReferences(content) {
|
|
17
14
|
return content
|
|
18
|
-
.replace(/archive\/legacy-
|
|
19
|
-
.replace(/archive\/v5\.1-
|
|
20
|
-
.replace(/migration\/v5\.1-
|
|
15
|
+
.replace(/archive\/legacy-wazir\/[^\s)`]*/g, 'archive/<legacy>')
|
|
16
|
+
.replace(/archive\/v5\.1-wazir-daemon\/[^\s)`]*/g, 'archive/<legacy>')
|
|
17
|
+
.replace(/migration\/v5\.1-wazir-rename\.md/g, 'migration/<legacy>');
|
|
21
18
|
}
|
|
22
19
|
|
|
23
20
|
function walkMarkdownFiles(dirPath, files = []) {
|
|
@@ -5,7 +5,7 @@ import { readJsonFile, readYamlFile } from '../loaders.js';
|
|
|
5
5
|
import { validateAgainstSchema } from '../schema-validator.js';
|
|
6
6
|
import { SUPPORTED_COMMAND_SUBJECTS } from './command-registry.js';
|
|
7
7
|
|
|
8
|
-
const EXCLUDED_DOC_DIRS = new Set(['
|
|
8
|
+
const EXCLUDED_DOC_DIRS = new Set(['plans', 'research', 'audit']);
|
|
9
9
|
|
|
10
10
|
function walkMarkdownFiles(dirPath, files = []) {
|
|
11
11
|
for (const entry of fs.readdirSync(dirPath, { withFileTypes: true })) {
|
|
@@ -9,13 +9,9 @@ const EXCLUDED_DOC_FILES = new Set([
|
|
|
9
9
|
]);
|
|
10
10
|
|
|
11
11
|
const FORBIDDEN_TEXT_PATTERNS = [
|
|
12
|
-
{ label: '.agent-os path', regex: /\.agent-os\//g },
|
|
13
12
|
{ label: 'tasks/input path', regex: /\btasks\/input\//g },
|
|
14
13
|
{ label: 'tasks/clarified path', regex: /\btasks\/clarified\//g },
|
|
15
14
|
{ label: 'legacy run wrapper', regex: /\/run-(clarifier|orchestrator|opus-reviewer)\b/g },
|
|
16
|
-
{ label: 'legacy daemon binary', regex: /\bagent-os-(daemon|run|review|orchestrate)\b/g },
|
|
17
|
-
{ label: 'legacy npx invocation', regex: /\bnpx agent-os-[a-z-]+\b/g },
|
|
18
|
-
{ label: 'daemon workflow config', regex: /daemon\/WORKFLOW\.md/g },
|
|
19
15
|
];
|
|
20
16
|
|
|
21
17
|
const FORBIDDEN_DEPENDENCIES = new Set(['express', 'fastify', 'koa', 'socket.io']);
|
|
@@ -93,9 +89,9 @@ function collectRuntimeSurfaceFiles(projectRoot) {
|
|
|
93
89
|
|
|
94
90
|
function normalizeAllowedLegacyReferences(content) {
|
|
95
91
|
return content
|
|
96
|
-
.replace(/archive\/legacy-
|
|
97
|
-
.replace(/archive\/v5\.1-
|
|
98
|
-
.replace(/migration\/v5\.1-
|
|
92
|
+
.replace(/archive\/legacy-wazir\/[^\s)`]*/g, 'archive/<legacy>')
|
|
93
|
+
.replace(/archive\/v5\.1-wazir-daemon\/[^\s)`]*/g, 'archive/<legacy>')
|
|
94
|
+
.replace(/migration\/v5\.1-wazir-rename\.md/g, 'migration/<legacy>');
|
|
99
95
|
}
|
|
100
96
|
|
|
101
97
|
function assertGlobalPatternConfiguration() {
|