@wazir-dev/cli 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +39 -44
- package/README.md +13 -13
- package/assets/demo.cast +47 -0
- package/assets/demo.gif +0 -0
- package/docs/anti-patterns/AP-23-skipping-enabled-workflows.md +28 -0
- package/docs/anti-patterns/AP-24-clarifier-deciding-scope.md +34 -0
- package/docs/concepts/architecture.md +1 -1
- package/docs/concepts/why-wazir.md +1 -1
- package/docs/readmes/INDEX.md +1 -1
- package/docs/readmes/features/expertise/README.md +1 -1
- package/docs/readmes/features/hooks/pre-compact-summary.md +1 -1
- package/docs/reference/hooks.md +1 -0
- package/docs/reference/launch-checklist.md +3 -3
- package/docs/reference/review-loop-pattern.md +3 -2
- package/docs/reference/skill-tiers.md +2 -2
- package/expertise/antipatterns/process/ai-coding-antipatterns.md +117 -0
- package/exports/hosts/claude/.claude/commands/plan-review.md +3 -1
- package/exports/hosts/claude/.claude/commands/verify.md +30 -1
- package/exports/hosts/claude/export.manifest.json +2 -2
- package/exports/hosts/codex/export.manifest.json +2 -2
- package/exports/hosts/cursor/export.manifest.json +2 -2
- package/exports/hosts/gemini/export.manifest.json +2 -2
- package/llms-full.txt +48 -18
- package/package.json +2 -3
- package/schemas/phase-report.schema.json +9 -0
- package/skills/brainstorming/SKILL.md +14 -2
- package/skills/clarifier/SKILL.md +189 -35
- package/skills/executor/SKILL.md +67 -0
- package/skills/init-pipeline/SKILL.md +0 -1
- package/skills/reviewer/SKILL.md +86 -13
- package/skills/self-audit/SKILL.md +20 -0
- package/skills/skill-research/SKILL.md +188 -0
- package/skills/verification/SKILL.md +41 -3
- package/skills/wazir/SKILL.md +304 -38
- package/tooling/src/capture/command.js +17 -1
- package/tooling/src/capture/store.js +32 -0
- package/tooling/src/capture/user-input.js +66 -0
- package/tooling/src/checks/security-sensitivity.js +69 -0
- package/tooling/src/cli.js +28 -26
- package/tooling/src/guards/phase-prerequisite-guard.js +58 -0
- package/tooling/src/init/auto-detect.js +0 -2
- package/tooling/src/init/command.js +3 -95
- package/tooling/src/status/command.js +6 -1
- package/tooling/src/verify/proof-collector.js +299 -0
- package/workflows/plan-review.md +3 -1
- package/workflows/verify.md +30 -1
package/skills/wazir/SKILL.md
CHANGED
|
@@ -9,6 +9,16 @@ The user typed `/wazir <their request>`. Run the entire pipeline end-to-end, han
|
|
|
9
9
|
|
|
10
10
|
All questions use **numbered interactive options** — one question at a time, defaults marked "(Recommended)", wait for user response before proceeding.
|
|
11
11
|
|
|
12
|
+
## User Input Capture
|
|
13
|
+
|
|
14
|
+
After every user response (approval, correction, rejection, redirect, instruction), capture it:
|
|
15
|
+
|
|
16
|
+
```
|
|
17
|
+
captureUserInput(runDir, { phase: '<current-phase>', type: '<instruction|approval|correction|rejection|redirect>', content: '<user message>', context: '<what prompted the question>' })
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
This uses `tooling/src/capture/user-input.js`. The log at `user-input-log.ndjson` feeds the learning system — user corrections are the strongest signal for improvement. At run end, delete the input logs of all but the 10 most recent runs via `pruneOldInputLogs(stateRoot, 10)`.
|
|
21
|
+
|
|
12
22
|
## Command Routing
|
|
13
23
|
Follow the Canonical Command Matrix in `hooks/routing-matrix.json`.
|
|
14
24
|
- Large commands (test runners, builds, diffs, dependency trees, linting) → context-mode tools
|
|
@@ -82,6 +92,9 @@ Parse the request for inline modifiers before the main text:
|
|
|
82
92
|
|
|
83
93
|
Recognized modifiers:
|
|
84
94
|
- **Depth:** `quick`, `deep` (standard is default when omitted)
|
|
95
|
+
- **Interaction mode:** `auto`, `interactive` (guided is default when omitted)
|
|
96
|
+
- `/wazir auto fix the auth bug` → interaction_mode = auto
|
|
97
|
+
- `/wazir interactive design the onboarding` → interaction_mode = interactive
|
|
85
98
|
- **Intent:** `bugfix`, `feature`, `refactor`, `docs`, `spike`
|
|
86
99
|
|
|
87
100
|
## Step 2: Check Prerequisites
|
|
@@ -93,11 +106,14 @@ Run `which wazir` to check if the CLI is installed.
|
|
|
93
106
|
**If not installed**, present:
|
|
94
107
|
|
|
95
108
|
> **The Wazir CLI is not installed. It's required for event capture, validation, and indexing.**
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
109
|
+
|
|
110
|
+
Ask the user via AskUserQuestion:
|
|
111
|
+
- **Question:** "The Wazir CLI is not installed. How would you like to install it?"
|
|
112
|
+
- **Options:**
|
|
113
|
+
1. "npm install -g @wazir-dev/cli" *(Recommended)*
|
|
114
|
+
2. "npm link from the Wazir project root"
|
|
115
|
+
|
|
116
|
+
Wait for the user's selection before continuing.
|
|
101
117
|
|
|
102
118
|
The CLI is **required** — the pipeline uses `wazir capture`, `wazir validate`, `wazir index`, and `wazir doctor` throughout execution.
|
|
103
119
|
|
|
@@ -109,9 +125,14 @@ Run `wazir validate branches` to check the current git branch.
|
|
|
109
125
|
|
|
110
126
|
- If on `main` or `develop`:
|
|
111
127
|
> You're on **[branch]**. The pipeline requires a feature branch.
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
128
|
+
|
|
129
|
+
Ask the user via AskUserQuestion:
|
|
130
|
+
- **Question:** "You're on a protected branch. Create a feature branch?"
|
|
131
|
+
- **Options:**
|
|
132
|
+
1. "Create feat/<slug> from current branch" *(Recommended)*
|
|
133
|
+
2. "Continue on current branch — not recommended"
|
|
134
|
+
|
|
135
|
+
Wait for the user's selection before continuing.
|
|
115
136
|
|
|
116
137
|
### Index Check
|
|
117
138
|
|
|
@@ -154,9 +175,14 @@ Check if a previous incomplete run exists (via `latest` symlink pointing to a ru
|
|
|
154
175
|
**If previous incomplete run found**, present:
|
|
155
176
|
|
|
156
177
|
> **A previous incomplete run was detected:** `<previous-run-id>`
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
178
|
+
|
|
179
|
+
Ask the user via AskUserQuestion:
|
|
180
|
+
- **Question:** "A previous incomplete run was detected. Resume or start fresh?"
|
|
181
|
+
- **Options:**
|
|
182
|
+
1. "Resume from the last completed phase" *(Recommended)*
|
|
183
|
+
2. "Start fresh with a new empty run"
|
|
184
|
+
|
|
185
|
+
Wait for the user's selection before continuing.
|
|
160
186
|
|
|
161
187
|
**If Resume:**
|
|
162
188
|
- Copy `clarified/` from previous run into new run, EXCEPT `user-feedback.md`.
|
|
@@ -196,8 +222,7 @@ parsed_intent: feature
|
|
|
196
222
|
entry_point: "/wazir"
|
|
197
223
|
|
|
198
224
|
depth: standard
|
|
199
|
-
|
|
200
|
-
parallel_backend: none
|
|
225
|
+
interaction_mode: guided # auto | guided | interactive
|
|
201
226
|
|
|
202
227
|
# Workflow policy — individual workflows within each phase
|
|
203
228
|
workflow_policy:
|
|
@@ -247,18 +272,124 @@ After building run config:
|
|
|
247
272
|
> **Running: standard depth, feature, sequential. Proceeding...**
|
|
248
273
|
|
|
249
274
|
- **Low confidence** — show plan and ask:
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
275
|
+
|
|
276
|
+
Ask the user via AskUserQuestion:
|
|
277
|
+
- **Question:** "Does this run configuration look right?"
|
|
278
|
+
- **Options:**
|
|
279
|
+
1. "Yes, proceed" *(Recommended)*
|
|
280
|
+
2. "No, let me adjust"
|
|
281
|
+
|
|
282
|
+
Wait for the user's selection before continuing.
|
|
253
283
|
|
|
254
284
|
```bash
|
|
255
285
|
wazir capture event --run <run-id> --event phase_exit --phase init --status completed
|
|
256
286
|
```
|
|
257
287
|
|
|
288
|
+
Run the phase report and display it to the user:
|
|
289
|
+
```bash
|
|
290
|
+
wazir report phase --run <run-id> --phase init
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
Output the report content to the user in the conversation.
|
|
294
|
+
|
|
295
|
+
---
|
|
296
|
+
|
|
297
|
+
# Interaction Modes
|
|
298
|
+
|
|
299
|
+
The `interaction_mode` field in run-config controls how the pipeline interacts with the user:
|
|
300
|
+
|
|
301
|
+
| Mode | Inline modifier | Behavior | Best for |
|
|
302
|
+
|------|----------------|----------|----------|
|
|
303
|
+
| **`guided`** | (default) | Pipeline runs, pauses at phase checkpoints for user approval. Current default behavior. | Most work |
|
|
304
|
+
| **`auto`** | `/wazir auto ...` | No human checkpoints. Codex reviews all. Gating agent decides continue/loop_back/escalate. Stops ONLY on escalate. | Overnight, clear spec, well-understood domain |
|
|
305
|
+
| **`interactive`** | `/wazir interactive ...` | More questions, more discussion, co-designs with user. Researcher presents options. Executor checks approach before coding. | Ambiguous requirements, new domain, learning |
|
|
306
|
+
|
|
307
|
+
## `auto` mode constraints
|
|
308
|
+
|
|
309
|
+
- **Codex REQUIRED** — refuse to start auto mode if `multi_tool.codex` is not configured in `.wazir/state/config.json`. Error: "Auto mode requires an external reviewer (Codex). Configure it first or use guided mode."
|
|
310
|
+
- **On escalate:** STOP immediately, write the escalation reason to `.wazir/runs/<id>/escalations/`, and wait for user input
|
|
311
|
+
- **Wall-clock limit:** default 4 hours. If exceeded, stop with escalation.
|
|
312
|
+
- **Never auto-commits to main** — always work on feature branch
|
|
313
|
+
- All checkpoints (AskUserQuestion) are skipped — gating agent evaluates phase reports and decides
|
|
314
|
+
|
|
315
|
+
## `guided` mode (default)
|
|
316
|
+
|
|
317
|
+
Current behavior — no changes needed. Checkpoints at phase boundaries, user approves before advancing.
|
|
318
|
+
|
|
319
|
+
## `interactive` mode
|
|
320
|
+
|
|
321
|
+
- **Clarifier:** asks more detailed questions, presents research findings with options: "I found 3 approaches — which interests you?"
|
|
322
|
+
- **Executor:** checks approach before coding: "I'm about to implement auth with Supabase — sound right?"
|
|
323
|
+
- **Reviewer:** discusses findings with the user instead of only presenting a verdict: "I found a potential auth bypass — here's why I think it's high severity, do you agree?"
|
|
324
|
+
- Slower but highest quality for complex/ambiguous work
|
|
325
|
+
|
|
326
|
+
## Mode checking in phase skills
|
|
327
|
+
|
|
328
|
+
All phase skills check `interaction_mode` from run-config at every checkpoint:
|
|
329
|
+
|
|
330
|
+
```
|
|
331
|
+
# Read from run-config
|
|
332
|
+
interaction_mode = run_config.interaction_mode ?? 'guided'
|
|
333
|
+
|
|
334
|
+
# At each checkpoint:
|
|
335
|
+
if interaction_mode == 'auto':
|
|
336
|
+
# Skip checkpoint, let gating agent decide
|
|
337
|
+
elif interaction_mode == 'interactive':
|
|
338
|
+
# More detailed question, present options, discuss
|
|
339
|
+
else:
|
|
340
|
+
# guided — standard checkpoint with AskUserQuestion
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
---
|
|
344
|
+
|
|
345
|
+
# Two-Level Phase Model
|
|
346
|
+
|
|
347
|
+
The pipeline has 4 top-level **phases**. Every phase except Init (which runs inline) contains multiple **workflows**, most of them with their own review loop:
|
|
348
|
+
|
|
349
|
+
```
|
|
350
|
+
Phase 1: Init
|
|
351
|
+
└── (inline — no sub-workflows)
|
|
352
|
+
|
|
353
|
+
Phase 2: Clarifier
|
|
354
|
+
├── discover (research) ← research-review loop
|
|
355
|
+
├── clarify ← clarification-review loop
|
|
356
|
+
├── specify ← spec-challenge loop
|
|
357
|
+
├── author (adaptive) ← approval gate
|
|
358
|
+
├── design ← design-review loop
|
|
359
|
+
└── plan ← plan-review loop
|
|
360
|
+
|
|
361
|
+
Phase 3: Executor
|
|
362
|
+
├── execute (per-task) ← task-review loop per task
|
|
363
|
+
└── verify
|
|
364
|
+
|
|
365
|
+
Phase 4: Final Review
|
|
366
|
+
├── review (final) ← scored review
|
|
367
|
+
├── learn
|
|
368
|
+
└── prepare_next
|
|
369
|
+
```
|
|
370
|
+
|
|
371
|
+
**Event capture uses both levels.** When emitting phase events, include `--parent-phase`:
|
|
372
|
+
```bash
|
|
373
|
+
wazir capture event --run <id> --event phase_enter --phase discover --parent-phase clarifier --status in_progress
|
|
374
|
+
```
|
|
375
|
+
|
|
376
|
+
**Progress markers between workflows:** After each workflow completes, output:
|
|
377
|
+
> Phase 2: Clarifier > Workflow: specify (3 of 6 workflows complete)
|
|
378
|
+
|
|
379
|
+
**`wazir status` shows both levels:** "Phase 2: Clarifier > Workflow: specify"
|
|
380
|
+
|
|
258
381
|
---
|
|
259
382
|
|
|
260
383
|
# Phase 2: Clarifier
|
|
261
384
|
|
|
385
|
+
**Before starting this phase, output to the user:**
|
|
386
|
+
|
|
387
|
+
> **Clarifier Phase** — About to research your codebase, clarify requirements, harden the spec, brainstorm designs, and produce an execution plan.
|
|
388
|
+
>
|
|
389
|
+
> **Why this matters:** Without this, I'd guess your tech stack, misunderstand constraints, miss edge cases in the spec, and build the wrong architecture. Every ambiguity left unresolved here becomes a bug or rework cycle later.
|
|
390
|
+
>
|
|
391
|
+
> **Looking for:** Unstated assumptions, scope boundaries, conflicting requirements, missing acceptance criteria
|
|
392
|
+
|
|
262
393
|
```bash
|
|
263
394
|
wazir capture event --run <run-id> --event phase_enter --phase clarifier --status in_progress
|
|
264
395
|
```
|
|
@@ -280,14 +411,43 @@ Each sub-workflow has its own review loop. User checkpoints between major steps.
|
|
|
280
411
|
|
|
281
412
|
Output: approved spec + design + execution plan in `.wazir/runs/latest/clarified/`.
|
|
282
413
|
|
|
414
|
+
**After completing this phase, output to the user:**
|
|
415
|
+
|
|
416
|
+
> **Clarifier Phase complete.**
|
|
417
|
+
>
|
|
418
|
+
> **Found:** [N] ambiguities resolved, [N] assumptions made explicit, [N] scope boundaries drawn, [N] acceptance criteria hardened
|
|
419
|
+
>
|
|
420
|
+
> **Without this phase:** Requirements would be interpreted differently across tasks, acceptance criteria would be vague and untestable, the design would be ad-hoc, and the plan would miss dependency ordering
|
|
421
|
+
>
|
|
422
|
+
> **Changed because of this work:** [List spec tightening changes, resolved questions, design decisions, scope adjustments]
|
|
423
|
+
|
|
283
424
|
```bash
|
|
284
425
|
wazir capture event --run <run-id> --event phase_exit --phase clarifier --status completed
|
|
285
426
|
```
|
|
286
427
|
|
|
428
|
+
Run the phase report and display savings to the user:
|
|
429
|
+
```bash
|
|
430
|
+
wazir report phase --run <run-id> --phase clarifier
|
|
431
|
+
wazir stats --run <run-id>
|
|
432
|
+
```
|
|
433
|
+
|
|
434
|
+
**Show savings in conversation output:**
|
|
435
|
+
> **Context savings this phase:** Used wazir index for [N] queries and context-mode for [M] commands, saving ~[X] tokens ([Y]% reduction). Without these, this phase would have consumed [A] tokens instead of [B].
|
|
436
|
+
|
|
437
|
+
Output the report content to the user in the conversation.
|
|
438
|
+
|
|
287
439
|
---
|
|
288
440
|
|
|
289
441
|
# Phase 3: Executor
|
|
290
442
|
|
|
443
|
+
**Before starting this phase, output to the user:**
|
|
444
|
+
|
|
445
|
+
> **Executor Phase** — About to implement [N] tasks in dependency order with TDD (test-first), per-task code review, and verification before each commit.
|
|
446
|
+
>
|
|
447
|
+
> **Why this matters:** Without this discipline, tests get skipped, edge cases get missed, integration points break silently, and review catches problems too late when they're expensive to fix.
|
|
448
|
+
>
|
|
449
|
+
> **Looking for:** Correct dependency ordering, test coverage for each task, clean per-task review passes, no implementation drift from the approved plan
|
|
450
|
+
|
|
291
451
|
## Phase Gate (Hard Gate)
|
|
292
452
|
|
|
293
453
|
Before entering the Executor phase, verify ALL clarifier artifacts exist:
|
|
@@ -328,14 +488,43 @@ Tasks always run sequentially.
|
|
|
328
488
|
|
|
329
489
|
Output: code changes + verification proof in `.wazir/runs/latest/artifacts/`.
|
|
330
490
|
|
|
491
|
+
**After completing this phase, output to the user:**
|
|
492
|
+
|
|
493
|
+
> **Executor Phase complete.**
|
|
494
|
+
>
|
|
495
|
+
> **Found:** [N]/[N] tasks implemented, [N] tests written, [N] per-task review passes completed, [N] findings fixed before commit
|
|
496
|
+
>
|
|
497
|
+
> **Without this phase:** Code would ship without tests, review findings would accumulate until final review (10x more expensive to fix), and verification claims would be unsubstantiated
|
|
498
|
+
>
|
|
499
|
+
> **Changed because of this work:** [List of commits with conventional commit messages, test counts, verification evidence collected]
|
|
500
|
+
|
|
331
501
|
```bash
|
|
332
502
|
wazir capture event --run <run-id> --event phase_exit --phase executor --status completed
|
|
333
503
|
```
|
|
334
504
|
|
|
505
|
+
Run the phase report and display savings to the user:
|
|
506
|
+
```bash
|
|
507
|
+
wazir report phase --run <run-id> --phase executor
|
|
508
|
+
wazir stats --run <run-id>
|
|
509
|
+
```
|
|
510
|
+
|
|
511
|
+
Output the report content to the user in the conversation.
|
|
512
|
+
|
|
513
|
+
**Show savings in conversation output:**
|
|
514
|
+
> **Context savings this phase:** Used wazir index for [N] queries and context-mode for [M] commands, saving ~[X] tokens ([Y]% reduction).
|
|
515
|
+
|
|
335
516
|
---
|
|
336
517
|
|
|
337
518
|
# Phase 4: Final Review
|
|
338
519
|
|
|
520
|
+
**Before starting this phase, output to the user:**
|
|
521
|
+
|
|
522
|
+
> **Final Review Phase** — About to run adversarial 7-dimension review comparing the implementation against your original input, extract durable learnings, and prepare the handoff.
|
|
523
|
+
>
|
|
524
|
+
> **Why this matters:** Without this, implementation drift ships undetected, missing acceptance criteria go unnoticed, untested code paths hide bugs, and the same mistakes repeat in the next run.
|
|
525
|
+
>
|
|
526
|
+
> **Looking for:** Spec violations, missing features, dead code paths, unsubstantiated claims, scope creep, security gaps, stale documentation
|
|
527
|
+
|
|
339
528
|
## Phase Gate (Hard Gate)
|
|
340
529
|
|
|
341
530
|
Before entering the Final Review phase, verify the Executor produced its proof:
|
|
@@ -375,10 +564,27 @@ Prepare context and handoff for the next run:
|
|
|
375
564
|
- Compress/archive unneeded files
|
|
376
565
|
- Record what's left to do
|
|
377
566
|
|
|
567
|
+
**After completing this phase, output to the user:**
|
|
568
|
+
|
|
569
|
+
> **Final Review Phase complete.**
|
|
570
|
+
>
|
|
571
|
+
> **Found:** [N] findings across 7 dimensions, [N] blocking issues, [N] warnings, [N] learnings proposed for future runs
|
|
572
|
+
>
|
|
573
|
+
> **Without this phase:** Implementation drift from the original request would ship undetected, untested paths would hide production bugs, and recurring mistakes would never get captured as learnings
|
|
574
|
+
>
|
|
575
|
+
> **Changed because of this work:** [List of findings fixed, score achieved, learnings extracted, handoff prepared]
|
|
576
|
+
|
|
378
577
|
```bash
|
|
379
578
|
wazir capture event --run <run-id> --event phase_exit --phase final_review --status completed
|
|
380
579
|
```
|
|
381
580
|
|
|
581
|
+
Run the phase report and display it to the user:
|
|
582
|
+
```bash
|
|
583
|
+
wazir report phase --run <run-id> --phase final_review
|
|
584
|
+
```
|
|
585
|
+
|
|
586
|
+
Output the report content to the user in the conversation.
|
|
587
|
+
|
|
382
588
|
---
|
|
383
589
|
|
|
384
590
|
## Step 5: CHANGELOG + Gitflow Validation (Hard Gates)
|
|
@@ -399,26 +605,41 @@ After the reviewer completes, present verdict with numbered options:
|
|
|
399
605
|
### If PASS (score 56+):
|
|
400
606
|
|
|
401
607
|
> **Result: PASS (score/70)**
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
608
|
+
|
|
609
|
+
Ask the user via AskUserQuestion:
|
|
610
|
+
- **Question:** "Pipeline passed. What would you like to do next?"
|
|
611
|
+
- **Options:**
|
|
612
|
+
1. "Create a PR" *(Recommended)*
|
|
613
|
+
2. "Merge directly"
|
|
614
|
+
3. "Review the changes first"
|
|
615
|
+
|
|
616
|
+
Wait for the user's selection before continuing.
|
|
406
617
|
|
|
407
618
|
### If NEEDS MINOR FIXES (score 42-55):
|
|
408
619
|
|
|
409
620
|
> **Result: NEEDS MINOR FIXES (score/70)**
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
621
|
+
|
|
622
|
+
Ask the user via AskUserQuestion:
|
|
623
|
+
- **Question:** "Minor issues found. How should we handle them?"
|
|
624
|
+
- **Options:**
|
|
625
|
+
1. "Auto-fix and re-review" *(Recommended)*
|
|
626
|
+
2. "Fix manually"
|
|
627
|
+
3. "Accept as-is"
|
|
628
|
+
|
|
629
|
+
Wait for the user's selection before continuing.
|
|
414
630
|
|
|
415
631
|
### If NEEDS REWORK (score 28-41):
|
|
416
632
|
|
|
417
633
|
> **Result: NEEDS REWORK (score/70)**
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
634
|
+
|
|
635
|
+
Ask the user via AskUserQuestion:
|
|
636
|
+
- **Question:** "Significant issues found. How should we proceed?"
|
|
637
|
+
- **Options:**
|
|
638
|
+
1. "Re-run affected tasks" *(Recommended)*
|
|
639
|
+
2. "Review findings in detail"
|
|
640
|
+
3. "Abandon this run"
|
|
641
|
+
|
|
642
|
+
Wait for the user's selection before continuing.
|
|
422
643
|
|
|
423
644
|
### If FAIL (score 0-27):
|
|
424
645
|
|
|
@@ -438,10 +659,15 @@ wazir status --run <run-id> --json
|
|
|
438
659
|
If any phase fails:
|
|
439
660
|
|
|
440
661
|
> **Phase [name] failed: [reason]**
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
662
|
+
|
|
663
|
+
Ask the user via AskUserQuestion:
|
|
664
|
+
- **Question:** "Phase [name] failed: [reason]. How should we proceed?"
|
|
665
|
+
- **Options:**
|
|
666
|
+
1. "Retry this phase" *(Recommended)*
|
|
667
|
+
2. "Skip and continue" *(only if workflows within phase are adaptive)*
|
|
668
|
+
3. "Abort the run"
|
|
669
|
+
|
|
670
|
+
Wait for the user's selection before continuing.
|
|
445
671
|
|
|
446
672
|
---
|
|
447
673
|
|
|
@@ -455,9 +681,14 @@ Parse inline audit types: `/wazir audit security` → skip Question 1.
|
|
|
455
681
|
|
|
456
682
|
After audit:
|
|
457
683
|
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
684
|
+
Ask the user via AskUserQuestion:
|
|
685
|
+
- **Question:** "Audit complete. What would you like to do with the findings?"
|
|
686
|
+
- **Options:**
|
|
687
|
+
1. "Review the findings" *(Recommended)*
|
|
688
|
+
2. "Generate a fix plan"
|
|
689
|
+
3. "Run the pipeline on the fix plan"
|
|
690
|
+
|
|
691
|
+
Wait for the user's selection before continuing.
|
|
461
692
|
|
|
462
693
|
If option 3, save findings as briefing and run pipeline with intent = `bugfix`.
|
|
463
694
|
|
|
@@ -471,12 +702,47 @@ Generates a PRD from a completed run. Reads approved design, task specs, executi
|
|
|
471
702
|
|
|
472
703
|
After generation:
|
|
473
704
|
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
705
|
+
Ask the user via AskUserQuestion:
|
|
706
|
+
- **Question:** "PRD generated. What would you like to do?"
|
|
707
|
+
- **Options:**
|
|
708
|
+
1. "Review the PRD" *(Recommended)*
|
|
709
|
+
2. "Commit it"
|
|
710
|
+
3. "Edit before committing"
|
|
711
|
+
|
|
712
|
+
Wait for the user's selection before continuing.
|
|
477
713
|
|
|
478
714
|
---
|
|
479
715
|
|
|
716
|
+
## Reasoning Chain Output
|
|
717
|
+
|
|
718
|
+
Every phase produces reasoning output at two layers:
|
|
719
|
+
|
|
720
|
+
### Layer 1: Conversation Output (concise — for the user)
|
|
721
|
+
|
|
722
|
+
Before each major decision, output one trigger sentence and one reasoning sentence:
|
|
723
|
+
|
|
724
|
+
> "Your request mentions 'overnight autonomous run' — researching how Devin and Karpathy's autoresearch handle this, because unattended runs need different safety constraints than interactive ones."
|
|
725
|
+
|
|
726
|
+
After each phase, output what was found and a counterfactual:
|
|
727
|
+
|
|
728
|
+
> "Found: you use Supabase auth (not custom JWT). If I'd skipped research, I would have built JWT middleware — completely wrong."
|
|
729
|
+
|
|
730
|
+
### Layer 2: File Output (detailed — for learning and reports)
|
|
731
|
+
|
|
732
|
+
Save full reasoning chain to `.wazir/runs/<id>/reasoning/phase-<name>-reasoning.md` with entries:
|
|
733
|
+
|
|
734
|
+
```markdown
|
|
735
|
+
### Decision: [title]
|
|
736
|
+
- **Trigger:** What prompted this decision
|
|
737
|
+
- **Options considered:** List of alternatives
|
|
738
|
+
- **Chosen:** The selected option
|
|
739
|
+
- **Reasoning:** Why this option was chosen
|
|
740
|
+
- **Confidence:** high | medium | low
|
|
741
|
+
- **Counterfactual:** What would have gone wrong without this information
|
|
742
|
+
```
|
|
743
|
+
|
|
744
|
+
Create the `reasoning/` directory during run init. Every phase skill (clarifier, executor, reviewer) writes its own reasoning file. Counterfactuals appear in BOTH conversation output AND reasoning files.
|
|
745
|
+
|
|
480
746
|
## Interaction Rules
|
|
481
747
|
|
|
482
748
|
- **One question at a time** — never combine multiple questions
|
|
@@ -3,6 +3,7 @@ import path from 'node:path';
|
|
|
3
3
|
|
|
4
4
|
import { parseCommandOptions, parsePositiveInteger } from '../command-options.js';
|
|
5
5
|
import { readYamlFile } from '../loaders.js';
|
|
6
|
+
import { validateRunCompletion } from '../guards/phase-prerequisite-guard.js';
|
|
6
7
|
import { findProjectRoot } from '../project-root.js';
|
|
7
8
|
import { resolveStateRoot } from '../state-root.js';
|
|
8
9
|
import {
|
|
@@ -57,7 +58,7 @@ function resolveCaptureContext(parsed, context = {}) {
|
|
|
57
58
|
const projectRoot = findProjectRoot(context.cwd ?? process.cwd());
|
|
58
59
|
const manifest = readYamlFile(path.join(projectRoot, 'wazir.manifest.yaml'));
|
|
59
60
|
const { options } = parseCommandOptions(parsed.args, {
|
|
60
|
-
boolean: ['json'],
|
|
61
|
+
boolean: ['json', 'complete'],
|
|
61
62
|
string: [
|
|
62
63
|
'run',
|
|
63
64
|
'phase',
|
|
@@ -326,6 +327,21 @@ function handleSummary(parsed, context = {}) {
|
|
|
326
327
|
|
|
327
328
|
const runPaths = getRunPaths(stateRoot, options.run);
|
|
328
329
|
const status = readStatus(runPaths);
|
|
330
|
+
|
|
331
|
+
// Enforce workflow completion before allowing summary to finalize
|
|
332
|
+
if (options.complete) {
|
|
333
|
+
const projectRoot = findProjectRoot();
|
|
334
|
+
const manifestPath = path.join(projectRoot, 'wazir.manifest.yaml');
|
|
335
|
+
const result = validateRunCompletion(runPaths.runRoot, manifestPath);
|
|
336
|
+
if (!result.complete) {
|
|
337
|
+
const msg = `Run incomplete: ${result.missing.length} workflow(s) not finished: ${result.missing.join(', ')}`;
|
|
338
|
+
if (options.json) {
|
|
339
|
+
return { exitCode: 1, stdout: JSON.stringify({ run_id: options.run, complete: false, missing_workflows: result.missing, error: msg }, null, 2) + '\n' };
|
|
340
|
+
}
|
|
341
|
+
return { exitCode: 1, stderr: msg + '\n' };
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
|
|
329
345
|
const eventName = options.event ?? 'pre_compact_summary';
|
|
330
346
|
const summaryContent = readInput();
|
|
331
347
|
const summaryPath = writeSummary(runPaths, summaryContent);
|
|
@@ -116,6 +116,38 @@ export function readPhaseExitEvents(runPaths) {
|
|
|
116
116
|
return completedPhases;
|
|
117
117
|
}
|
|
118
118
|
|
|
119
|
+
/**
 * Collect every `phase_exit` event recorded in a run's NDJSON event log,
 * keeping both levels of the phase model.
 *
 * Each returned record carries `phase`, `parent_phase`, `workflow` and
 * `status`. When an event has no `parent_phase` or `workflow`, the value of
 * `phase` is used in its place. Events without a truthy `phase`, blank lines
 * and lines that are not valid JSON are ignored.
 *
 * @param {{ eventsPath: string }} runPaths - Run paths; only `eventsPath` is read.
 * @returns {Array<{ phase: *, parent_phase: *, workflow: *, status: * }>}
 *   Matching events in file order, or an empty array when the log is missing.
 */
export function readPhaseExitEventsDetailed(runPaths) {
  const { eventsPath } = runPaths;
  if (!fs.existsSync(eventsPath)) {
    return [];
  }

  // Malformed lines are tolerated: they parse to `undefined` and are dropped below.
  const parseLine = (text) => {
    try {
      return JSON.parse(text);
    } catch {
      return undefined;
    }
  };

  return fs
    .readFileSync(eventsPath, 'utf8')
    .split('\n')
    .map((rawLine) => rawLine.trim())
    .filter((text) => text.length > 0)
    .map(parseLine)
    // `?.` also discards JSON `null`, which has no properties to inspect.
    .filter((parsed) => parsed?.event === 'phase_exit' && parsed.phase)
    .map(({ phase, parent_phase, workflow, status }) => ({
      phase,
      parent_phase: parent_phase ?? phase,
      workflow: workflow ?? phase,
      status,
    }));
}
|
|
150
|
+
|
|
119
151
|
export function writeSummary(runPaths, content) {
|
|
120
152
|
ensureRunDirectories(runPaths);
|
|
121
153
|
fs.writeFileSync(runPaths.summaryPath, content);
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
|
|
4
|
+
/**
 * Append one user-input record to the run's NDJSON log
 * (`<runDir>/user-input-log.ndjson`).
 *
 * The run directory is created if it does not exist yet, so the first capture
 * of a run cannot fail with ENOENT. Missing fields fall back to defaults, and
 * the entry argument itself may be omitted.
 *
 * @param {string} runDir - Absolute path to the run directory
 * @param {object} [entry] - { phase, type, content, context }
 * @param {string} [entry.phase] - Phase in which the input was given (default 'unknown')
 * @param {string} [entry.type] - 'instruction' | 'approval' | 'correction' | 'rejection' | 'redirect'
 *   (default 'instruction')
 * @param {string} [entry.content] - The user's message (default '')
 * @param {string} [entry.context] - What prompted the question (default '')
 * @returns {string} Path of the log file that was appended to
 */
export function captureUserInput(runDir, { phase, type, content, context } = {}) {
  // appendFileSync creates the file but not its parent directories.
  fs.mkdirSync(runDir, { recursive: true });

  const logPath = path.join(runDir, 'user-input-log.ndjson');
  const record = {
    timestamp: new Date().toISOString(),
    phase: phase ?? 'unknown',
    type: type ?? 'instruction',
    content: content ?? '',
    context: context ?? '',
  };
  // One JSON document per line keeps the file readable by readUserInputLog.
  fs.appendFileSync(logPath, `${JSON.stringify(record)}\n`);
  return logPath;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Load every record from a run's `user-input-log.ndjson`.
 *
 * Blank lines, lines that are not valid JSON, and lines whose parsed value is
 * falsy (for example a bare `null`) are left out of the result.
 *
 * @param {string} runDir - Absolute path to the run directory
 * @returns {Array<*>} Parsed records in file order; empty when the log is missing
 */
export function readUserInputLog(runDir) {
  const logFile = path.join(runDir, 'user-input-log.ndjson');
  if (!fs.existsSync(logFile)) {
    return [];
  }

  const records = [];
  for (const rawLine of fs.readFileSync(logFile, 'utf8').split('\n')) {
    if (!rawLine.trim()) {
      continue;
    }

    let parsed;
    try {
      parsed = JSON.parse(rawLine);
    } catch {
      // A damaged line must not prevent reading the rest of the log.
      continue;
    }

    if (parsed) {
      records.push(parsed);
    }
  }
  return records;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Prune old user-input-log.ndjson files, keeping the most recent `keep` runs.
 *
 * Only entries of `<stateRoot>/runs` whose name starts with `run-` and that
 * resolve to a directory are considered. Names are sorted lexicographically
 * and reversed, so "most recent" means "sorts last by name".
 * NOTE(review): this presumes run ids embed a sortable timestamp — confirm
 * against the run-id generator.
 *
 * Only the log file is removed; the run directories themselves are untouched.
 *
 * @param {string} stateRoot - Absolute path to the state root (e.g. ~/.wazir/projects/foo)
 * @param {number} keep - Number of recent runs to keep (default 10). Numeric
 *   strings are accepted; fractional values are rounded up so that rounding
 *   never deletes more than requested.
 * @returns {{ pruned: number }} How many log files were deleted
 * @throws {RangeError} If `keep` is negative or not a number. Previously this
 *   surfaced as an opaque TypeError from `path.join`.
 */
export function pruneOldInputLogs(stateRoot, keep = 10) {
  const runsDir = path.join(stateRoot, 'runs');
  if (!fs.existsSync(runsDir)) return { pruned: 0 };

  const limit = typeof keep === 'string' && keep.trim() !== '' ? Number(keep) : keep;
  if (typeof limit !== 'number' || Number.isNaN(limit) || limit < 0) {
    // Refuse rather than guess: this function deletes files.
    throw new RangeError(`keep must be a non-negative number, received ${String(keep)}`);
  }

  // An entry that cannot be stat'ed (dangling symlink, removed concurrently)
  // is simply not a run directory; it must not abort the whole prune.
  const isRunDirectory = (name) => {
    if (!name.startsWith('run-')) return false;
    try {
      return fs.statSync(path.join(runsDir, name)).isDirectory();
    } catch {
      return false;
    }
  };

  const newestFirst = fs.readdirSync(runsDir).filter(isRunDirectory).sort().reverse();

  let pruned = 0;
  for (const runName of newestFirst.slice(Math.ceil(limit))) {
    const logPath = path.join(runsDir, runName, 'user-input-log.ndjson');
    if (fs.existsSync(logPath)) {
      fs.unlinkSync(logPath);
      pruned++;
    }
  }

  return { pruned };
}
|