@wazir-dev/cli 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. package/CHANGELOG.md +31 -2
  2. package/docs/plans/2026-03-15-cli-pipeline-integration-plan.md +1 -1
  3. package/docs/reference/review-loop-pattern.md +429 -0
  4. package/docs/reference/tooling-cli.md +2 -0
  5. package/docs/truth-claims.yaml +6 -0
  6. package/exports/hosts/claude/.claude/agents/clarifier.md +3 -0
  7. package/exports/hosts/claude/.claude/agents/designer.md +3 -0
  8. package/exports/hosts/claude/.claude/agents/executor.md +2 -0
  9. package/exports/hosts/claude/.claude/agents/planner.md +3 -0
  10. package/exports/hosts/claude/.claude/agents/researcher.md +2 -0
  11. package/exports/hosts/claude/.claude/agents/reviewer.md +5 -1
  12. package/exports/hosts/claude/.claude/agents/specifier.md +3 -0
  13. package/exports/hosts/claude/.claude/commands/clarify.md +4 -0
  14. package/exports/hosts/claude/.claude/commands/design-review.md +4 -0
  15. package/exports/hosts/claude/.claude/commands/design.md +4 -0
  16. package/exports/hosts/claude/.claude/commands/discover.md +4 -0
  17. package/exports/hosts/claude/.claude/commands/execute.md +4 -0
  18. package/exports/hosts/claude/.claude/commands/plan-review.md +4 -0
  19. package/exports/hosts/claude/.claude/commands/plan.md +4 -0
  20. package/exports/hosts/claude/.claude/commands/spec-challenge.md +4 -0
  21. package/exports/hosts/claude/.claude/commands/specify.md +4 -0
  22. package/exports/hosts/claude/.claude/commands/verify.md +4 -0
  23. package/exports/hosts/claude/export.manifest.json +19 -19
  24. package/exports/hosts/codex/export.manifest.json +19 -19
  25. package/exports/hosts/cursor/export.manifest.json +19 -19
  26. package/exports/hosts/gemini/export.manifest.json +19 -19
  27. package/hooks/definitions/loop_cap_guard.yaml +1 -1
  28. package/hooks/hooks.json +18 -0
  29. package/package.json +3 -2
  30. package/roles/clarifier.md +3 -0
  31. package/roles/designer.md +3 -0
  32. package/roles/executor.md +2 -0
  33. package/roles/planner.md +3 -0
  34. package/roles/researcher.md +2 -0
  35. package/roles/reviewer.md +5 -1
  36. package/roles/specifier.md +3 -0
  37. package/skills/brainstorming/SKILL.md +139 -38
  38. package/skills/clarifier/SKILL.md +219 -0
  39. package/skills/debugging/SKILL.md +11 -1
  40. package/skills/executing-plans/SKILL.md +15 -2
  41. package/skills/executor/SKILL.md +76 -0
  42. package/skills/init-pipeline/SKILL.md +106 -17
  43. package/skills/receiving-code-review/SKILL.md +8 -0
  44. package/skills/requesting-code-review/SKILL.md +25 -5
  45. package/skills/reviewer/SKILL.md +151 -0
  46. package/skills/subagent-driven-development/SKILL.md +25 -2
  47. package/skills/tdd/SKILL.md +8 -0
  48. package/skills/wazir/SKILL.md +250 -43
  49. package/skills/writing-plans/SKILL.md +31 -4
  50. package/templates/examples/wazir-manifest.example.yaml +1 -1
  51. package/tooling/src/capture/command.js +87 -1
  52. package/tooling/src/capture/run-config.js +21 -0
  53. package/tooling/src/checks/brand-truth.js +3 -6
  54. package/tooling/src/checks/command-registry.js +1 -0
  55. package/tooling/src/checks/docs-truth.js +1 -1
  56. package/tooling/src/checks/runtime-surface.js +3 -7
  57. package/tooling/src/cli.js +8 -3
  58. package/tooling/src/init/command.js +201 -0
  59. package/wazir.manifest.yaml +0 -3
  60. package/workflows/clarify.md +4 -0
  61. package/workflows/design-review.md +4 -0
  62. package/workflows/design.md +4 -0
  63. package/workflows/discover.md +4 -0
  64. package/workflows/execute.md +4 -0
  65. package/workflows/plan-review.md +4 -0
  66. package/workflows/plan.md +4 -0
  67. package/workflows/spec-challenge.md +4 -0
  68. package/workflows/specify.md +4 -0
  69. package/workflows/verify.md +4 -0
@@ -8,6 +8,7 @@ import { runValidateCommand } from './commands/validate.js';
8
8
  import { runDoctorCommand } from './doctor/command.js';
9
9
  import { runExportCommand as runGeneratedExportCommand } from './export/command.js';
10
10
  import { runIndexCommand } from './index/command.js';
11
+ import { runInitCommand } from './init/command.js';
11
12
  import { runRecallCommand } from './recall/command.js';
12
13
  import { runStatusCommand } from './status/command.js';
13
14
 
@@ -16,6 +17,7 @@ const COMMAND_FAMILIES = [
16
17
  'validate',
17
18
  'doctor',
18
19
  'index',
20
+ 'init',
19
21
  'recall',
20
22
  'status',
21
23
  'capture'
@@ -26,6 +28,7 @@ const COMMAND_HANDLERS = {
26
28
  validate: runValidateCommand,
27
29
  doctor: runDoctorCommand,
28
30
  index: runIndexCommand,
31
+ init: runInitCommand,
29
32
  recall: runRecallCommand,
30
33
  status: runStatusCommand,
31
34
  capture: runCaptureCommand,
@@ -63,7 +66,7 @@ export function renderHelp() {
63
66
  ].join('\n');
64
67
  }
65
68
 
66
- export function main(argv = process.argv.slice(2)) {
69
+ export async function main(argv = process.argv.slice(2)) {
67
70
  const parsed = parseArgs(argv);
68
71
 
69
72
  if (parsed.help || !parsed.command) {
@@ -86,7 +89,7 @@ export function main(argv = process.argv.slice(2)) {
86
89
  let result;
87
90
 
88
91
  try {
89
- result = handler(parsed);
92
+ result = await handler(parsed);
90
93
  } catch (error) {
91
94
  console.error(error.message);
92
95
  return 1;
@@ -112,5 +115,7 @@ function isDirectExecution() {
112
115
  }
113
116
 
114
117
  if (isDirectExecution()) {
115
- process.exitCode = main();
118
+ main().then((code) => {
119
+ process.exitCode = code;
120
+ });
116
121
  }
@@ -0,0 +1,201 @@
1
+ import { execFileSync } from 'node:child_process';
2
+ import fs from 'node:fs';
3
+ import path from 'node:path';
4
+ import { select } from '@inquirer/prompts';
5
+
/**
 * Build the Markdown guide dropped in the project root for an external review tool.
 *
 * The Codex and Gemini guides are identical except for the tool name, so both
 * are rendered from this single template.
 *
 * @param {string} toolLabel - Display name of the reviewer (e.g. `Codex`, `Gemini`).
 * @returns {string} Newline-terminated file content.
 */
function renderHostGuide(toolLabel) {
  return [
    '# Wazir Pipeline',
    '',
    'Agent protocols are at `~/.claude/agents/` (global).',
    '',
    '## Running the Pipeline',
    '1. Clarifier: read and follow `~/.claude/agents/clarifier.md` — tasks are in `.wazir/input/`',
    '2. Orchestrator: read and follow `~/.claude/agents/orchestrator.md` — start from task 1',
    '3. Opus Reviewer: read and follow `~/.claude/agents/opus-reviewer.md` — run all phases',
    '',
    '## Review Mode',
    `This project uses ${toolLabel} as a secondary reviewer. Review artifacts are in \`.wazir/reviews/\`.`,
    '',
  ].join('\n');
}

/**
 * Write a guide file, refusing to clobber an existing one unless asked to.
 *
 * Uses the exclusive-create flag (`wx`) instead of an exists-then-write check,
 * so the "already exists" decision and the write happen in one step.
 *
 * @param {string} filePath - Absolute path of the file to write.
 * @param {string} content - File content.
 * @param {boolean} overwrite - When true, replace an existing file.
 * @returns {boolean} `true` if the file was written, `false` if it already
 *   existed and was left untouched.
 * @throws {Error} Any file-system error other than `EEXIST` in no-overwrite mode.
 */
function writeHostGuide(filePath, content, overwrite) {
  try {
    fs.writeFileSync(filePath, content, { flag: overwrite ? 'w' : 'wx' });
    return true;
  } catch (error) {
    if (!overwrite && error.code === 'EEXIST') {
      return false;
    }
    throw error;
  }
}

/**
 * Interactive `init` command: asks how Wazir should run in this project and
 * writes `.wazir/state/config.json` (plus `AGENTS.md` / `GEMINI.md` when the
 * corresponding external review tool is selected).
 *
 * @param {{ args?: string[] }} parsed - Parsed CLI arguments; `--force` in
 *   `args` allows re-initialising over an existing config and overwriting
 *   existing guide files.
 * @param {{ cwd?: string }} [context] - Optional overrides; `cwd` defaults to
 *   `process.cwd()`.
 * @returns {Promise<{ exitCode: number, stdout?: string, stderr?: string }>}
 *   `exitCode` 0 with a summary on success, 1 when already initialised or on
 *   an unexpected error, 130 when the user cancels a prompt.
 */
export async function runInitCommand(parsed, context = {}) {
  const cwd = context.cwd ?? process.cwd();
  const wazirDir = path.join(cwd, '.wazir');
  const configPath = path.join(wazirDir, 'state', 'config.json');
  // Tolerate a missing `args` array instead of throwing before the try block.
  const force = (parsed?.args ?? []).includes('--force');

  if (fs.existsSync(configPath) && !force) {
    return {
      exitCode: 1,
      stderr: 'Pipeline already initialized. Use --force to reinitialize.\n',
    };
  }

  try {
    // Create directories
    for (const dir of ['input', 'state', 'runs']) {
      fs.mkdirSync(path.join(wazirDir, dir), { recursive: true });
    }

    // Pipeline mode
    const modelMode = await select({
      message: 'How should Wazir run in this project?',
      choices: [
        { name: 'Single model (Recommended) — slash commands only', value: 'claude-only' },
        { name: 'Multi-model — routes by complexity (Haiku/Sonnet/Opus)', value: 'multi-model' },
        { name: 'Multi-tool — current model + external tools for reviews', value: 'multi-tool' },
      ],
      default: 'claude-only',
    });

    // Multi-tool tools (conditional)
    let multiToolTools = [];
    if (modelMode === 'multi-tool') {
      const toolChoice = await select({
        message: 'Which external tools should Wazir use for reviews?',
        choices: [
          { name: 'Codex — Send reviews to OpenAI Codex', value: 'codex' },
          { name: 'Gemini — Send reviews to Google Gemini', value: 'gemini' },
          { name: 'Both — Use Codex and Gemini', value: 'both' },
        ],
      });
      multiToolTools = toolChoice === 'both' ? ['codex', 'gemini'] : [toolChoice];
    }

    // Codex model (conditional)
    let codexModel = null;
    if (multiToolTools.includes('codex')) {
      codexModel = await select({
        message: 'Which Codex model should Wazir use?',
        choices: [
          { name: 'gpt-5.3-codex-spark (Recommended) — fast, good for review loops', value: 'gpt-5.3-codex-spark' },
          { name: 'gpt-5.4 — slower, deeper analysis for complex reviews', value: 'gpt-5.4' },
        ],
        default: 'gpt-5.3-codex-spark',
      });
    }

    // Default depth
    const defaultDepth = await select({
      message: 'What default depth should runs use?',
      choices: [
        { name: 'Quick — minimal research, single-pass review', value: 'quick' },
        { name: 'Standard (Recommended) — balanced research, multi-pass hardening', value: 'standard' },
        { name: 'Deep — extended research, strict review thresholds', value: 'deep' },
      ],
      default: 'standard',
    });

    // Default intent
    const defaultIntent = await select({
      message: 'What kind of work does this project mostly involve?',
      choices: [
        { name: 'Feature (Recommended) — new functionality or enhancement', value: 'feature' },
        { name: 'Bugfix — fix broken behavior', value: 'bugfix' },
        { name: 'Refactor — restructure without changing behavior', value: 'refactor' },
        { name: 'Docs — documentation only', value: 'docs' },
        { name: 'Spike — research and exploration', value: 'spike' },
      ],
      default: 'feature',
    });

    // Agent Teams (conditional)
    let teamMode = 'sequential';
    let parallelBackend = 'none';
    const warnings = [];

    const depthAllows = defaultDepth === 'standard' || defaultDepth === 'deep';
    const intentAllows = defaultIntent === 'feature' || defaultIntent === 'refactor';

    if (depthAllows && intentAllows) {
      const useTeams = await select({
        message: 'Would you like to use Agent Teams for parallel execution?',
        choices: [
          { name: 'No (Recommended) — sequential, predictable, lower cost', value: 'sequential' },
          { name: 'Yes — parallel teammates, faster but experimental (Opus only)', value: 'parallel' },
        ],
        default: 'sequential',
      });
      teamMode = useTeams;
      parallelBackend = useTeams === 'parallel' ? 'claude_teams' : 'none';

      if (teamMode === 'parallel') {
        try {
          execFileSync('claude', ['config', 'set', 'env.CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS', '1'], { stdio: 'pipe' });
        } catch {
          // Still best-effort (init must not fail because the claude CLI is
          // missing), but tell the user the manual step is now on them.
          warnings.push(
            'Could not enable Agent Teams automatically. Run manually: claude config set env.CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS 1',
          );
        }
      }
    }

    // Write config
    const config = {
      model_mode: modelMode,
      ...(modelMode === 'multi-tool' && {
        multi_tool: {
          tools: multiToolTools,
          ...(codexModel && { codex: { model: codexModel } }),
        },
      }),
      default_depth: defaultDepth,
      default_intent: defaultIntent,
      team_mode: teamMode,
      parallel_backend: parallelBackend,
    };
    fs.writeFileSync(configPath, JSON.stringify(config, null, 2) + '\n');

    // Runtime-specific setup
    const filesCreated = ['.wazir/input/', '.wazir/state/', '.wazir/runs/', '.wazir/state/config.json'];
    const filesSkipped = [];

    const hostGuides = [
      { tool: 'codex', label: 'Codex', file: 'AGENTS.md' },
      { tool: 'gemini', label: 'Gemini', file: 'GEMINI.md' },
    ];
    for (const { tool, label, file } of hostGuides) {
      if (!multiToolTools.includes(tool)) {
        continue;
      }
      // A project may already carry its own AGENTS.md / GEMINI.md; only
      // replace it when the user explicitly passed --force.
      const written = writeHostGuide(path.join(cwd, file), renderHostGuide(label), force);
      (written ? filesCreated : filesSkipped).push(file);
    }

    // The optional sections are empty on the normal path, so the summary is
    // unchanged unless a file was skipped or a warning was raised.
    const skippedSection = filesSkipped.length > 0
      ? [
        'Files left untouched (already exist; rerun with --force to overwrite):',
        ...filesSkipped.map((f) => ` - ${f}`),
        '',
      ]
      : [];
    const warningSection = warnings.length > 0
      ? ['Warnings:', ...warnings.map((w) => ` - ${w}`), '']
      : [];

    const lines = [
      '',
      '\u2705 Pipeline initialized!',
      '',
      ` Mode: ${modelMode}`,
      ` Depth: ${defaultDepth}`,
      ` Intent: ${defaultIntent}`,
      ` Teams: ${teamMode}`,
      '',
      'Files created:',
      ...filesCreated.map((f) => ` - ${f}`),
      '',
      ...skippedSection,
      ...warningSection,
      'You can now use:',
      ' /wazir <your request> \u2014 Run the full pipeline',
      ' /clarifier \u2014 Research, clarify, plan',
      ' /executor \u2014 Autonomous execution',
      ' /reviewer \u2014 Final review and scoring',
      '',
    ];

    return {
      exitCode: 0,
      stdout: lines.join('\n'),
    };
  } catch (error) {
    // @inquirer/prompts signals Ctrl+C with ExitPromptError; 130 is the
    // conventional exit status for termination by SIGINT.
    if (error.name === 'ExitPromptError') {
      return { exitCode: 130, stderr: '\nInit cancelled.\n' };
    }
    return { exitCode: 1, stderr: `${error.message}\n` };
  }
}
@@ -92,11 +92,8 @@ protected_paths:
92
92
  - schemas
93
93
  - exports/hosts
94
94
  prohibited_terms:
95
- - Agent OS
96
- - daemon
97
95
  - HTTP control plane
98
96
  - web UI
99
- - OpenAI Symphony
100
97
  - Elixir
101
98
  adapters:
102
99
  context_mode:
@@ -24,6 +24,10 @@ On entering this phase, run:
24
24
  - unresolved questions list
25
25
  - scope summary
26
26
 
27
+ ## Review Loop
28
+
29
+ Clarification artifact is reviewed by the reviewer role using the review loop pattern with spec/clarification dimensions. The reviewer is invoked with `--mode clarification-review`. The clarifier resolves findings. Clarification does not flow to specify until all review passes complete.
30
+
27
31
  ## Approval Gate
28
32
 
29
33
  - no formal approval gate, but unresolved material ambiguity must be escalated
@@ -39,6 +39,10 @@ On rejection: `wazir capture event --run <run-id> --event gate_rejected --phase
39
39
  On completing this phase, run:
40
40
  `wazir capture event --run <run-id> --event phase_exit --phase <phase-name> --status completed`
41
41
 
42
+ ## Loop Structure
43
+
44
+ Follows the review loop pattern in `docs/reference/review-loop-pattern.md` with the canonical design-review dimensions (spec coverage, design-spec consistency, accessibility, visual consistency, exported-code fidelity). The designer role resolves findings. Starts when the approved design artifact enters the `design_review` phase. Pass count determined by depth. No extension.
45
+
42
46
  ## Failure Conditions
43
47
 
44
48
  - vague findings without visual evidence
@@ -33,6 +33,10 @@ On entering this phase, run:
33
33
 
34
34
  - explicit human approval required before design-review
35
35
 
36
+ ## Review Loop
37
+
38
+ After user approval, design artifact is reviewed via the design-review workflow (`workflows/design-review.md`) using the review loop pattern with the canonical design-review dimensions (spec coverage, design-spec consistency, accessibility, visual consistency, exported-code fidelity). The reviewer is invoked with `--mode design-review`. Design does not flow to planning until all review passes complete.
39
+
36
40
  ## Phase exit
37
41
 
38
42
  On completing this phase, run:
@@ -23,6 +23,10 @@ On entering this phase, run:
23
23
  - research artifact
24
24
  - cited findings
25
25
 
26
+ ## Review Loop
27
+
28
+ Research artifact is reviewed by the reviewer role using the review loop pattern (`docs/reference/review-loop-pattern.md`) with research dimensions (coverage, source quality, relevance, gaps, contradictions). The reviewer is invoked with `--mode research-review`. The researcher resolves findings. Research does not flow to specify until all review passes complete.
29
+
26
30
  ## Approval Gate
27
31
 
28
32
  - no formal approval gate, but unsupported research cannot flow forward
@@ -34,6 +34,10 @@ If either check fails:
34
34
  - code and docs changes
35
35
  - execution notes
36
36
 
37
+ ## Per-Task Review
38
+
39
+ Each task's output is reviewed using the review loop pattern with the 5 task-execution dimensions (correctness, tests, wiring, drift, quality). The reviewer is invoked with `--mode task-review --task-id <NNN>`. This is NOT the final review -- it is a per-task gate. Review happens BEFORE commit. Review logs use task-scoped filenames: `<phase>-task-<NNN>-review-pass-<N>.md`. See `docs/reference/review-loop-pattern.md` for code review scoping rules.
40
+
37
41
  ## Approval Gate
38
42
 
39
43
  - no new scope without explicit approval
@@ -37,6 +37,10 @@ On rejection: `wazir capture event --run <run-id> --event gate_rejected --phase
37
37
  On completing this phase, run:
38
38
  `wazir capture event --run <run-id> --event phase_exit --phase <phase-name> --status completed`
39
39
 
40
+ ## Loop Structure
41
+
42
+ Follows the review loop pattern in `docs/reference/review-loop-pattern.md` with plan dimensions. The planner role resolves findings. Pass count determined by depth. No extension.
43
+
40
44
  ## Failure Conditions
41
45
 
42
46
  - sequence gaps survive review
package/workflows/plan.md CHANGED
@@ -25,6 +25,10 @@ On entering this phase, run:
25
25
  - implementation plan artifact
26
26
  - ordered tasks and verification steps
27
27
 
28
+ ## Review Loop
29
+
30
+ Plan artifact is reviewed via the plan-review workflow (`workflows/plan-review.md`) using the review loop pattern with plan dimensions. The reviewer is invoked with `--mode plan-review`.
31
+
28
32
  ## Approval Gate
29
33
 
30
34
  - explicit human approval required before execution
@@ -36,6 +36,10 @@ On rejection: `wazir capture event --run <run-id> --event gate_rejected --phase
36
36
  On completing this phase, run:
37
37
  `wazir capture event --run <run-id> --event phase_exit --phase <phase-name> --status completed`
38
38
 
39
+ ## Loop Structure
40
+
41
+ This workflow IS a review loop. Follows the pattern in `docs/reference/review-loop-pattern.md` with spec/clarification dimensions. The specifier role resolves findings. Loop count tracked via `wazir capture loop-check --mode spec-challenge`. Pass count determined by depth (quick=3, standard=5, deep=7). No extension beyond depth pass count.
42
+
39
43
  ## Failure Conditions
40
44
 
41
45
  - rubber-stamp review
@@ -24,6 +24,10 @@ On entering this phase, run:
24
24
  - acceptance criteria
25
25
  - assumptions and non-goals
26
26
 
27
+ ## Review Loop
28
+
29
+ Spec artifact is reviewed via the spec-challenge workflow (`workflows/spec-challenge.md`) using the review loop pattern with spec dimensions. The reviewer is invoked with `--mode spec-challenge`. The specifier resolves findings.
30
+
27
31
  ## Approval Gate
28
32
 
29
33
  - explicit human approval required before planning
@@ -32,6 +32,10 @@ On entering this phase, run:
32
32
  On completing this phase, run:
33
33
  `wazir capture event --run <run-id> --event phase_exit --phase <phase-name> --status completed`
34
34
 
35
+ ## Relationship to Review Loops
36
+
37
+ Verification is invoked per-task during execution, not as a review loop. It produces deterministic proof, not adversarial findings.
38
+
35
39
  ## Failure Conditions
36
40
 
37
41
  - stale or partial verification