maestro-flow 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. package/.claude/commands/maestro-analyze.md +1 -1
  2. package/.claude/commands/maestro-brainstorm.md +1 -1
  3. package/.claude/commands/maestro-collab.md +1 -1
  4. package/.claude/commands/maestro-execute.md +10 -1
  5. package/.claude/commands/maestro-guard.md +101 -0
  6. package/.claude/commands/maestro-impeccable.md +77 -74
  7. package/.claude/commands/maestro-plan.md +15 -2
  8. package/.claude/commands/maestro-ralph-execute.md +9 -2
  9. package/.claude/commands/maestro-ralph.md +8 -1
  10. package/.claude/commands/maestro-verify.md +15 -1
  11. package/.claude/commands/quality-auto-test.md +1 -1
  12. package/.claude/commands/quality-debug.md +1 -1
  13. package/.claude/commands/quality-refactor.md +1 -1
  14. package/.claude/commands/quality-retrospective.md +1 -1
  15. package/.claude/commands/quality-review.md +15 -1
  16. package/.claude/commands/quality-test.md +1 -1
  17. package/.claude/commands/security-audit.md +154 -0
  18. package/.claude/skills/maestro-help/index/catalog.json +2 -0
  19. package/.codex/skills/maestro-analyze/SKILL.md +18 -1
  20. package/.codex/skills/maestro-brainstorm/SKILL.md +17 -4
  21. package/.codex/skills/maestro-collab/SKILL.md +7 -1
  22. package/.codex/skills/maestro-execute/SKILL.md +365 -348
  23. package/.codex/skills/maestro-guard/SKILL.md +97 -0
  24. package/.codex/skills/maestro-impeccable/SKILL.md +76 -73
  25. package/.codex/skills/maestro-plan/SKILL.md +66 -7
  26. package/.codex/skills/maestro-ralph/SKILL.md +1 -1
  27. package/.codex/skills/maestro-verify/SKILL.md +18 -1
  28. package/.codex/skills/quality-auto-test/SKILL.md +13 -3
  29. package/.codex/skills/quality-debug/SKILL.md +362 -346
  30. package/.codex/skills/quality-refactor/SKILL.md +1 -1
  31. package/.codex/skills/quality-retrospective/SKILL.md +292 -292
  32. package/.codex/skills/quality-review/SKILL.md +374 -365
  33. package/.codex/skills/quality-test/SKILL.md +1 -1
  34. package/.codex/skills/security-audit/SKILL.md +154 -0
  35. package/bin/maestro-hook-runner.js +21 -1
  36. package/dashboard/dist-server/src/coordinator/output-parser.js +27 -0
  37. package/dashboard/dist-server/src/coordinator/output-parser.js.map +1 -1
  38. package/dist/src/commands/coordinate.d.ts.map +1 -1
  39. package/dist/src/commands/coordinate.js +2 -0
  40. package/dist/src/commands/coordinate.js.map +1 -1
  41. package/dist/src/commands/hooks.d.ts +49 -0
  42. package/dist/src/commands/hooks.d.ts.map +1 -1
  43. package/dist/src/commands/hooks.js +236 -33
  44. package/dist/src/commands/hooks.js.map +1 -1
  45. package/dist/src/commands/install-backend.d.ts +2 -0
  46. package/dist/src/commands/install-backend.d.ts.map +1 -1
  47. package/dist/src/commands/install-backend.js +72 -0
  48. package/dist/src/commands/install-backend.js.map +1 -1
  49. package/dist/src/commands/install.d.ts.map +1 -1
  50. package/dist/src/commands/install.js +15 -2
  51. package/dist/src/commands/install.js.map +1 -1
  52. package/dist/src/coordinator/output-parser.d.ts.map +1 -1
  53. package/dist/src/coordinator/output-parser.js +27 -0
  54. package/dist/src/coordinator/output-parser.js.map +1 -1
  55. package/dist/src/hooks/delegate-monitor.d.ts +1 -0
  56. package/dist/src/hooks/delegate-monitor.d.ts.map +1 -1
  57. package/dist/src/hooks/delegate-monitor.js +1 -1
  58. package/dist/src/hooks/delegate-monitor.js.map +1 -1
  59. package/dist/src/hooks/guards/workflow-guard.d.ts +15 -0
  60. package/dist/src/hooks/guards/workflow-guard.d.ts.map +1 -1
  61. package/dist/src/hooks/guards/workflow-guard.js +61 -1
  62. package/dist/src/hooks/guards/workflow-guard.js.map +1 -1
  63. package/dist/src/hooks/plugins/decision-log-plugin.d.ts +19 -0
  64. package/dist/src/hooks/plugins/decision-log-plugin.d.ts.map +1 -0
  65. package/dist/src/hooks/plugins/decision-log-plugin.js +28 -0
  66. package/dist/src/hooks/plugins/decision-log-plugin.js.map +1 -0
  67. package/dist/src/hooks/plugins/index.d.ts +2 -0
  68. package/dist/src/hooks/plugins/index.d.ts.map +1 -1
  69. package/dist/src/hooks/plugins/index.js +1 -0
  70. package/dist/src/hooks/plugins/index.js.map +1 -1
  71. package/dist/src/hooks/session-context.d.ts +1 -0
  72. package/dist/src/hooks/session-context.d.ts.map +1 -1
  73. package/dist/src/hooks/session-context.js +1 -1
  74. package/dist/src/hooks/session-context.js.map +1 -1
  75. package/dist/src/hooks/skill-context.d.ts +1 -0
  76. package/dist/src/hooks/skill-context.d.ts.map +1 -1
  77. package/dist/src/hooks/skill-context.js +1 -1
  78. package/dist/src/hooks/skill-context.js.map +1 -1
  79. package/dist/src/hooks/spec-injector.d.ts.map +1 -1
  80. package/dist/src/hooks/spec-injector.js +2 -0
  81. package/dist/src/hooks/spec-injector.js.map +1 -1
  82. package/dist/src/i18n/locales/en.d.ts.map +1 -1
  83. package/dist/src/i18n/locales/en.js +13 -0
  84. package/dist/src/i18n/locales/en.js.map +1 -1
  85. package/dist/src/i18n/locales/zh.d.ts.map +1 -1
  86. package/dist/src/i18n/locales/zh.js +13 -0
  87. package/dist/src/i18n/locales/zh.js.map +1 -1
  88. package/dist/src/i18n/types.d.ts +7 -0
  89. package/dist/src/i18n/types.d.ts.map +1 -1
  90. package/dist/src/tui/install-ui/InstallConfirm.d.ts +5 -0
  91. package/dist/src/tui/install-ui/InstallConfirm.d.ts.map +1 -1
  92. package/dist/src/tui/install-ui/InstallConfirm.js +1 -1
  93. package/dist/src/tui/install-ui/InstallConfirm.js.map +1 -1
  94. package/dist/src/tui/install-ui/InstallExecution.d.ts +2 -0
  95. package/dist/src/tui/install-ui/InstallExecution.d.ts.map +1 -1
  96. package/dist/src/tui/install-ui/InstallExecution.js +22 -3
  97. package/dist/src/tui/install-ui/InstallExecution.js.map +1 -1
  98. package/dist/src/tui/install-ui/InstallFlow.d.ts +1 -1
  99. package/dist/src/tui/install-ui/InstallFlow.d.ts.map +1 -1
  100. package/dist/src/tui/install-ui/InstallFlow.js +25 -4
  101. package/dist/src/tui/install-ui/InstallFlow.js.map +1 -1
  102. package/dist/src/tui/install-ui/InstallHub.d.ts +5 -0
  103. package/dist/src/tui/install-ui/InstallHub.d.ts.map +1 -1
  104. package/dist/src/tui/install-ui/InstallHub.js +16 -0
  105. package/dist/src/tui/install-ui/InstallHub.js.map +1 -1
  106. package/dist/src/tui/install-ui/InstallResult.d.ts.map +1 -1
  107. package/dist/src/tui/install-ui/InstallResult.js +1 -1
  108. package/dist/src/tui/install-ui/InstallResult.js.map +1 -1
  109. package/package.json +1 -1
  110. package/workflows/debug.md +73 -0
  111. package/workflows/execute.md +27 -0
  112. package/workflows/plan.md +11 -0
  113. package/workflows/review.md +33 -1
  114. package/workflows/tdd.md +257 -0
  115. package/workflows/verify.md +57 -0
@@ -0,0 +1,97 @@
1
+ ---
2
+ name: maestro-guard
3
+ description: Manage editing boundary restrictions
4
+ argument-hint: "<on|off|status|allow <path>|deny <path>>"
5
+ allowed-tools: Read, Write, Bash, Glob
6
+ ---
7
+ <purpose>
8
+ Configure directory-level write boundaries enforced by the workflow-guard PreToolUse hook.
9
+ When enabled, Write and Edit tool calls targeting files outside allowed paths are blocked.
10
+
11
+ Subcommands:
12
+ - **on** -- Enable path guard (defaults to `src/`, `tests/`, and `.workflow/` if no paths configured)
13
+ - **off** -- Disable path guard (preserves path list)
14
+ - **status** -- Show current guard configuration
15
+ - **allow `<path>`** -- Add a directory to the allowed paths list
16
+ - **deny `<path>`** -- Switch to deny mode and add path to deny list
17
+ </purpose>
18
+
19
+ <context>
20
+ $ARGUMENTS -- Parse subcommand and optional path argument.
21
+
22
+ **Config location:** `.workflow/config.json` -> `guard` section
23
+
24
+ ```json
25
+ {
26
+ "guard": {
27
+ "enabled": false,
28
+ "mode": "allow",
29
+ "paths": []
30
+ }
31
+ }
32
+ ```
33
+
34
+ **Enforcement:** The `workflow-guard` hook (PreToolUse on Write/Edit) reads this config
35
+ and blocks operations targeting files outside boundaries. Requires hooks level >= `full`.
36
+ </context>
37
+
38
+ <execution>
39
+
40
+ **Step 1: Parse subcommand**
41
+
42
+ Extract from $ARGUMENTS:
43
+ - `on` / `off` / `status` / `allow <path>` / `deny <path>`
44
+ - If no subcommand, default to `status`
45
+
46
+ **Step 2: Read config**
47
+
48
+ Read `.workflow/config.json`. If file missing, initialize with empty guard section.
49
+
50
+ **Step 3: Execute subcommand**
51
+
52
+ **`status`:**
53
+ - Display: enabled/disabled, mode (allow/deny), paths list
54
+ - Check if workflow-guard hook is active (read `.codex/settings.json` for hook presence)
55
+ - If guard enabled but hook not active, warn: "WARNING: PathGuard enabled but workflow-guard hook not installed. Run `maestro hooks level full` to activate."
56
+
57
+ **`on`:**
58
+ - Set `guard.enabled = true`
59
+ - If `guard.paths` is empty, set default: `["src/", "tests/", ".workflow/"]`
60
+ - Check hook level, warn if < full
61
+ - Write config
62
+
63
+ **`off`:**
64
+ - Set `guard.enabled = false`
65
+ - Preserve existing paths and mode
66
+ - Write config
67
+
68
+ **`allow <path>`:**
69
+ - Normalize path to forward slashes, ensure trailing slash for directories
70
+ - If `guard.mode` is `deny`, switch to `allow` and clear paths with warning
71
+ - Add path to `guard.paths` (deduplicate)
72
+ - Set `guard.enabled = true` if not already
73
+ - Write config
74
+
75
+ **`deny <path>`:**
76
+ - Normalize path to forward slashes, ensure trailing slash for directories
77
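+ - If `guard.mode` is `allow`, clear the existing paths with a warning (they would otherwise be reinterpreted as denied paths), then set `guard.mode = "deny"`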
78
+ - Add path to `guard.paths` (deduplicate)
79
+ - Set `guard.enabled = true` if not already
80
+ - Write config
81
+
82
+ **Step 4: Confirm**
83
+
84
+ Display updated guard configuration.
85
+
86
+ </execution>
87
+
88
+ <error_codes>
89
+ - E001: `.workflow/config.json` not found and cannot be created (not a maestro project)
90
+ - W001: PathGuard enabled but workflow-guard hook not installed
91
+ </error_codes>
92
+
93
+ <success_criteria>
94
+ - [ ] Config read/written correctly
95
+ - [ ] Hook level warning displayed when applicable
96
+ - [ ] Updated configuration shown after changes
97
+ </success_criteria>
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: maestro-impeccable
3
- description: Production-grade UI design 24 commands + chain orchestration with quality gates + design search
3
+ description: Use when designing, auditing, polishing, or improving frontend UI — websites, dashboards, landing pages, components
4
4
  argument-hint: "<command|chain|intent> [target] [flags]"
5
5
  allowed-tools: Read, Write, Edit, Bash, Glob, Grep, request_user_input
6
6
  ---
@@ -70,14 +70,14 @@ responsive-design.md, spatial-design.md, typography.md, ux-writing.md
70
70
 
71
71
  | Chain | Steps | Scenario |
72
72
  |-------|-------|----------|
73
- | build | teach? → explore? → shape → craft → critique → [refine] → audit → polish | 从零新建 |
74
- | redesign | document → explore → shape → craft → critique → [refine] → audit → polish | 基于现有代码重设计 |
75
- | improve | critique → [refine] → polish → audit | 迭代改进 |
76
- | enhance | {cmd...} → critique → [refine] → polish | 定向增强(支持多命令) |
77
- | launch | harden → adapt → optimize → audit → polish | 全方位上线准备 |
78
- | harden | harden → audit → polish | 边界加固 |
79
- | foundation | teach? → explore → document → extract | 纯设计系统建设 |
80
- | live | live | 实时迭代 |
73
+ | build | teach? → explore? → shape → craft → critique → [refine] → audit → polish | New from scratch |
74
+ | redesign | document → explore → shape → craft → critique → [refine] → audit → polish | Redesign existing code |
75
+ | improve | critique → [refine] → polish → audit | Iterative improvement |
76
+ | enhance | {cmd...} → critique → [refine] → polish | Targeted enhancement (multi-command) |
77
+ | launch | harden → adapt → optimize → audit → polish | Full production readiness |
78
+ | harden | harden → audit → polish | Edge case hardening |
79
+ | foundation | teach? → explore → document → extract | Design system setup |
80
+ | live | live | Real-time iteration |
81
81
 
82
82
  - `?` = conditional: teach if PRODUCT.md missing; explore if DESIGN.md missing and --skip-design not set
83
83
  - `[refine]` = quality gate loop: gate fails → auto-select fix commands from findings → re-gate
@@ -85,64 +85,67 @@ responsive-design.md, spatial-design.md, typography.md, ux-writing.md
85
85
 
86
86
  ## Free Text Routing
87
87
 
88
- Three-layer priority matching. Stop on first match.
88
+ Three-layer priority matching. Stop on first match — do not continue to lower layers.
89
89
 
90
- ### Layer 1: Intent matches single command → Direct
90
+ ### Layer 1: Single command intent → Direct
91
91
 
92
- Match user description against Command Routing descriptions. Route to the closest single command.
92
+ Semantically match user description against the Command Routing table's Description column. Match the closest **single** command.
93
+
94
+ **Skip condition**: If the prompt also contains a Layer 2 chain keyword AND does not focus on a single design dimension, skip this layer.
95
+ Example: `enhance colors and typography` — "enhance" is a chain keyword + multiple design dimensions → skip to Layer 2.
93
96
 
94
97
  | Intent signal | Command |
95
98
  |---------------|---------|
96
- | review, UX check, heuristic, 评审, 评分 | critique |
97
- | a11y, audit, accessibility, performance audit, 技术检查 | audit |
98
- | animation, motion, transitions, 动效, 加动画 | animate |
99
- | color, palette, contrast, OKLCH, 配色, 颜色 | colorize |
100
- | typography, font, type scale, 字体, 排版 | typeset |
101
- | layout, spacing, grid, alignment, 布局, 间距 | layout |
102
- | tone down, too loud, 太花, 视觉噪音 | quieter |
103
- | too bland, bolder, more personality, 太平淡 | bolder |
104
- | simplify, strip, too complex, cognitive load, 太复杂 | distill |
105
- | polish, micro-adjust, pixel perfect, 打磨 | polish |
106
- | copy, labels, error messages, UX writing, 文案 | clarify |
107
- | responsive, mobile, breakpoints, 适配 | adapt |
108
- | performance, loading, bundle, jank, 性能 | optimize |
109
- | edge cases, error states, i18n, overflow, 边界 | harden |
110
- | onboarding, first-run, empty state, 引导 | onboard |
111
- | delight, personality, joy, memorable, 趣味 | delight |
112
- | extraordinary, push limits, 炫酷, 极限 | overdrive |
113
- | plan UX, wireframe, information architecture, 规划 | shape |
114
- | variants, compare styles, multi-style, 多风格 | explore |
115
- | PRODUCT.md, brand definition, 品牌定义 | teach |
116
- | DESIGN.md, design documentation, 设计文档 | document |
117
- | pull tokens, extract components, 提取组件 | extract |
118
- | browser iteration, 实时迭代 | live |
119
-
120
- ### Layer 2: Concrete build task → Direct craft
121
-
122
- Layer 1 missed, but intent is "build/create specific thing":
123
- - Has specific file path or target
124
- - Has detailed visual specs (layout, style, palette)
125
- - Has reference material
126
-
127
- → Route to **craft** (Direct)
128
-
129
- ### Layer 3: Project intent → Chain
130
-
131
- Layer 1+2 missed, broad project direction:
99
+ | review, check UX, score, heuristic, evaluate usability | critique |
100
+ | audit, a11y, accessibility, technical check, performance audit, code quality | audit |
101
+ | add animation, motion, transitions, micro-interactions | animate |
102
+ | color, palette, OKLCH, contrast, color scheme | colorize |
103
+ | font, typography, type scale, line height, font pairing | typeset |
104
+ | layout, spacing, grid, alignment, visual hierarchy | layout |
105
+ | too loud, tone down, visual noise, make it simpler, too busy | quieter |
106
+ | too bland, bolder, more personality, stronger, more contrast | bolder |
107
+ | too complex, simplify, strip, remove clutter, cognitive load | distill |
108
+ | polish, fine-tune, pixel perfect, final pass, refine details | polish |
109
+ | copy, labels, error messages, UX writing, microcopy, CTAs | clarify |
110
+ | responsive, mobile, adapt, breakpoints, touch targets | adapt |
111
+ | performance, loading, bundle, jank, speed, rendering | optimize |
112
+ | edge cases, error states, i18n, overflow, empty state hardening | harden |
113
+ | onboarding, first-run, empty state, activation, progressive disclosure | onboard |
114
+ | fun, surprise, personality, memorable, joy, delight | delight |
115
+ | extraordinary, push limits, ambitious effects, cutting-edge | overdrive |
116
+ | plan UX, wireframe, information architecture, visual direction | shape |
117
+ | multi-style, variants, compare styles, style comparison | explore |
118
+ | brand definition, PRODUCT.md, product context | teach |
119
+ | extract design, DESIGN.md, document design system | document |
120
+ | pull tokens, extract components, design system extraction | extract |
121
+ | real-time, browser iteration, live editing | live |
122
+
123
+ ### Layer 2: Project intent → Chain
124
+
125
+ Layer 1 did not match or was skipped. Check for chain-level keywords — even if the prompt also contains a specific target/path, chain matching takes priority over Layer 3.
132
126
 
133
127
  | Pattern | Chain |
134
128
  |---------|-------|
135
- | create, build, new, from scratch | build |
136
- | redesign, rethink, restyle | redesign |
137
- | improve, iterate, better | improve |
138
- | enhance, visual upgrade | enhance |
139
- | launch, deploy, ship, production-ready | launch |
140
- | harden, production, edge cases | harden |
141
- | design system, tokens, design spec | foundation |
142
- | live, browser | live |
129
+ | new, create, build, from scratch, start fresh | build |
130
+ | redo, redesign, rethink, restyle, overhaul, revamp | redesign |
131
+ | improve, iterate, better, refine overall | improve |
132
+ | enhance, visual upgrade, level up | enhance |
133
+ | launch, deploy, ship, production-ready, go live | launch |
134
+ | harden, production-harden, edge cases | harden |
135
+ | design system, tokens, design foundation, design infrastructure | foundation |
136
+ | real-time, live, browser | live |
143
137
 
144
138
  Ambiguous + no `-y` → `request_user_input`.
145
139
 
140
+ ### Layer 3: Concrete build task → Direct craft
141
+
142
+ Neither Layer 1 nor Layer 2 matched, but the intent is to build/create a specific thing:
143
+ - Contains a specific file path or target (`d:\path`, `src/pages/`, `index.html`)
144
+ - Contains detailed visual specs (layout, style, color scheme)
145
+ - Contains reference material (`based on...`, `like...`, `similar to...`)
146
+
147
+ → Route to **craft** (Direct)
148
+
146
149
  <invariants>
147
150
  1. Prerequisites before any design work — never skip context loading or register detection
148
151
  2. Read workflow file before execution — never execute a command without loading its .md
@@ -161,23 +164,23 @@ Before reading any command workflow:
161
164
  ## Direct Execution
162
165
 
163
166
  1. Prerequisites ✓
164
- 2. **显示执行信息**:
167
+ 2. **Display execution info**:
165
168
  ```
166
169
  ── Command: {command} ────────────────────
167
170
  Category: {category} | Target: {target}
168
171
  ─────────────────────────────────────────
169
172
  ```
170
173
  3. Read `~/.maestro/workflows/impeccable/{command}.md`
171
- 4. **TodoWrite 跟踪**:按 workflow 文件中的主要阶段创建 todo
172
- - 格式:`[{command}] {phase description}`
173
- - 每个阶段完成后立即标记 completed
174
+ 4. **Progress tracking**: create todo items for each major phase in the workflow file
175
+ - Format: `[{command}] {phase description}`
176
+ - Mark each phase completed immediately upon finishing
174
177
  5. Follow workflow file instructions
175
178
  6. Post: suggest logical next command (teach→shape, shape→craft, craft→critique, etc.)
176
179
 
177
180
  ## Chain Execution
178
181
 
179
182
  1. Prerequisites ✓
180
- 2. **显示执行链**:解析 chain 定义,输出完整步骤预览:
183
+ 2. **Display chain preview**: parse chain definition, output full step preview:
181
184
  ```
182
185
  ── Chain: build ──────────────────────────
183
186
  1. teach (conditional: PRODUCT.md missing)
@@ -191,31 +194,31 @@ Before reading any command workflow:
191
194
  ─────────────────────────────────────────
192
195
  Target: {target}
193
196
  ```
194
- - `◆` 标记 quality gate 步骤,显示阈值
195
- - `↺` 标记 refine loop,显示最大循环次数
196
- - conditional 步骤注明触发条件
197
- - 跳过的 conditional 步骤标记 `(skipped)`
197
+ - `◆` marks quality gate steps with threshold
198
+ - `↺` marks refine loop with max iteration count
199
+ - Conditional steps show trigger condition
200
+ - Skipped conditional steps marked `(skipped)`
198
201
  3. Create session: `.workflow/.maestro/ui-craft-{YYYYMMDD-HHmmss}/status.json`
199
202
  ```json
200
203
  { "chain_type": "...", "target": "...", "steps": [...], "current_step": 0,
201
204
  "gate_history": [], "loop_count": 0, "status": "running" }
202
205
  ```
203
- 4. **TodoWrite 初始化**:为 chain 所有步骤创建 todo
204
- - 每步一项,格式:`[chain] step N: {command} — {description}`
205
- - conditional 步骤若跳过,立即标记 completed
206
- - quality gate 步骤标注阈值:`[chain] step 5: critique ◆ gate ≥26/40`
206
+ 4. **Init tracking**: create todo items for all chain steps
207
+ - One item per step, format: `[chain] step N: {command} — {description}`
208
+ - If conditional step is skipped, immediately mark completed
209
+ - Quality gate steps include threshold: `[chain] step 5: critique ◆ gate ≥26/40`
207
210
  5. For each step:
208
211
  - Read `~/.maestro/workflows/impeccable/{command}.md` → execute
209
- - **步骤开始**:TodoWrite 标记当前步骤 in_progress
210
- - **步骤完成**:TodoWrite 标记 completed + update status.json (`current_step`, step `status`)
211
- - **步骤失败**:TodoWrite 标记 completed(with note) + 记录原因
212
+ - **Step start**: mark current step in_progress
213
+ - **Step done**: mark completed + update status.json (`current_step`, step `status`)
214
+ - **Step failed**: mark completed (with note) + record reason
212
215
  6. **Quality gate** (critique/audit steps):
213
216
  - Parse score: critique `**Total** | | **N/40**`, audit `**Total** | | **N/20**`
214
217
  - Count `[P0]` / `[P1]` tags
215
218
  - Pass: score ≥ threshold AND P0 == 0 → advance
216
219
  - Fail: collect suggested commands from findings → execute → re-gate
217
220
  - Max loops exceeded → force advance with warning
218
- - TodoWrite:gate 结果记入当前步骤备注(score, P0/P1 count, pass/fail)
221
+ - Record gate result in current step notes (score, P0/P1 count, pass/fail)
219
222
  7. Final report: scores + trend + commands executed
220
223
 
221
224
  ## Resume
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: maestro-plan
3
- description: Plan phase execution with exploration and verification
3
+ description: Use when creating, revising, or verifying an execution plan for a phase or task
4
4
  argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] \"<phase> [--dir <path>] [--gaps] [--spec SPEC-xxx] [--collab]\""
5
5
  allowed-tools: spawn_agents_on_csv, Read, Write, Edit, Bash, Glob, Grep, request_user_input
6
6
  ---
@@ -8,13 +8,64 @@ allowed-tools: spawn_agents_on_csv, Read, Write, Edit, Bash, Glob, Grep, request
8
8
  <purpose>
9
9
  Wave-based planning via `spawn_agents_on_csv`. Wave 1 explores codebase in parallel across multiple angles, Wave 2 generates verified execution plan consuming all exploration findings.
10
10
 
11
- Supports: Create (default), Revise (`--revise`), Check (`--check`), Gaps (`--gaps`).
11
+ Supports: Create (default), Revise (`--revise`), Check (`--check`), Gaps (`--gaps`), TDD (`--tdd`).
12
12
  </purpose>
13
13
 
14
+ <tdd_mode>
15
+
16
+ ## TDD Mode (`--tdd`)
17
+
18
+ When `--tdd` is active, the planning agent in Wave 2 decomposes each behavior into RED-GREEN-REFACTOR triplets.
19
+
20
+ ### Iron Law
21
+
22
+ **NO PRODUCTION CODE WITHOUT A FAILING TEST FIRST.** Write code before the test? Delete it. Start over.
23
+
24
+ ### Task Chain Structure
25
+
26
+ For each behavior B:
27
+ - **TASK-{N}a (RED)**: Write failing test. Verify it FAILS (not errors). type=test, tdd_phase=red.
28
+ - **TASK-{N}b (GREEN)**: Write minimal code to pass. Verify ALL tests pass. type=feature, tdd_phase=green, depends_on=[TASK-{N}a].
29
+ - **TASK-{N}c (REFACTOR)**: Clean up. Keep tests green. No new behavior. type=refactor, tdd_phase=refactor, depends_on=[TASK-{N}b]. Skip if GREEN code already clean.
30
+
31
+ ### Wave Assignment
32
+ ```
33
+ Wave 1: TASK-1a, TASK-2a (RED — parallel if independent)
34
+ Wave 2: TASK-1b, TASK-2b (GREEN — parallel)
35
+ Wave 3: TASK-1c, TASK-2c (REFACTOR — parallel)
36
+ ```
37
+ Within a group: `{N}a → {N}b → {N}c` (strict dependency).
38
+
39
+ ### plan.json Output
40
+ ```json
41
+ { "tdd_mode": true, "tdd_groups": [{ "group": 1, "behavior": "...", "tasks": ["TASK-1a","TASK-1b","TASK-1c"] }] }
42
+ ```
43
+ Standard plan.json + .task/TASK-*.json — consumable by maestro-execute without modification.
44
+
45
+ ### Execution Enforcement
46
+ - RED task: verify test exists AND fails. If passes → BLOCKED "wrong test".
47
+ - GREEN task: verify ALL tests pass. If RED test still fails → BLOCKED.
48
+ - REFACTOR task: verify ALL tests still pass. If fails → undo.
49
+
50
+ ### Red Flags — These Thoughts Mean STOP
51
+ - "Too simple to need TDD" / "I'll write tests after" / "Let me explore first, then add tests"
52
+ - "Tests after achieve the same goals" / "TDD will slow me down"
53
+ All mean: **follow the cycle anyway**.
54
+
55
+ ### Rationalization Table
56
+ | Excuse | Reality |
57
+ |--------|---------|
58
+ | "Too simple to test" | Simple code breaks. Test takes 30 seconds. |
59
+ | "I'll test after" | Tests passing immediately prove nothing. |
60
+ | "Need to explore first" | Fine. Throw away exploration, start fresh with TDD. |
61
+ | "Test hard = design unclear" | Listen to the test. Hard to test = hard to use. |
62
+
63
+ </tdd_mode>
64
+
14
65
  <context>
15
66
  $ARGUMENTS — phase number/text and optional flags.
16
67
 
17
- **Flags**: `-y` (auto), `-c N` (concurrency, default 4), `--continue` (resume), `--dir <path>`, `--gaps` (issue-linked), `--spec SPEC-xxx`, `--collab`, `--revise`, `--check`
68
+ **Flags**: `-y` (auto), `-c N` (concurrency, default 4), `--continue` (resume), `--dir <path>`, `--gaps` (issue-linked), `--spec SPEC-xxx`, `--collab`, `--revise`, `--check`, `--tdd` (RED-GREEN-REFACTOR task chains)
18
69
 
19
70
  **Scope routing** (priority): --dir → from parent artifact; no args → milestone; digit → phase; text → adhoc/standalone.
20
71
 
@@ -68,7 +119,7 @@ S_RESUME → S_CHECK WHEN: W2 done, check pending
68
119
 
69
120
  S_CONTEXT → S_CSV_GEN DO: load context.md, conclusions.json, specs, wiki, codebase docs
70
121
 
71
- S_CSV_GEN → S_WAVE_1 DO: determine exploration angles, generate tasks.csv, user validates (skip -y)
122
+ S_CSV_GEN → S_WAVE_1 DO: pre-flight (`maestro collab preflight --phase N`; exit 1 → warn + ask), determine exploration angles, generate tasks.csv, user validates (skip -y)
72
123
 
73
124
  S_WAVE_1 → S_WAVE_2 DO: spawn parallel explorations, merge results, build prev_context
74
125
 
@@ -130,7 +181,15 @@ Collision detection against same-milestone plans.
130
181
 
131
182
  <success_criteria>
132
183
  - [ ] Parallel explorations + sequential planning via spawn_agents_on_csv
133
- - [ ] plan.json + TASK files with read_first and grep-verifiable convergence criteria
134
- - [ ] Plan confidence scored, readiness gate checked, collision detected
135
- - [ ] PLN artifact registered, issues linked if --gaps
184
+ - [ ] plan.json with summary, approach, task_ids, waves (with phase labels), confidence section
185
+ - [ ] .task/TASK-*.json with read_first[] (file being modified + source of truth files)
186
+ - [ ] Every task has convergence.criteria[] with grep-verifiable conditions (no subjective language)
187
+ - [ ] Every task action and implementation contain concrete values (no "align X with Y")
188
+ - [ ] Plan confidence scored with 5-dimension factor model
189
+ - [ ] Readiness gate checked before collision detection
190
+ - [ ] Pressure pass completed on highest-complexity task
191
+ - [ ] Collision detection against same-milestone plans (non-blocking)
192
+ - [ ] Plan-checker passed (or minor issues acknowledged, max 3 iterations)
193
+ - [ ] PLN artifact registered in state.json
194
+ - [ ] If --gaps: issues linked bidirectionally (task_refs[], task_plan_dir in issues.jsonl)
136
195
  </success_criteria>
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: maestro-ralph
3
- description: Adaptive lifecycle engine -- infer state, build command chain
3
+ description: Use when the optimal command sequence is unclear and needs automated state-based determination
4
4
  argument-hint: "\"intent\" [-y] | status | continue | execute"
5
5
  allowed-tools: spawn_agents_on_csv, Read, Write, Edit, Bash, Glob, Grep, request_user_input
6
6
  ---
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: maestro-verify
3
- description: Verify goals with must-have checks and test coverage validation
3
+ description: Use after execution to verify goals are actually achieved with evidence-based structural checks
4
4
  argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] \"<phase> [--skip-tests] [--skip-antipattern]\""
5
5
  allowed-tools: spawn_agents_on_csv, Read, Write, Edit, Bash, Glob, Grep, request_user_input
6
6
  ---
@@ -10,6 +10,18 @@ Wave-based 3-layer Goal-Backward verification using `spawn_agents_on_csv`.
10
10
  Wave 1 (truth + artifact existence) -> Wave 2 (substance + wiring) -> Wave 3 (anti-pattern + Nyquist audit).
11
11
 
12
12
  **Core principle**: Task completion != Goal achievement. A task marked complete may contain stubs/placeholders. This verifier checks that goals are actually achieved.
13
+
14
+ ## Iron Law
15
+
16
+ **NO COMPLETION CLAIMS WITHOUT FRESH VERIFICATION EVIDENCE IN THIS MESSAGE.** Before any success claim: IDENTIFY what command proves it → RUN it fresh → READ full output → VERIFY it confirms the claim → ONLY THEN make the claim.
17
+
18
+ ## Forbidden Wording
19
+ BANNED: "Should work now", "Probably passes", "Seems correct", "Looks good", "I'm confident that...", any satisfaction BEFORE running verification. Replace with evidence: `"Tests pass: 42/42 green (exit 0)"`.
20
+
21
+ ## Red Flags — These Thoughts Mean STOP
22
+ - "I just wrote this code, it definitely works" / "The changes are too small to break anything"
23
+ - "I already verified this earlier" / "The agent said it's done"
24
+ All mean: **run verification command NOW, read output, then report**.
13
25
  </purpose>
14
26
 
15
27
  <context>
@@ -182,12 +194,17 @@ Protocol: read before analysis, append-only, dedup by type+key.
182
194
  </error_codes>
183
195
 
184
196
  <success_criteria>
197
+ - [ ] Must-haves established from convergence.criteria + success_criteria + derived behaviors
185
198
  - [ ] All 3 waves executed (with skip flags respected)
186
199
  - [ ] verification.json + context.md produced
187
200
  - [ ] validation.json produced (if Nyquist ran)
188
201
  - [ ] Fix plans generated for gap clusters
189
202
  - [ ] Issues auto-created for gaps + blocker anti-patterns
203
+ - [ ] Post-verify knowledge inquiry triggered when applicable
190
204
  - [ ] Phase index.json updated with verification status
205
+ - [ ] VRF artifact registered in state.json
206
+ - [ ] Gap-fix closure loop documented: gaps → plan --gaps → execute → verify (re-run)
207
+ - [ ] Next step routed (quality-review if passed, plan --gaps if gaps, quality-auto-test if low coverage)
191
208
  - [ ] discoveries.ndjson append-only throughout
192
209
  </success_criteria>
193
210
  </output>
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: quality-auto-test
3
- description: Auto-generate and run tests from specs or coverage gaps
3
+ description: Use when test coverage needs automated expansion or existing tests need iterative convergence
4
4
  argument-hint: "<phase> [-y] [-c N] [--max-iter N] [--layer L0-L3] [--dry-run] [--re-run]"
5
5
  allowed-tools: spawn_agents_on_csv, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion
6
6
  ---
@@ -94,11 +94,15 @@ S_PARSE:
94
94
  -> S_SOURCE DO: resolve phase dir, detect route (resume/re-run/spec/gap/code)
95
95
 
96
96
  S_SOURCE:
97
- -> S_INFRA DO: extract scenarios per route, normalize to unified format
97
+ -> S_INFRA DO: extract scenarios per route, normalize to unified format, integrate quality artifacts
98
98
  Route A (spec): Parse REQ-*.md acceptance criteria, classify layers, generate fixtures
99
99
  Route B (gap): Read verification/coverage gaps, classify files by type
100
100
  Route C (code): Explore module boundaries, API endpoints, integration points
101
101
 
102
+ **Cross-artifact integration** (all routes, after primary extraction):
103
+ - **Review findings**: Query state.json for type=review artifacts on same phase. Extract critical/high findings → additional scenarios marked `source: "review_finding"`. If review verdict=="BLOCK" and these tests fail, suggest quality-debug.
104
+ - **Debug root causes**: Query state.json for type=debug artifacts on same phase. Generate regression test scenarios from confirmed root causes → marked `source: "debug_root_cause"`.
105
+
102
106
  S_INFRA:
103
107
  -> S_CSV_GEN DO: detect framework, read 2-3 existing tests, build infrastructure_hints
104
108
 
@@ -244,12 +248,18 @@ Protocol: read before writing tests, append-only, dedup by type+key.
244
248
 
245
249
  <success_criteria>
246
250
  - [ ] Route auto-selected from project state (spec/gap/code)
251
+ - [ ] Review findings and debug root causes integrated as additional test scenarios
247
252
  - [ ] Layers executed in order with fail-fast on critical
248
253
  - [ ] Test writing + diagnosis parallelized via spawn_agents_on_csv
249
254
  - [ ] Cross-layer context propagation via prev_context
250
255
  - [ ] Iteration engine: inner test_defect fix, outer strategy adjust
251
256
  - [ ] Test confidence scored per iteration (5-dimension model)
257
+ - [ ] Convergence check includes confidence >= 60% alongside pass_rate threshold
258
+ - [ ] Pressure pass completed on highest-pass-rate layer before completion
252
259
  - [ ] state.json, report.json, reflection-log.md written
253
- - [ ] If spec: traceability.md; if failures: issues auto-created
260
+ - [ ] TST artifact registered in state.json
261
+ - [ ] If spec: traceability.md written; if failures: issues auto-created in issues.jsonl
262
+ - [ ] If gap source: validation.json gaps updated (MISSING→COVERED)
263
+ - [ ] Next step routed (converged → verify, bugs → debug, >80% → quality-test, <80% → debug)
254
264
  </success_criteria>
255
265
  </output>