maestro-flow-one 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +173 -0
  3. package/bin/maestro-flow.js +730 -0
  4. package/claude/maestro-flow/SKILL.md +239 -0
  5. package/claude/maestro-flow/chains/templates.json +256 -0
  6. package/claude/maestro-flow/commands/learn/decompose.md +176 -0
  7. package/claude/maestro-flow/commands/learn/follow.md +167 -0
  8. package/claude/maestro-flow/commands/learn/investigate.md +221 -0
  9. package/claude/maestro-flow/commands/learn/retro.md +303 -0
  10. package/claude/maestro-flow/commands/learn/second-opinion.md +167 -0
  11. package/claude/maestro-flow/commands/lifecycle/amend.md +300 -0
  12. package/claude/maestro-flow/commands/lifecycle/analyze.md +126 -0
  13. package/claude/maestro-flow/commands/lifecycle/brainstorm.md +100 -0
  14. package/claude/maestro-flow/commands/lifecycle/composer.md +354 -0
  15. package/claude/maestro-flow/commands/lifecycle/execute.md +114 -0
  16. package/claude/maestro-flow/commands/lifecycle/fork.md +86 -0
  17. package/claude/maestro-flow/commands/lifecycle/init.md +78 -0
  18. package/claude/maestro-flow/commands/lifecycle/learn.md +140 -0
  19. package/claude/maestro-flow/commands/lifecycle/link-coordinate.md +71 -0
  20. package/claude/maestro-flow/commands/lifecycle/merge.md +61 -0
  21. package/claude/maestro-flow/commands/lifecycle/overlay.md +178 -0
  22. package/claude/maestro-flow/commands/lifecycle/plan.md +138 -0
  23. package/claude/maestro-flow/commands/lifecycle/player.md +404 -0
  24. package/claude/maestro-flow/commands/lifecycle/quick.md +56 -0
  25. package/claude/maestro-flow/commands/lifecycle/roadmap.md +164 -0
  26. package/claude/maestro-flow/commands/lifecycle/ui-design.md +93 -0
  27. package/claude/maestro-flow/commands/lifecycle/update.md +176 -0
  28. package/claude/maestro-flow/commands/lifecycle/verify.md +90 -0
  29. package/claude/maestro-flow/commands/manage/codebase-rebuild.md +75 -0
  30. package/claude/maestro-flow/commands/manage/codebase-refresh.md +57 -0
  31. package/claude/maestro-flow/commands/manage/harvest.md +94 -0
  32. package/claude/maestro-flow/commands/manage/issue-discover.md +77 -0
  33. package/claude/maestro-flow/commands/manage/issue.md +73 -0
  34. package/claude/maestro-flow/commands/manage/knowhow-capture.md +193 -0
  35. package/claude/maestro-flow/commands/manage/knowhow.md +77 -0
  36. package/claude/maestro-flow/commands/manage/learn.md +67 -0
  37. package/claude/maestro-flow/commands/manage/status.md +51 -0
  38. package/claude/maestro-flow/commands/manage/wiki.md +62 -0
  39. package/claude/maestro-flow/commands/milestone/audit.md +68 -0
  40. package/claude/maestro-flow/commands/milestone/complete.md +75 -0
  41. package/claude/maestro-flow/commands/milestone/release.md +96 -0
  42. package/claude/maestro-flow/commands/quality/auto-test.md +124 -0
  43. package/claude/maestro-flow/commands/quality/debug.md +115 -0
  44. package/claude/maestro-flow/commands/quality/refactor.md +55 -0
  45. package/claude/maestro-flow/commands/quality/retrospective.md +78 -0
  46. package/claude/maestro-flow/commands/quality/review.md +108 -0
  47. package/claude/maestro-flow/commands/quality/sync.md +51 -0
  48. package/claude/maestro-flow/commands/quality/test.md +103 -0
  49. package/claude/maestro-flow/commands/spec/add.md +49 -0
  50. package/claude/maestro-flow/commands/spec/load.md +51 -0
  51. package/claude/maestro-flow/commands/spec/remove.md +51 -0
  52. package/claude/maestro-flow/commands/spec/setup.md +51 -0
  53. package/claude/maestro-flow/commands/wiki/connect.md +62 -0
  54. package/claude/maestro-flow/commands/wiki/digest.md +69 -0
  55. package/codex/maestro-flow/SKILL.md +505 -0
  56. package/codex/maestro-flow/chains/templates.json +256 -0
  57. package/codex/maestro-flow/commands/learn/decompose.md +113 -0
  58. package/codex/maestro-flow/commands/learn/follow.md +83 -0
  59. package/codex/maestro-flow/commands/learn/investigate.md +83 -0
  60. package/codex/maestro-flow/commands/learn/retro.md +83 -0
  61. package/codex/maestro-flow/commands/learn/second-opinion.md +86 -0
  62. package/codex/maestro-flow/commands/lifecycle/amend.md +300 -0
  63. package/codex/maestro-flow/commands/lifecycle/analyze.md +483 -0
  64. package/codex/maestro-flow/commands/lifecycle/brainstorm.md +397 -0
  65. package/codex/maestro-flow/commands/lifecycle/composer.md +213 -0
  66. package/codex/maestro-flow/commands/lifecycle/execute.md +318 -0
  67. package/codex/maestro-flow/commands/lifecycle/fork.md +98 -0
  68. package/codex/maestro-flow/commands/lifecycle/init.md +134 -0
  69. package/codex/maestro-flow/commands/lifecycle/learn.md +80 -0
  70. package/codex/maestro-flow/commands/lifecycle/link-coordinate.md +257 -0
  71. package/codex/maestro-flow/commands/lifecycle/merge.md +69 -0
  72. package/codex/maestro-flow/commands/lifecycle/overlay.md +119 -0
  73. package/codex/maestro-flow/commands/lifecycle/plan.md +460 -0
  74. package/codex/maestro-flow/commands/lifecycle/player.md +323 -0
  75. package/codex/maestro-flow/commands/lifecycle/quick.md +124 -0
  76. package/codex/maestro-flow/commands/lifecycle/roadmap.md +468 -0
  77. package/codex/maestro-flow/commands/lifecycle/ui-design.md +135 -0
  78. package/codex/maestro-flow/commands/lifecycle/update.md +176 -0
  79. package/codex/maestro-flow/commands/lifecycle/verify.md +468 -0
  80. package/codex/maestro-flow/commands/manage/codebase-rebuild.md +347 -0
  81. package/codex/maestro-flow/commands/manage/codebase-refresh.md +66 -0
  82. package/codex/maestro-flow/commands/manage/harvest.md +91 -0
  83. package/codex/maestro-flow/commands/manage/issue-discover.md +431 -0
  84. package/codex/maestro-flow/commands/manage/issue.md +75 -0
  85. package/codex/maestro-flow/commands/manage/knowhow-capture.md +110 -0
  86. package/codex/maestro-flow/commands/manage/knowhow.md +95 -0
  87. package/codex/maestro-flow/commands/manage/learn.md +137 -0
  88. package/codex/maestro-flow/commands/manage/status.md +76 -0
  89. package/codex/maestro-flow/commands/manage/wiki.md +55 -0
  90. package/codex/maestro-flow/commands/milestone/audit.md +87 -0
  91. package/codex/maestro-flow/commands/milestone/complete.md +91 -0
  92. package/codex/maestro-flow/commands/milestone/release.md +70 -0
  93. package/codex/maestro-flow/commands/quality/auto-test.md +547 -0
  94. package/codex/maestro-flow/commands/quality/debug.md +334 -0
  95. package/codex/maestro-flow/commands/quality/refactor.md +151 -0
  96. package/codex/maestro-flow/commands/quality/retrospective.md +292 -0
  97. package/codex/maestro-flow/commands/quality/review.md +364 -0
  98. package/codex/maestro-flow/commands/quality/sync.md +111 -0
  99. package/codex/maestro-flow/commands/quality/test.md +498 -0
  100. package/codex/maestro-flow/commands/spec/add.md +101 -0
  101. package/codex/maestro-flow/commands/spec/load.md +77 -0
  102. package/codex/maestro-flow/commands/spec/remove.md +69 -0
  103. package/codex/maestro-flow/commands/spec/setup.md +75 -0
  104. package/codex/maestro-flow/commands/wiki/connect.md +73 -0
  105. package/codex/maestro-flow/commands/wiki/digest.md +87 -0
  106. package/package.json +24 -0
@@ -0,0 +1,334 @@
1
+ ---
2
+ name: quality-debug
3
+ description: Hypothesis-driven debugging via CSV wave pipeline. Wave 1 generates parallel hypotheses, Wave 2 attempts parallel fixes on confirmed hypotheses. Replaces quality-debug command.
4
+ argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] \"[bug description] [--from-uat <phase>] [--from-auto-test <phase>] [--parallel]\""
5
+ allowed-tools: spawn_agents_on_csv, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion
6
+ ---
7
+
8
+ <purpose>
9
+ Wave-based hypothesis-driven debugging using `spawn_agents_on_csv`. Wave 1 explores hypotheses in parallel, Wave 2 attempts fixes on confirmed hypotheses in parallel.
10
+
11
+ **Core workflow**: Gather Symptoms -> Generate Hypotheses -> Parallel Investigation -> Parallel Fix Attempts -> Unify Results
12
+
13
+ ```
14
+ +---------------------------------------------------------------------------+
15
+ | DEBUG CSV WAVE WORKFLOW |
16
+ +---------------------------------------------------------------------------+
17
+ | |
18
+ | Phase 1: Input Resolution -> CSV |
19
+ | +-- Parse mode: standalone / --from-uat / --from-auto-test / --parallel |
20
+ | +-- Gather symptoms (interactive) or load UAT gaps (pre-filled) |
21
+ | +-- Cluster gaps by component (if from-uat) |
22
+ | +-- Generate 3-5 hypotheses per cluster/issue |
23
+ | +-- Generate tasks.csv with one row per hypothesis |
24
+ | +-- User validates hypothesis breakdown (skip if -y) |
25
+ | |
26
+ | Phase 2: Wave Execution Engine |
27
+ | +-- Wave 1: Hypothesis Investigation (parallel) |
28
+ | | +-- Each agent investigates one hypothesis |
29
+ | | +-- Agent searches code, logs evidence, confirms/refutes |
30
+ | | +-- Discoveries shared via board (code patterns, root causes) |
31
+ | | +-- Results: evidence_for + evidence_against per hypothesis |
32
+ | +-- Wave 2: Fix Attempts (parallel, confirmed hypotheses only) |
33
+ | | +-- Filter: only hypotheses with status=confirmed from wave 1 |
34
+ | | +-- Each agent attempts fix for its confirmed root cause |
35
+ | | +-- Agent applies fix, runs verification, logs result |
36
+ | | +-- Results: fix_applied + verified per fix task |
37
+ | +-- discoveries.ndjson shared across all waves (append-only) |
38
+ | |
39
+ | Phase 3: Results Aggregation |
40
+ | +-- Export results.csv with all investigation + fix outcomes |
41
+ | +-- Generate context.md with diagnosis summary |
42
+ | +-- Update UAT gaps with diagnosis (if --from-uat) |
43
+ | +-- Update issues.jsonl with diagnosis results |
44
+ | +-- Display summary with next steps |
45
+ | |
46
+ +---------------------------------------------------------------------------+
47
+ ```
48
+ </purpose>
49
+
50
+ <context>
51
+ ```bash
52
+ $quality-debug "Login button throws 500 error on click"
53
+ $quality-debug -y "JWT token not refreshed --from-uat 3"
54
+ $quality-debug -c 4 "Navigation crash --from-uat 3 --parallel"
55
+ $quality-debug -y "--from-auto-test 3"
56
+ $quality-debug --continue "20260318-debug-P3-jwt-expiry"
57
+ ```
58
+
59
+ **Flags**:
60
+ - `-y, --yes`: Skip all confirmations (auto mode)
61
+ - `-c, --concurrency N`: Max concurrent agents within each wave (default: 5)
62
+ - `--continue`: Resume existing session
63
+ - `--from-uat <phase>`: Load gaps from UAT uat.md as pre-filled symptoms
64
+ - `--from-auto-test <phase>`: Load code_defect failures from auto-test report.json as pre-filled symptoms
65
+ - `--parallel`: One agent per gap cluster (implies from-uat or from-auto-test)
66
+
67
+ When `--yes` or `-y`: Auto-confirm hypothesis selection, skip interactive symptom gathering (require bug description in args), use defaults for mode detection.
68
+
69
+ **Output Directory**: `.workflow/.csv-wave/{session-id}/`
70
+ **Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report)
71
+ </context>
72
+
73
+ <csv_schema>
74
+
75
+ ### tasks.csv (Master State)
76
+
77
+ ```csv
78
+ id,title,description,hypothesis,evidence_for,evidence_against,deps,context_from,wave,status,findings,fix_applied,verified,error
79
+ "H1","Null pointer in login handler","Investigate whether login handler crashes due to null user object after failed DB lookup","User object is null when DB returns empty result; login.ts:42 dereferences without null check","","","","","1","","","","",""
80
+ "H2","Missing error boundary","Investigate whether unhandled promise rejection in auth middleware propagates to 500","Auth middleware catches DB errors but not validation errors; middleware.ts:78 has no catch block","","","","","1","","","","",""
81
+ "H3","Stale session token","Investigate whether expired session tokens bypass refresh logic","Session refresh only triggers on 403 but server returns 401 for expired tokens; session.ts:15","","","","","1","","","","",""
82
+ "FIX-H1","Fix null pointer in login","Apply null check before user object dereference in login handler","","","","H1","H1","2","","","","",""
83
+ "FIX-H3","Fix session token refresh","Update refresh trigger to also handle 401 status codes","","","","H3","H3","2","","","","",""
84
+ ```
85
+
86
+ **Columns**:
87
+
88
+ | Column | Phase | Description |
89
+ |--------|-------|-------------|
90
+ | `id` | Input | Unique task identifier: `H{N}` for hypotheses (wave 1), `FIX-H{N}` for fixes (wave 2) |
91
+ | `title` | Input | Short hypothesis or fix title |
92
+ | `description` | Input | Detailed investigation/fix instructions |
93
+ | `hypothesis` | Input | The hypothesis being tested (wave 1) or empty (wave 2) |
94
+ | `evidence_for` | Output | Evidence supporting the hypothesis |
95
+ | `evidence_against` | Output | Evidence refuting the hypothesis |
96
+ | `deps` | Input | Semicolon-separated dependency task IDs (wave 2 depends on wave 1) |
97
+ | `context_from` | Input | Semicolon-separated task IDs whose findings this task needs |
98
+ | `wave` | Computed | Wave number (1 = investigation, 2 = fix attempt) |
99
+ | `status` | Output | `pending` -> `confirmed` / `refuted` / `inconclusive` / `fixed` / `fix_failed` / `failed` / `skipped` |
100
+ | `findings` | Output | Key findings summary (max 500 chars) |
101
+ | `fix_applied` | Output | Description of fix applied (wave 2 only) |
102
+ | `verified` | Output | `true` / `false` -- whether fix was verified to work (wave 2 only) |
103
+ | `error` | Output | Error message if failed |
104
+
105
+ ### Per-Wave CSV (Temporary)
106
+
107
+ Each wave generates `wave-{N}.csv` with extra `prev_context` column.
108
+
109
+ ### Output Artifacts
110
+
111
+ | File | Purpose | Lifecycle |
112
+ |------|---------|-----------|
113
+ | `tasks.csv` | Master state -- all tasks with status/findings | Updated after each wave |
114
+ | `wave-{N}.csv` | Per-wave input (temporary) | Created before wave, deleted after |
115
+ | `results.csv` | Final export of all task results | Created in Phase 3 |
116
+ | `discoveries.ndjson` | Shared exploration board | Append-only, carries across waves |
117
+ | `context.md` | Human-readable diagnosis report | Created in Phase 3 |
118
+
119
+ ### Session Structure
120
+
121
+ ```
122
+ .workflow/.csv-wave/{YYYYMMDD}-debug-P{N}-{slug}/
123
+ +-- tasks.csv
124
+ +-- results.csv
125
+ +-- discoveries.ndjson
126
+ +-- context.md
127
+ +-- wave-{N}.csv (temporary)
128
+ ```
129
+ </csv_schema>
130
+
131
+ <invariants>
132
+ 1. **Start Immediately**: First action is session initialization, then Phase 1
133
+ 2. **Wave Order is Sacred**: Never execute wave 2 before wave 1 completes and results are merged
134
+ 3. **CSV is Source of Truth**: Master tasks.csv holds all state
135
+ 4. **Context Propagation**: prev_context built from master CSV, not from memory
136
+ 5. **Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson
137
+ 6. **Skip on Refuted**: Wave 2 fix tasks skip if their hypothesis was refuted or inconclusive
138
+ 7. **Cleanup Temp Files**: Remove wave-{N}.csv after results are merged
139
+ 8. **DO NOT STOP**: Continuous execution until all waves complete
140
+ </invariants>
141
+
142
+ <execution>
143
+
144
+ ### Session Initialization
145
+
146
+ ```
147
+ Parse from $ARGUMENTS:
148
+ AUTO_YES ← --yes | -y
149
+ continueMode ← --continue
150
+ maxConcurrency ← --concurrency | -c N (default: 5)
151
+ fromUat ← --from-uat <phase> (default: null)
152
+ fromAutoTest ← --from-auto-test <phase> (default: null)
153
+ parallelMode ← --parallel
154
+ bugDescription ← remaining text after flag removal
155
+
156
+ Derive:
157
+ phaseRef ← fromUat || fromAutoTest || null
158
+ sourceType ← fromAutoTest ? "auto-test" : fromUat ? "uat" : "standalone"
159
+ slug ← bugDescription kebab-cased, max 40 chars
160
+ dateStr ← UTC+8 YYYYMMDD
161
+ sessionId ← phaseRef ? "{dateStr}-debug-P{phaseRef}-{slug}" : "{dateStr}-debug-{slug}"
162
+ sessionFolder ← ".workflow/.csv-wave/{sessionId}"
163
+
164
+ mkdir -p {sessionFolder}
165
+ ```
166
+
167
+ ### Phase 1: Input Resolution -> CSV
168
+
169
+ **Objective**: Parse mode, gather symptoms or load UAT gaps / auto-test code defects, generate hypotheses, build tasks.csv.
170
+
171
+ **Decomposition Rules**:
172
+
173
+ 1. **Mode detection**:
174
+
175
+ | Condition | Mode |
176
+ |-----------|------|
177
+ | `--from-uat` flag present | from-uat (load gaps from uat.md) |
178
+ | `--from-auto-test` flag present | from-auto-test (load code_defects from report.json) |
179
+ | `--parallel` flag present | parallel (implies from-uat or from-auto-test, one agent per gap cluster) |
180
+ | No `--from-uat` / `--from-auto-test` flag | standalone (gather symptoms interactively) |
181
+
182
+ 2. **Related session discovery**: Query `state.json.artifacts[]` for matching phase+milestone. Extract relevant outputs by type: execute -> .summaries/.task/, review -> review.json (guide hypotheses), debug -> understanding.md (avoid re-investigation), test -> uat.md + .tests/auto-test/report.json.
183
+
184
+ 3. **Symptom collection**:
185
+
186
+ | Mode | Source | Action |
187
+ |------|--------|--------|
188
+ | standalone | User input | Ask 5 questions: expected, actual, errors, timeline, reproduction |
189
+ | from-uat | test artifact's uat.md (via registry) | Parse Gaps section, cluster by component |
190
+ | from-auto-test | test artifact's `.tests/auto-test/report.json` (via registry) | Parse `failures[]` where `classification == "code_defect"`, cluster by target module |
191
+ | parallel | test artifact's uat.md or report.json (via registry) | Same as from-uat/from-auto-test, one investigation per cluster |
192
+
193
+ **from-auto-test specifics**: Each `code_defect` failure provides: `scenario_id`, `req_ref`, `description`, `expected`, `actual`, `fix_suggestion.file`, `fix_suggestion.line`, `fix_suggestion.direction`. Map these to symptoms: expected=failure.expected, actual=failure.actual, location=fix_suggestion.file:line, context=fix_suggestion.direction.
194
+
195
+ 4. **Hypothesis generation**: Per symptom cluster, analyze affected code and generate 3-5 ranked hypotheses (each becomes a wave 1 row).
196
+
197
+ 5. **Fix task generation**: Pre-generate wave 2 fix row per hypothesis (`deps`/`context_from` -> hypothesis ID). Only executes if hypothesis confirmed.
198
+
199
+ 6. **CSV generation**: Hypothesis rows (wave 1) + fix rows (wave 2).
200
+
201
+ **Wave computation**: Simple 2-wave -- all hypothesis tasks = wave 1, all fix tasks = wave 2.
202
+
203
+ **User validation**: Display hypothesis breakdown (skip if AUTO_YES).
204
+
205
+ ### Phase 2: Wave Execution Engine
206
+
207
+ **Objective**: Investigate hypotheses (wave 1), then attempt fixes for confirmed hypotheses (wave 2), wave-by-wave via spawn_agents_on_csv.
208
+
209
+ #### Wave 1: Hypothesis Investigation (Parallel)
210
+
211
+ 1. Extract wave 1 pending rows from master `tasks.csv` into `wave-1.csv` (no prev_context needed)
212
+ 2. Execute:
213
+
214
+ ```javascript
215
+ spawn_agents_on_csv({
216
+ csv_path: `${sessionFolder}/wave-1.csv`,
217
+ id_column: "id",
218
+ instruction: buildInvestigationInstruction(sessionFolder),
219
+ max_concurrency: maxConcurrency, max_runtime_seconds: 3600,
220
+ output_csv_path: `${sessionFolder}/wave-1-results.csv`,
221
+ output_schema: { id, status: [confirmed|refuted|inconclusive|failed], findings, evidence_for, evidence_against, error }
222
+ })
223
+ ```
224
+
225
+ 3. Merge results into master `tasks.csv`, delete `wave-1.csv`
226
+ 4. **Filter for wave 2**: Mark fix tasks as `skipped` if their hypothesis was `refuted`, `inconclusive`, or `failed` (i.e. anything other than `confirmed`)
227
+
228
+ #### Wave 2: Fix Attempts (Parallel, Confirmed Only)
229
+
230
+ 1. If no confirmed hypotheses remain, skip wave 2 entirely
231
+ 2. Extract wave 2 pending rows, build `prev_context` from confirmed wave 1 findings
232
+ 3. Write `wave-2.csv`, then execute:
233
+
234
+ ```javascript
235
+ spawn_agents_on_csv({
236
+ csv_path: `${sessionFolder}/wave-2.csv`,
237
+ id_column: "id",
238
+ instruction: buildFixInstruction(sessionFolder),
239
+ max_concurrency: maxConcurrency, max_runtime_seconds: 3600,
240
+ output_csv_path: `${sessionFolder}/wave-2-results.csv`,
241
+ output_schema: { id, status: [fixed|fix_failed|failed], findings, fix_applied, verified, error }
242
+ })
243
+ ```
244
+
245
+ 4. Merge results into master `tasks.csv`, delete `wave-2.csv`
246
+
247
+ ### Phase 3: Results Aggregation
248
+
249
+ **Objective**: Generate final results and human-readable report.
250
+
251
+ 1. Export final `tasks.csv` as `results.csv`
252
+
253
+ 2. **Generate context.md**: Debug report with summary (mode, hypothesis/confirmed/fixed/verified counts), per-hypothesis results (hypothesis, evidence for/against, findings, status), per-fix results (fix applied, verified, findings), aggregated root causes, and next steps.
254
+
255
+ 3. **UAT update** (if --from-uat): Update `uat.md` gaps with `root_cause`, `fix_direction`, `affected_files` for confirmed hypotheses.
256
+
257
+ 4. **Issue update**: If `issues.jsonl` exists, update matching issues with status `diagnosed`, add `context.suggested_fix` and `context.notes`.
258
+
259
+ 5. **Register artifact** (phase-scoped only): Append to `state.json.artifacts[]` with `type: "debug"`, `id: DBG-NNN`, `depends_on: triggering_review_id || exec_art.id`.
260
+
261
+ 6. **Post-debug Knowledge Inquiry**: Prompt user to capture knowledge when:
262
+ - Recurring root cause pattern detected -> `/spec-add debug`
263
+ - Non-obvious fix strategy used -> `/spec-add learning`
264
+ - Architectural gap identified -> `/spec-add arch`
265
+
266
+ 7. **Next step routing**:
267
+
268
+ | Result | Suggestion |
269
+ |--------|------------|
270
+ | All fixes verified | Run tests: `Skill({ skill: "maestro-flow", args: "--cmd quality-test {phase}" })` |
271
+ | Fixes applied, not verified | Re-verify: `Skill({ skill: "maestro-flow", args: "--cmd maestro-verify {phase}" })` |
272
+ | Confirmed but no fix | Plan fixes: `Skill({ skill: "maestro-flow", args: "--cmd maestro-plan {phase} --gaps" })` |
273
+ | All inconclusive | Resume with more context or manual investigation |
274
+ | From UAT, all diagnosed | `Skill({ skill: "maestro-flow", args: "--cmd quality-test {phase} --auto-fix" })` |
275
+
276
+ 8. Display summary.
277
+
278
+ ### Shared Discovery Board Protocol
279
+
280
+ #### Standard Discovery Types
281
+
282
+ | Type | Dedup Key | Data Schema | Description |
283
+ |------|-----------|-------------|-------------|
284
+ | `code_pattern` | `data.name` | `{name, file, description}` | Reusable code pattern found |
285
+ | `integration_point` | `data.file` | `{file, description, exports[]}` | Module connection point |
286
+ | `convention` | singleton | `{naming, imports, formatting}` | Project code conventions |
287
+ | `blocker` | `data.issue` | `{issue, severity, impact}` | Blocking issue found |
288
+ | `tech_stack` | singleton | `{framework, language, tools[]}` | Technology stack info |
289
+
290
+ #### Domain Discovery Types
291
+
292
+ | Type | Dedup Key | Data Schema | Description |
293
+ |------|-----------|-------------|-------------|
294
+ | `root_cause` | `data.location` | `{location, cause, severity, confidence}` | Confirmed root cause |
295
+ | `hypothesis_evidence` | `data.hypothesis+data.location` | `{hypothesis, location, type, conclusion}` | Evidence for/against hypothesis |
296
+ | `affected_component` | `data.component` | `{component, files[], impact}` | Component affected by bug |
297
+ | `reproduction_path` | `data.trigger` | `{trigger, steps[], frequency}` | Bug reproduction path |
298
+
299
+ #### Protocol
300
+
301
+ Read `discoveries.ndjson` before investigation. Append-only: dedup by type+key before writing, never modify/delete.
302
+
303
+ ```bash
304
+ echo '{"ts":"<ISO>","worker":"{id}","type":"root_cause","data":{"location":"src/auth/login.ts:42","cause":"null_dereference","severity":"high","confidence":"confirmed"}}' >> {sessionFolder}/discoveries.ndjson
305
+ ```
306
+ </execution>
307
+
308
+ <error_codes>
309
+
310
+ | Error | Resolution |
311
+ |-------|------------|
312
+ | No bug description and no --from-uat/--from-auto-test | Abort with error: "Issue description required" |
313
+ | UAT file not found for --from-uat phase | Abort with error: "uat.md not found for phase {N}" |
314
+ | Auto-test report not found for --from-auto-test phase | Abort with error: "report.json not found for phase {N}" |
315
+ | No gaps in UAT file / no code_defects in report | Abort with error: "No failed gaps/defects found" |
316
+ | Hypothesis agent timeout | Mark as inconclusive, continue with remaining |
317
+ | All hypotheses refuted | Skip wave 2, suggest manual investigation |
318
+ | Fix agent timeout | Mark as fix_failed, report partial results |
319
+ | CSV parse error | Validate format, show line number |
320
+ | discoveries.ndjson corrupt | Ignore malformed lines |
321
+ | Continue mode: no session found | List available sessions |
322
+ | Existing debug session found | Offer resume (skip if AUTO_YES) |
323
+ </error_codes>
324
+
325
+ <success_criteria>
326
+ - [ ] Session folder created with valid tasks.csv
327
+ - [ ] Wave 1 hypotheses investigated in parallel
328
+ - [ ] Refuted/inconclusive hypotheses correctly skip wave 2 fix tasks
329
+ - [ ] Wave 2 fixes attempted only for confirmed hypotheses
330
+ - [ ] context.md produced with diagnosis summary
331
+ - [ ] UAT gaps updated (if --from-uat)
332
+ - [ ] Issues updated with diagnosis results
333
+ - [ ] discoveries.ndjson append-only throughout
334
+ </success_criteria>
@@ -0,0 +1,151 @@
1
+ ---
2
+ name: quality-refactor
3
+ description: Tech debt reduction with reflection-driven iteration. Analyze scope, plan refactoring, execute with test verification, reflect on strategy per round.
4
+ argument-hint: "<phase|--dir path> [--max-iterations N]"
5
+ allowed-tools: Read, Write, Edit, Bash, Glob, Grep, Agent, AskUserQuestion
6
+ ---
7
+
8
+ <purpose>
9
+ Iterative refactoring cycle: analyze scope for tech debt -> plan refactoring tasks -> execute each with test verification -> reflect on strategy per round -> repeat if needed. Every change is verified against existing tests. Failed changes are reverted and retried with adjusted strategy.
10
+ </purpose>
11
+
12
+ <context>
13
+ $ARGUMENTS -- module path, feature area, or "all", plus optional flags.
14
+
15
+ **Usage**:
16
+
17
+ ```bash
18
+ $quality-refactor "src/auth" # module path scope
19
+ $quality-refactor "authentication" # feature area scope
20
+ $quality-refactor "all" # full codebase scan
21
+ $quality-refactor "src/api --max-iterations 5" # limit iteration rounds
22
+ $quality-refactor "--dir .workflow/scratch/refactor-auth-2026-03-18" # resume existing
23
+ ```
24
+
25
+ **Flags**:
26
+ - `<phase|scope>`: Module path, feature area, or "all"
27
+ - `--dir path`: Resume existing refactor scratch directory
28
+ - `--max-iterations N`: Max refactoring rounds (default: 3)
29
+
30
+ **Output**: `.workflow/scratch/refactor-{slug}-{date}/` with index.json, plan.json, reflection-log.md, .task/, .summaries/
31
+ </context>
32
+
33
+ <invariants>
34
+ 1. **Test after every change** -- zero regressions tolerated
35
+ 2. **Revert on failure** -- never leave broken state
36
+ 3. **Max 2 retries per task** with strategy adjustment
37
+ 4. **Reflection-driven** -- every round records strategy, outcome, adjustment
38
+ 5. **User approval required** before execution -- the plan is approved at the end of Step 4; execution starts in Step 5
39
+ 6. **Quick wins first** -- order by risk (low first) and dependency
40
+ 7. **Agent calls use `run_in_background: false`** for synchronous execution
41
+ 8. **Incremental safety** -- each task is independently safe to apply or revert
42
+ </invariants>
43
+
44
+ <execution>
45
+
46
+ ### Step 1: Parse Scope
47
+
48
+ 1. Parse `$ARGUMENTS` for scope and flags
49
+ 2. If `--dir` provided: resume existing scratch directory (skip to Step 5)
50
+ 3. Scope types:
51
+ - Module path (e.g., "src/auth") -> scan that directory
52
+ - Feature area (e.g., "authentication") -> search for related files
53
+ - "all" -> full codebase scan
54
+ 4. If empty: prompt user via AskUserQuestion with options (Module path / Feature area / Full codebase)
55
+ 5. Detect `--max-iterations N` (default: 3)
56
+
57
+ ### Step 2: Create Scratch Directory
58
+
59
+ Create `.workflow/scratch/refactor-{slug}-{date}/` with `.task/` and `.summaries/` subdirectories. Write `index.json` with type "refactor", scope, status "active", plan/execution/reflection counters.
60
+
61
+ ### Step 3: Scope Analysis
62
+
63
+ Load project specs if available (`maestro spec load --category coding`).
64
+
65
+ Analyze scope for tech debt categories:
66
+
67
+ | Category | What to Look For |
68
+ |----------|-----------------|
69
+ | Duplication | Repeated code blocks, copy-paste patterns |
70
+ | Complexity | Long functions, deep nesting, high cyclomatic complexity |
71
+ | Naming | Inconsistent naming, unclear identifiers |
72
+ | Dependencies | Circular deps, tight coupling, god objects |
73
+ | Dead code | Unused functions, unreachable branches |
74
+ | Pattern violations | Inconsistent with project conventions |
75
+
76
+ Present analysis summary table with category, count, severity.
77
+ Confirm with user before proceeding.
78
+
79
+ ### Step 4: Plan Refactoring
80
+
81
+ 1. Write `plan.json` with scope, total_tasks, strategy ("incremental -- each task independently safe")
82
+ 2. For each identified issue, create `.task/TASK-{NNN}.json`:
83
+ - id, title, status (pending), type (refactor), category
84
+ - description, read_first files, files with action/target/change
85
+ - convergence.criteria (grep-verifiable), verification command
86
+ - implementation steps, risk level
87
+ 3. Order: high risk last, dependencies respected, quick wins first
88
+ 4. Update `index.json` plan fields
89
+ 5. Present plan to user via AskUserQuestion -- show affected files, risk areas, ask for approval
90
+
91
+ ### Step 5: Execute with Reflection
92
+
93
+ Initialize `reflection-log.md` if not exists.
94
+
95
+ For each task in order:
96
+
97
+ **5a. Execute refactoring:** Spawn Agent to implement the refactoring — read `read_first` files, apply changes to targets, follow convergence criteria exactly.
98
+
99
+ **5b. Run test suite** (npm test / pytest / go test as appropriate).
100
+
101
+ **5c. Record in reflection-log.md:** Round number, task title, strategy, result (pass/fail), test outcome, adjustment for next round, files changed.
102
+
103
+ **5d. Handle test failures:**
104
+ 1. Revert the change
105
+ 2. Record failure + strategy adjustment in reflection-log.md
106
+ 3. Retry with adjusted strategy (max 2 retries per task)
107
+ 4. If still failing: mark task "blocked", continue to next
108
+
109
+ **5e. Update state:**
110
+ - `.task/TASK-{NNN}.json` status -> "completed" or "blocked"
111
+ - `.summaries/TASK-{NNN}-summary.md` written
112
+ - `index.json` execution and reflection fields updated
113
+
114
+ ### Step 6: Final Verification
115
+
116
+ Run full test suite. Record final state in reflection-log.md: test result, tasks completed/total, tasks blocked, key learnings.
117
+
118
+ ### Step 7: Complete and Report
119
+
120
+ Update `index.json`: status -> "completed", final execution/reflection counts.
121
+
122
+ Display report: scope, tasks completed/blocked, reflection rounds, strategy adjustments, test status, key learnings from reflection-log.md, artifact paths (`{REFACTOR_DIR}/reflection-log.md`, `{REFACTOR_DIR}/.summaries/`).
123
+
124
+ **Next-step routing:**
125
+
126
+ | Result | Next Step |
127
+ |--------|-----------|
128
+ | All tests pass, refactoring complete | `$quality-sync` (update codebase docs) |
129
+ | Test failures remain after refactor | `$quality-debug "{scope}"` |
130
+ | No test suite available for scope | `$quality-auto-test "{phase}"` |
131
+ | Partial completion (some blocked) | `$quality-debug "{scope}"` for blocked tasks |
132
+ </execution>
133
+
134
+ <error_codes>
135
+ | Code | Severity | Condition | Recovery |
136
+ |------|----------|-----------|----------|
137
+ | E001 | error | Scope/description required | Prompt user for module path, feature area, or "all" |
138
+ | E002 | error | Test suite not available | Suggest creating tests first, or proceed with manual verification |
139
+ | W001 | warning | Partial test coverage | Note uncovered areas, proceed with extra caution |
140
+ </error_codes>
141
+
142
+ <success_criteria>
143
+ - [ ] Scope resolved and scratch directory created
144
+ - [ ] Tech debt analysis completed with categorized findings
145
+ - [ ] Refactoring plan approved by user
146
+ - [ ] Each task executed with test verification
147
+ - [ ] Failed changes reverted, retried with adjusted strategy
148
+ - [ ] Reflection log records every round's strategy and outcome
149
+ - [ ] Final test suite passes with zero regressions
150
+ - [ ] Completion report with key learnings displayed
151
+ </success_criteria>