maestro-flow 0.3.3 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/.claude/commands/quality-business-test.md +110 -0
  2. package/.codex/skills/maestro-init/SKILL.md +167 -167
  3. package/.codex/skills/maestro-phase-add/SKILL.md +154 -154
  4. package/.codex/skills/maestro-phase-transition/SKILL.md +173 -173
  5. package/.codex/skills/maestro-verify/SKILL.md +566 -566
  6. package/.codex/skills/manage-codebase-rebuild/SKILL.md +5 -5
  7. package/.codex/skills/manage-codebase-refresh/SKILL.md +5 -5
  8. package/.codex/skills/manage-issue/SKILL.md +7 -7
  9. package/.codex/skills/manage-issue-analyze/SKILL.md +7 -7
  10. package/.codex/skills/manage-issue-discover/SKILL.md +503 -503
  11. package/.codex/skills/manage-issue-execute/SKILL.md +9 -9
  12. package/.codex/skills/manage-issue-plan/SKILL.md +8 -8
  13. package/.codex/skills/manage-learn/SKILL.md +7 -7
  14. package/.codex/skills/manage-memory/SKILL.md +72 -72
  15. package/.codex/skills/manage-memory-capture/SKILL.md +86 -86
  16. package/.codex/skills/manage-status/SKILL.md +2 -2
  17. package/.codex/skills/quality-business-test/SKILL.md +223 -0
  18. package/.codex/skills/quality-debug/SKILL.md +5 -5
  19. package/.codex/skills/quality-integration-test/SKILL.md +544 -544
  20. package/.codex/skills/quality-refactor/SKILL.md +6 -6
  21. package/.codex/skills/quality-retrospective/SKILL.md +10 -10
  22. package/.codex/skills/quality-review/SKILL.md +408 -408
  23. package/.codex/skills/quality-sync/SKILL.md +6 -6
  24. package/.codex/skills/quality-test/SKILL.md +5 -5
  25. package/.codex/skills/quality-test-gen/SKILL.md +447 -447
  26. package/.codex/skills/spec-add/SKILL.md +5 -5
  27. package/.codex/skills/spec-load/SKILL.md +5 -5
  28. package/.codex/skills/spec-map/SKILL.md +5 -5
  29. package/.codex/skills/spec-setup/SKILL.md +2 -2
  30. package/chains/_intent-map.json +6 -0
  31. package/chains/_router.json +14 -0
  32. package/chains/full-lifecycle.json +15 -0
  33. package/chains/quality-loop.json +16 -1
  34. package/chains/singles/business-test.json +26 -0
  35. package/dashboard/dist-server/dashboard/src/server/agents/codex-app-server-adapter.d.ts +4 -0
  36. package/dashboard/dist-server/dashboard/src/server/agents/codex-app-server-adapter.js +47 -1
  37. package/dashboard/dist-server/dashboard/src/server/agents/codex-app-server-adapter.js.map +1 -1
  38. package/dashboard/dist-server/dashboard/src/server/agents/codex-cli-adapter.js +16 -2
  39. package/dashboard/dist-server/dashboard/src/server/agents/codex-cli-adapter.js.map +1 -1
  40. package/dashboard/dist-server/dashboard/src/server/agents/stream-json-adapter.d.ts +1 -1
  41. package/dashboard/dist-server/dashboard/src/server/agents/stream-json-adapter.js +25 -9
  42. package/dashboard/dist-server/dashboard/src/server/agents/stream-json-adapter.js.map +1 -1
  43. package/dashboard/dist-server/src/agents/cli-agent-runner.d.ts +3 -0
  44. package/dashboard/dist-server/src/agents/cli-agent-runner.js +78 -61
  45. package/dashboard/dist-server/src/agents/cli-agent-runner.js.map +1 -1
  46. package/dashboard/dist-server/src/agents/cli-history-store.d.ts +14 -1
  47. package/dashboard/dist-server/src/agents/cli-history-store.js +24 -2
  48. package/dashboard/dist-server/src/agents/cli-history-store.js.map +1 -1
  49. package/dashboard/dist-server/src/commands/delegate.js +142 -6
  50. package/dashboard/dist-server/src/commands/delegate.js.map +1 -1
  51. package/dist/src/agents/cli-agent-runner.d.ts +3 -0
  52. package/dist/src/agents/cli-agent-runner.d.ts.map +1 -1
  53. package/dist/src/agents/cli-agent-runner.js +72 -46
  54. package/dist/src/agents/cli-agent-runner.js.map +1 -1
  55. package/dist/src/agents/cli-history-store.d.ts +14 -1
  56. package/dist/src/agents/cli-history-store.d.ts.map +1 -1
  57. package/dist/src/agents/cli-history-store.js +24 -2
  58. package/dist/src/agents/cli-history-store.js.map +1 -1
  59. package/dist/src/commands/delegate.d.ts.map +1 -1
  60. package/dist/src/commands/delegate.js +65 -5
  61. package/dist/src/commands/delegate.js.map +1 -1
  62. package/dist/src/commands/install-backend.d.ts.map +1 -1
  63. package/dist/src/commands/install-backend.js +5 -2
  64. package/dist/src/commands/install-backend.js.map +1 -1
  65. package/dist/src/mcp/delegate-channel-relay.d.ts.map +1 -1
  66. package/dist/src/mcp/delegate-channel-relay.js +7 -2
  67. package/dist/src/mcp/delegate-channel-relay.js.map +1 -1
  68. package/dist/src/mcp/server.js +1 -1
  69. package/dist/src/mcp/server.js.map +1 -1
  70. package/dist/src/tools/index.d.ts +1 -6
  71. package/dist/src/tools/index.d.ts.map +1 -1
  72. package/dist/src/tools/index.js +1 -451
  73. package/dist/src/tools/index.js.map +1 -1
  74. package/package.json +3 -2
  75. package/templates/business-test-report.json +68 -0
@@ -0,0 +1,223 @@
1
+ ---
2
+ name: quality-business-test
3
+ description: PRD-forward business testing with requirement traceability, multi-layer execution (L1 Interface → L2 Business Rule → L3 Scenario), fixture generation, and feedback loop.
4
+ argument-hint: "<phase> [--spec SPEC-xxx] [--layer L1|L2|L3] [--gen-code] [--dry-run] [--re-run] [--auto]"
5
+ allowed-tools: Read, Write, Edit, Bash, Glob, Grep, Agent, AskUserQuestion
6
+ ---
7
+
8
+ ## Auto Mode
9
+
10
+ `--auto` skips interactive confirmation of the test plan. `--dry-run` extracts scenarios and fixtures only, without executing them.
11
+
12
+ # Business Test (PRD-Forward)
13
+
14
+ ## Usage
15
+
16
+ ```bash
17
+ $quality-business-test "3" # test phase 3 against PRD
18
+ $quality-business-test "3 --layer L1" # L1 interface tests only
19
+ $quality-business-test "3 --gen-code" # generate framework-specific test classes
20
+ $quality-business-test "3 --dry-run" # extract scenarios only, don't execute
21
+ $quality-business-test "3 --re-run" # re-run only previously failed scenarios
22
+ $quality-business-test "3 --spec SPEC-auth-2026-04" # explicit spec reference
23
+ $quality-business-test "3 --auto" # skip plan confirmation
24
+ ```
25
+
26
+ **Flags**:
27
+ - `<phase>`: Phase number (required)
28
+ - `--spec SPEC-xxx`: Explicit spec package reference (default: auto-detect from index.json)
29
+ - `--layer L1|L2|L3`: Run only specific layer
30
+ - `--gen-code`: Generate framework-specific test classes (JUnit/RestAssured, supertest/vitest, pytest/httpx)
31
+ - `--dry-run`: Extract scenarios and fixtures only, don't execute
32
+ - `--re-run`: Re-run only previously failed/blocked scenarios
33
+ - `--auto`: Skip interactive confirmations
34
+
35
+ **Output**: `{phase_dir}/.tests/business/business-test-plan.json` + `business-test-report.json` + `business-test-summary.md`
36
+
37
+ ---
38
+
39
+ ## Overview
40
+
41
+ Validate built features against PRD acceptance criteria through automated multi-layer business testing. Unlike quality-test (interactive UAT driven by code gaps) and quality-test-gen (test generation driven by coverage gaps), this command starts from the REQ-*.md acceptance criteria and works forward.
42
+
43
+ **Three-track testing** (complementary, not replacements):
44
+
45
+ | Command | Input Source | Verification Angle |
46
+ |---------|-------------|-------------------|
47
+ | `quality-business-test` | REQ-*.md acceptance criteria | **PRD-forward** — are business rules satisfied? |
48
+ | `quality-test` | verification.json must_haves | **Code-backward** — does the code work? |
49
+ | `quality-test-gen` | validation.json gaps | **Coverage-backward** — is coverage sufficient? |
50
+
51
+ **Layer definitions:**
52
+
53
+ | Layer | Name | Tests | Source |
54
+ |-------|------|-------|--------|
55
+ | L1 | Interface Contract | Single endpoint request/response, input validation, schema compliance | Architecture API endpoints + REQ AC |
56
+ | L2 | Business Rule | Multi-step logic, state transitions, business constraints, edge cases | REQ acceptance criteria + NFR |
57
+ | L3 | Business Scenario | Full user flows, multi-service chains, error propagation | Epic user stories |
58
+
59
+ ---
60
+
61
+ ## Implementation
62
+
63
+ ### Step 1: Resolve Target & Load Spec Package
64
+
65
+ 1. Parse `$ARGUMENTS` for phase number and flags
66
+ 2. Set `PHASE_DIR = .workflow/phases/{NN}-{slug}/`
67
+ 3. Load `index.json` -> find `spec_ref` -> locate `.workflow/.spec/SPEC-xxx/`
68
+ 4. **Full mode**: Read `requirements/_index.md` + all `REQ-*.md` + `NFR-*.md` + `architecture/_index.md` + `epics/EPIC-*.md`
69
+ 5. **Degraded mode** (no spec package): Read `index.json.success_criteria` + `plan.json` convergence criteria + `.summaries/TASK-*.md`
70
+ 6. If `--re-run`: load previous `business-test-report.json`, filter to failed/blocked scenarios
71
+
72
+ ### Step 2: Extract Business Test Scenarios from PRD
73
+
74
+ For each `REQ-NNN-{slug}.md`:
75
+
76
+ 1. Parse `## Acceptance Criteria` section
77
+ 2. Map RFC 2119 keywords to priority:
78
+
79
+ | Keyword | Priority | Failure = |
80
+ |---------|----------|-----------|
81
+ | MUST / SHALL | critical | blocker |
82
+ | SHOULD / RECOMMENDED | high | major |
83
+ | MAY / OPTIONAL | medium | minor |
84
+
85
+ 3. Classify scenario into layer:
86
+
87
+ | Source | Layer | Category |
88
+ |--------|-------|----------|
89
+ | Architecture API endpoints + REQ AC about request/response | L1 | api_contract |
90
+ | REQ AC about business logic, validation, state changes | L2 | business_rule |
91
+ | Architecture state machine transitions | L2 | state_transition |
92
+ | Epic user stories (multi-step flows) | L3 | user_flow |
93
+ | NFR performance/security constraints | L2 | non_functional |
94
+
95
+ 4. Generate scenario JSON with `id`, `req_ref` (REQ-NNN:AC-N), `layer`, `priority`, `name`, `category`, `endpoint`, `input`, `expected`, `preconditions`, `postconditions`, `mock_services`
96
+
97
+ **Degraded mode**: Extract scenarios from success_criteria (each -> one L2 scenario) and from plan.json convergence criteria (each -> an L1 or L2 scenario). All scenarios default to priority `high`. No L3 scenarios are generated in degraded mode.
98
+
99
+ ### Step 3: Generate Test Data (Fixtures)
100
+
101
+ Three tiers:
102
+
103
+ **Tier 1 — Schema-derived**: From REQ data models, generate valid/invalid/boundary variants per entity:
104
+ - valid: satisfies all constraints
105
+ - invalid: violates each constraint individually (null, empty, overflow, wrong type)
106
+ - boundary: edge values (min, max, min-1, max+1)
107
+
108
+ **Tier 2 — Criteria-derived**: From "MUST return X when Y" -> `{ input: Y, expected: X }`. From "MUST validate Z" -> `{ input: invalid_Z, expected: error }`.
109
+
110
+ **Tier 3 — Scenario-derived (L3 only)**: From Epic user stories -> scenario packs with coordinated entity IDs across steps.
111
+
112
+ **Microservice mocks**: From architecture API contract -> request/response pairs for WireMock stubs.
113
+
114
+ ### Step 4: Write Test Plan & Confirm
115
+
116
+ 1. Archive previous `business-test-plan.json` to `.history/` if exists
117
+ 2. Write `.tests/business/business-test-plan.json` with scenarios, fixtures, mock_contracts, requirement_coverage_plan
118
+ 3. Display plan summary (scenario counts per layer, fixture counts, requirement coverage)
119
+ 4. If not `--auto`: wait for user confirmation (yes/edit/cancel)
120
+ 5. If `--dry-run`: stop here, report plan
121
+
122
+ ### Step 5: Generate Test Code (if --gen-code)
123
+
124
+ Detect project tech stack from `.workflow/specs/project-tech.json` or codebase scan.
125
+
126
+ | Stack | L1 | L2 | L3 |
127
+ |-------|----|----|-----|
128
+ | Java/Spring Boot | RestAssured + MockMvc | JUnit 5 Parameterized + WireMock | TestContainers |
129
+ | TypeScript/Node | supertest + vitest | vitest + nock | playwright/cypress |
130
+ | Python | httpx + pytest | pytest + responses | pytest + selenium |
131
+
132
+ Each test method includes its REQ-NNN:AC-N reference in the display name. Test files are placed in `.tests/business/{layer}/`.
133
+
134
+ If no `--gen-code`: scenarios stay as structured JSON for AI agent execution.
135
+
136
+ ### Step 6: Execute Tests (Progressive L1 → L2 → L3)
137
+
138
+ **Fail-fast**: L1 critical failures -> STOP (don't run L2). L2 critical failures -> STOP (don't run L3).
139
+
140
+ **Generator-Critic loop per layer (max 3 iterations):**
141
+
142
+ | Iteration | Action |
143
+ |-----------|--------|
144
+ | 1 | Run all scenarios. Critic: classify failures as test_defect / code_defect / env_issue |
145
+ | 2 | Auto-fix test_defects, re-run ALL scenarios |
146
+ | 3 | Final confirmation. Remaining failures = confirmed code_defects |
147
+
148
+ **Execution modes:**
149
+ - `--gen-code`: run via test framework (`mvn test`, `npx vitest`, etc.)
150
+ - default: AI agent executes scenarios against running application
151
+
152
+ Record results in `.tests/business/test-results-iter-{N}.json`.
153
+
154
+ ### Step 7: Build Traceability Matrix
155
+
156
+ Map each result to `REQ-NNN:AC-N`:
157
+
158
+ ```
159
+ FOR each REQ:
160
+ FOR each AC:
161
+ ac_status = "passed" if ALL scenarios passed
162
+ "failed" if ANY failed
163
+ "blocked" if ANY blocked (none failed)
164
+ "untested" if no scenarios mapped
165
+ verdict = "verified" if all MUST+SHOULD passed
166
+ "partial" if some failed
167
+ "unverified" if all failed/untested
168
+ ```
169
+
170
+ ### Step 8: Generate Reports
171
+
172
+ 1. Archive previous report/summary to `.history/`
173
+ 2. Write `.tests/business/business-test-report.json` with:
174
+ - `layers`: per-layer stats (total, passed, failed, blocked, pass_rate)
175
+ - `requirement_coverage`: per-REQ criteria results with failure details
176
+ - `failures`: each with req_ref, severity, expected/actual, fix_suggestion
177
+ - `summary`: total_requirements, fully_verified, partially_verified, unverified, coverage_pct
178
+ 3. Write `.tests/business/business-test-summary.md` (human-readable tables)
179
+ 4. Update `index.json` with `business_test` section
180
+
181
+ ### Step 9: Feedback Loop
182
+
183
+ 1. Auto-create one issue per failure in `.workflow/issues/issues.jsonl` (each with `req_ref`, `source: "business-test"`)
184
+ 2. Report results
185
+ 3. Route next step:
186
+
187
+ | Result | Suggestion |
188
+ |--------|------------|
189
+ | All requirements verified | Skill({ skill: "maestro-phase-transition", args: "{phase}" }) |
190
+ | Failures found | Skill({ skill: "quality-debug", args: "--from-business-test {phase}" }) |
191
+ | `--re-run` all pass | Skill({ skill: "maestro-verify", args: "{phase}" }) |
192
+ | Low coverage (< 60%) | Skill({ skill: "quality-test-gen", args: "{phase}" }) |
193
+
194
+ **Closure criteria**: Requirement marked "verified" ONLY when ALL MUST+SHOULD acceptance criteria pass.
195
+
196
+ ---
197
+
198
+ ## Error Handling
199
+
200
+ | Code | Severity | Condition | Recovery |
201
+ |------|----------|-----------|----------|
202
+ | E001 | error | Phase number required | Prompt user for phase number |
203
+ | E002 | error | Phase directory not found | Verify phase exists in .workflow/phases/ |
204
+ | E003 | error | No spec package AND no success_criteria | Run maestro-spec-generate or maestro-plan first |
205
+ | E004 | error | L1 critical failures block L2/L3 | Fix blockers via quality-debug |
206
+ | W001 | warning | Degraded mode (no spec package) | Consider running maestro-spec-generate |
207
+ | W002 | warning | Some REQs have no testable AC | Note in report |
208
+ | W003 | warning | Generator-Critic loop exhausted | Accept current state |
209
+ | W004 | warning | Mock services unavailable for L3 | Skip L3 or use --gen-code |
210
+
211
+ ---
212
+
213
+ ## Core Rules
214
+
215
+ - **PRD is source of truth** -- business rules drive test scenarios, not code structure
216
+ - **RFC 2119 keyword priority** -- MUST = critical, SHOULD = high, MAY = medium
217
+ - **Fail-fast across layers** -- critical L1 failures block L2/L3
218
+ - **Generator-Critic loop max 3 iterations** per layer
219
+ - **Traceability on every result** -- every pass/fail maps to REQ-NNN:AC-N
220
+ - **Agent calls use `run_in_background: false`** for synchronous execution
221
+ - **Auto-create issues** in `.workflow/issues/issues.jsonl` for every failure
222
+ - **Degraded mode** works without spec package (from success_criteria + plan.json)
223
+ - **Never modify source code** -- this command tests, it doesn't fix
@@ -1,5 +1,5 @@
1
1
  ---
2
- name: maestro-debug
2
+ name: quality-debug
3
3
  description: Hypothesis-driven debugging via CSV wave pipeline. Wave 1 generates parallel hypotheses, Wave 2 attempts parallel fixes on confirmed hypotheses. Replaces quality-debug command.
4
4
  argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] \"[bug description] [--from-uat <phase>] [--parallel]\""
5
5
  allowed-tools: spawn_agents_on_csv, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion
@@ -14,10 +14,10 @@ When `--yes` or `-y`: Auto-confirm hypothesis selection, skip interactive sympto
14
14
  ## Usage
15
15
 
16
16
  ```bash
17
- $maestro-debug "Login button throws 500 error on click"
18
- $maestro-debug -y "JWT token not refreshed --from-uat 3"
19
- $maestro-debug -c 4 "Navigation crash --from-uat 3 --parallel"
20
- $maestro-debug --continue "debug-jwt-expiry-20260318"
17
+ $quality-debug "Login button throws 500 error on click"
18
+ $quality-debug -y "JWT token not refreshed --from-uat 3"
19
+ $quality-debug -c 4 "Navigation crash --from-uat 3 --parallel"
20
+ $quality-debug --continue "debug-jwt-expiry-20260318"
21
21
  ```
22
22
 
23
23
  **Flags**: