maestro-flow 0.3.46 → 0.3.48
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/ui-design-agent.md +1 -0
- package/.claude/agents/workflow-executor.md +3 -0
- package/.claude/commands/learn-decompose.md +91 -146
- package/.claude/commands/learn-follow.md +102 -137
- package/.claude/commands/learn-investigate.md +102 -167
- package/.claude/commands/learn-retro.md +100 -243
- package/.claude/commands/learn-second-opinion.md +95 -135
- package/.claude/commands/maestro-amend.md +95 -232
- package/.claude/commands/maestro-analyze.md +1 -6
- package/.claude/commands/maestro-collab.md +104 -265
- package/.claude/commands/maestro-composer.md +113 -293
- package/.claude/commands/maestro-execute.md +10 -17
- package/.claude/commands/maestro-impeccable.md +89 -0
- package/.claude/commands/maestro-plan.md +1 -6
- package/.claude/commands/maestro-player.md +111 -340
- package/.claude/commands/maestro-quick.md +9 -0
- package/.claude/commands/maestro-ralph-execute.md +167 -210
- package/.claude/commands/maestro-ralph.md +245 -426
- package/.claude/commands/maestro-ui-codify.md +13 -0
- package/.claude/commands/maestro-ui-craft.md +364 -0
- package/.claude/commands/maestro-ui-design.md +12 -1
- package/.claude/commands/maestro-verify.md +12 -13
- package/.claude/commands/maestro.md +142 -72
- package/.claude/commands/manage-knowhow-capture.md +45 -170
- package/.claude/commands/quality-auto-test.md +9 -0
- package/.claude/commands/quality-debug.md +11 -25
- package/.claude/commands/quality-refactor.md +9 -0
- package/.claude/commands/quality-review.md +5 -14
- package/.claude/commands/spec-add.md +1 -1
- package/.claude/commands/spec-load.md +3 -2
- package/.claude/skills/maestro-impeccable/SKILL.md +169 -0
- package/.codex/skills/learn-decompose/SKILL.md +1 -1
- package/.codex/skills/learn-investigate/SKILL.md +2 -1
- package/.codex/skills/maestro/SKILL.md +420 -313
- package/.codex/skills/maestro-analyze/SKILL.md +126 -417
- package/.codex/skills/maestro-brainstorm/SKILL.md +129 -451
- package/.codex/skills/maestro-collab/SKILL.md +134 -547
- package/.codex/skills/maestro-execute/SKILL.md +3 -1
- package/.codex/skills/maestro-impeccable/SKILL.md +112 -0
- package/.codex/skills/maestro-plan/SKILL.md +88 -437
- package/.codex/skills/maestro-player/SKILL.md +191 -333
- package/.codex/skills/maestro-quick/SKILL.md +2 -0
- package/.codex/skills/maestro-ralph/SKILL.md +327 -710
- package/.codex/skills/maestro-roadmap/SKILL.md +201 -518
- package/.codex/skills/maestro-ui-codify/SKILL.md +1 -0
- package/.codex/skills/maestro-ui-craft/SKILL.md +341 -0
- package/.codex/skills/maestro-ui-design/SKILL.md +10 -0
- package/.codex/skills/maestro-verify/SKILL.md +116 -409
- package/.codex/skills/quality-auto-test/SKILL.md +145 -443
- package/.codex/skills/quality-refactor/SKILL.md +1 -1
- package/.codex/skills/quality-test/SKILL.md +229 -517
- package/.codex/skills/spec-add/SKILL.md +1 -1
- package/README.md +4 -1
- package/README.zh-CN.md +3 -1
- package/dashboard/dist-server/dashboard/src/server/agents/codex-cli-adapter.js +3 -0
- package/dashboard/dist-server/dashboard/src/server/agents/codex-cli-adapter.js.map +1 -1
- package/dashboard/dist-server/dashboard/src/server/routes/install.js +110 -1
- package/dashboard/dist-server/dashboard/src/server/routes/install.js.map +1 -1
- package/dashboard/dist-server/dashboard/src/server/routes/settings.js +56 -0
- package/dashboard/dist-server/dashboard/src/server/routes/settings.js.map +1 -1
- package/dashboard/dist-server/dashboard/src/server/routes/wiki.js +2 -0
- package/dashboard/dist-server/dashboard/src/server/routes/wiki.js.map +1 -1
- package/dashboard/dist-server/dashboard/src/server/wiki/spec-entry-parser.js +2 -2
- package/dashboard/dist-server/dashboard/src/server/wiki/spec-entry-parser.js.map +1 -1
- package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.js +2 -0
- package/dashboard/dist-server/dashboard/src/server/wiki/wiki-indexer.js.map +1 -1
- package/dashboard/dist-server/dashboard/src/server/wiki/wiki-types.d.ts +3 -1
- package/dashboard/dist-server/dashboard/src/shared/constants.d.ts +2 -0
- package/dashboard/dist-server/dashboard/src/shared/constants.js +2 -0
- package/dashboard/dist-server/dashboard/src/shared/constants.js.map +1 -1
- package/dist/src/agents/cli-agent-runner.d.ts.map +1 -1
- package/dist/src/agents/cli-agent-runner.js +1 -3
- package/dist/src/agents/cli-agent-runner.js.map +1 -1
- package/dist/src/agents/cli-history-store.d.ts +5 -0
- package/dist/src/agents/cli-history-store.d.ts.map +1 -1
- package/dist/src/agents/cli-history-store.js +65 -13
- package/dist/src/agents/cli-history-store.js.map +1 -1
- package/dist/src/cli.js +13 -0
- package/dist/src/cli.js.map +1 -1
- package/dist/src/commands/command-help.d.ts +3 -0
- package/dist/src/commands/command-help.d.ts.map +1 -0
- package/dist/src/commands/command-help.js +60 -0
- package/dist/src/commands/command-help.js.map +1 -0
- package/dist/src/commands/config.d.ts.map +1 -1
- package/dist/src/commands/config.js +17 -0
- package/dist/src/commands/config.js.map +1 -1
- package/dist/src/commands/delegate.d.ts.map +1 -1
- package/dist/src/commands/delegate.js +12 -2
- package/dist/src/commands/delegate.js.map +1 -1
- package/dist/src/commands/impeccable.d.ts +10 -0
- package/dist/src/commands/impeccable.d.ts.map +1 -0
- package/dist/src/commands/impeccable.js +181 -0
- package/dist/src/commands/impeccable.js.map +1 -0
- package/dist/src/commands/spec.js +1 -1
- package/dist/src/commands/spec.js.map +1 -1
- package/dist/src/commands/wiki.d.ts.map +1 -1
- package/dist/src/commands/wiki.js +5 -1
- package/dist/src/commands/wiki.js.map +1 -1
- package/dist/src/config/cli-tools-config.d.ts.map +1 -1
- package/dist/src/config/cli-tools-config.js +10 -7
- package/dist/src/config/cli-tools-config.js.map +1 -1
- package/dist/src/core/addon-registry.d.ts +31 -0
- package/dist/src/core/addon-registry.d.ts.map +1 -0
- package/dist/src/core/addon-registry.js +28 -0
- package/dist/src/core/addon-registry.js.map +1 -0
- package/dist/src/hooks/plugins/spec-injection-plugin.js +2 -0
- package/dist/src/hooks/plugins/spec-injection-plugin.js.map +1 -1
- package/dist/src/hooks/spec-injector.js +2 -2
- package/dist/src/hooks/spec-injector.js.map +1 -1
- package/dist/src/index.d.ts +2 -0
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +1 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/tools/impeccable/critique-storage.d.ts +28 -0
- package/dist/src/tools/impeccable/critique-storage.d.ts.map +1 -0
- package/dist/src/tools/impeccable/critique-storage.js +120 -0
- package/dist/src/tools/impeccable/critique-storage.js.map +1 -0
- package/dist/src/tools/impeccable/design-parser.d.ts +90 -0
- package/dist/src/tools/impeccable/design-parser.d.ts.map +1 -0
- package/dist/src/tools/impeccable/design-parser.js +696 -0
- package/dist/src/tools/impeccable/design-parser.js.map +1 -0
- package/dist/src/tools/impeccable/detect-csp.d.ts +6 -0
- package/dist/src/tools/impeccable/detect-csp.d.ts.map +1 -0
- package/dist/src/tools/impeccable/detect-csp.js +130 -0
- package/dist/src/tools/impeccable/detect-csp.js.map +1 -0
- package/dist/src/tools/impeccable/is-generated.d.ts +4 -0
- package/dist/src/tools/impeccable/is-generated.d.ts.map +1 -0
- package/dist/src/tools/impeccable/is-generated.js +56 -0
- package/dist/src/tools/impeccable/is-generated.js.map +1 -0
- package/dist/src/tools/impeccable/live/accept.d.ts +50 -0
- package/dist/src/tools/impeccable/live/accept.d.ts.map +1 -0
- package/dist/src/tools/impeccable/live/accept.js +556 -0
- package/dist/src/tools/impeccable/live/accept.js.map +1 -0
- package/dist/src/tools/impeccable/live/bootstrap.d.ts +2 -0
- package/dist/src/tools/impeccable/live/bootstrap.d.ts.map +1 -0
- package/dist/src/tools/impeccable/live/bootstrap.js +244 -0
- package/dist/src/tools/impeccable/live/bootstrap.js.map +1 -0
- package/dist/src/tools/impeccable/live/complete.d.ts +7 -0
- package/dist/src/tools/impeccable/live/complete.d.ts.map +1 -0
- package/dist/src/tools/impeccable/live/complete.js +67 -0
- package/dist/src/tools/impeccable/live/complete.js.map +1 -0
- package/dist/src/tools/impeccable/live/completion.d.ts +24 -0
- package/dist/src/tools/impeccable/live/completion.d.ts.map +1 -0
- package/dist/src/tools/impeccable/live/completion.js +26 -0
- package/dist/src/tools/impeccable/live/completion.js.map +1 -0
- package/dist/src/tools/impeccable/live/inject.d.ts +41 -0
- package/dist/src/tools/impeccable/live/inject.d.ts.map +1 -0
- package/dist/src/tools/impeccable/live/inject.js +394 -0
- package/dist/src/tools/impeccable/live/inject.js.map +1 -0
- package/dist/src/tools/impeccable/live/poll.d.ts +24 -0
- package/dist/src/tools/impeccable/live/poll.d.ts.map +1 -0
- package/dist/src/tools/impeccable/live/poll.js +180 -0
- package/dist/src/tools/impeccable/live/poll.js.map +1 -0
- package/dist/src/tools/impeccable/live/resume.d.ts +5 -0
- package/dist/src/tools/impeccable/live/resume.d.ts.map +1 -0
- package/dist/src/tools/impeccable/live/resume.js +30 -0
- package/dist/src/tools/impeccable/live/resume.js.map +1 -0
- package/dist/src/tools/impeccable/live/server.d.ts +6 -0
- package/dist/src/tools/impeccable/live/server.d.ts.map +1 -0
- package/dist/src/tools/impeccable/live/server.js +867 -0
- package/dist/src/tools/impeccable/live/server.js.map +1 -0
- package/dist/src/tools/impeccable/live/session-store.d.ts +72 -0
- package/dist/src/tools/impeccable/live/session-store.d.ts.map +1 -0
- package/dist/src/tools/impeccable/live/session-store.js +281 -0
- package/dist/src/tools/impeccable/live/session-store.js.map +1 -0
- package/dist/src/tools/impeccable/live/static/live-browser-session.js +123 -0
- package/dist/src/tools/impeccable/live/static/live-browser.js +4860 -0
- package/dist/src/tools/impeccable/live/static/modern-screenshot.umd.js +14 -0
- package/dist/src/tools/impeccable/live/status.d.ts +2 -0
- package/dist/src/tools/impeccable/live/status.d.ts.map +1 -0
- package/dist/src/tools/impeccable/live/status.js +52 -0
- package/dist/src/tools/impeccable/live/status.js.map +1 -0
- package/dist/src/tools/impeccable/live/wrap.d.ts +33 -0
- package/dist/src/tools/impeccable/live/wrap.d.ts.map +1 -0
- package/dist/src/tools/impeccable/live/wrap.js +572 -0
- package/dist/src/tools/impeccable/live/wrap.js.map +1 -0
- package/dist/src/tools/impeccable/load-context.d.ts +13 -0
- package/dist/src/tools/impeccable/load-context.d.ts.map +1 -0
- package/dist/src/tools/impeccable/load-context.js +79 -0
- package/dist/src/tools/impeccable/load-context.js.map +1 -0
- package/dist/src/tools/impeccable/paths.d.ts +34 -0
- package/dist/src/tools/impeccable/paths.d.ts.map +1 -0
- package/dist/src/tools/impeccable/paths.js +102 -0
- package/dist/src/tools/impeccable/paths.js.map +1 -0
- package/dist/src/tools/spec-entry-parser.d.ts +1 -1
- package/dist/src/tools/spec-entry-parser.d.ts.map +1 -1
- package/dist/src/tools/spec-entry-parser.js +1 -1
- package/dist/src/tools/spec-entry-parser.js.map +1 -1
- package/dist/src/tools/spec-init.d.ts.map +1 -1
- package/dist/src/tools/spec-init.js +26 -1
- package/dist/src/tools/spec-init.js.map +1 -1
- package/dist/src/tools/spec-loader.d.ts +1 -1
- package/dist/src/tools/spec-loader.d.ts.map +1 -1
- package/dist/src/tools/spec-loader.js +2 -0
- package/dist/src/tools/spec-loader.js.map +1 -1
- package/package.json +2 -2
- package/workflows/claude-instructions.md +17 -5
- package/workflows/cli-tools-usage.md +10 -3
- package/workflows/delegate-usage.md +3 -2
- package/workflows/impeccable/adapt.md +190 -0
- package/workflows/impeccable/animate.md +175 -0
- package/workflows/impeccable/audit.md +133 -0
- package/workflows/impeccable/bolder.md +113 -0
- package/workflows/impeccable/brand.md +118 -0
- package/workflows/impeccable/clarify.md +174 -0
- package/workflows/impeccable/codex.md +105 -0
- package/workflows/impeccable/cognitive-load.md +106 -0
- package/workflows/impeccable/color-and-contrast.md +105 -0
- package/workflows/impeccable/colorize.md +154 -0
- package/workflows/impeccable/craft.md +123 -0
- package/workflows/impeccable/critique.md +261 -0
- package/workflows/impeccable/delight.md +302 -0
- package/workflows/impeccable/distill.md +111 -0
- package/workflows/impeccable/document.md +439 -0
- package/workflows/impeccable/extract.md +69 -0
- package/workflows/impeccable/harden.md +347 -0
- package/workflows/impeccable/heuristics-scoring.md +234 -0
- package/workflows/impeccable/interaction-design.md +195 -0
- package/workflows/impeccable/layout.md +141 -0
- package/workflows/impeccable/live.md +622 -0
- package/workflows/impeccable/motion-design.md +109 -0
- package/workflows/impeccable/onboard.md +234 -0
- package/workflows/impeccable/optimize.md +258 -0
- package/workflows/impeccable/overdrive.md +130 -0
- package/workflows/impeccable/personas.md +179 -0
- package/workflows/impeccable/polish.md +242 -0
- package/workflows/impeccable/product.md +62 -0
- package/workflows/impeccable/quieter.md +99 -0
- package/workflows/impeccable/responsive-design.md +114 -0
- package/workflows/impeccable/shape.md +165 -0
- package/workflows/impeccable/spatial-design.md +100 -0
- package/workflows/impeccable/teach.md +168 -0
- package/workflows/impeccable/typeset.md +124 -0
- package/workflows/impeccable/typography.md +159 -0
- package/workflows/impeccable/ux-writing.md +107 -0
- package/workflows/impeccable.md +164 -0
- package/workflows/maestro.md +7 -3
- package/workflows/skill-authoring.md +265 -0
- package/workflows/specs-add.md +3 -2
- package/workflows/specs-load.md +2 -1
- package/workflows/specs-setup.md +21 -1
|
@@ -6,320 +6,157 @@ allowed-tools: spawn_agents_on_csv, Read, Write, Edit, Bash, Glob, Grep, AskUser
|
|
|
6
6
|
---
|
|
7
7
|
|
|
8
8
|
<purpose>
|
|
9
|
-
|
|
9
|
+
CSV-parallel automated testing pipeline via `spawn_agents_on_csv`.
|
|
10
|
+
Route -> Source Scenarios -> Write Tests (parallel per layer) -> Execute (L0->L3 sequential) -> Diagnose Failures (parallel) -> Iterate -> Report.
|
|
10
11
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
**Topology**: Layers as waves (L0→L1→L2→L3 sequential, scenarios within layer parallel)
|
|
14
|
-
|
|
15
|
-
```
|
|
16
|
-
+---------------------------------------------------------------------------+
|
|
17
|
-
| AUTO-TEST CSV LAYER PIPELINE |
|
|
18
|
-
+---------------------------------------------------------------------------+
|
|
19
|
-
| |
|
|
20
|
-
| Phase 1: Route & Plan -> CSV |
|
|
21
|
-
| +-- Read project state, auto-select route (spec/gap/code) |
|
|
22
|
-
| +-- Extract scenarios per route, normalize to unified format |
|
|
23
|
-
| +-- Discover test infrastructure (framework, patterns) |
|
|
24
|
-
| +-- Build scenarios.csv with one row per scenario |
|
|
25
|
-
| +-- Layers = waves (L1, L2, L3 sequential; L0 = static pre-check) |
|
|
26
|
-
| +-- User validates test plan (skip if -y) |
|
|
27
|
-
| |
|
|
28
|
-
| Phase 2: Layer Execution Engine (write + run) |
|
|
29
|
-
| +-- L0: Static analysis (tsc + eslint) — no CSV needed |
|
|
30
|
-
| +-- For each layer L1→L3 (sequential, fail-fast on critical): |
|
|
31
|
-
| | +-- Layer N: Write Tests (parallel via spawn_agents_on_csv) |
|
|
32
|
-
| | | +-- Each agent writes one test file (RED-GREEN) |
|
|
33
|
-
| | | +-- Agent reads target source + infrastructure patterns |
|
|
34
|
-
| | | +-- Agent verifies RED (run test, check it targets behavior) |
|
|
35
|
-
| | | +-- Results: test_file written, red_result, findings |
|
|
36
|
-
| | +-- Merge write-results into master scenarios.csv |
|
|
37
|
-
| | +-- Run all layer tests together (full layer execution) |
|
|
38
|
-
| | +-- Record per-scenario pass/fail results |
|
|
39
|
-
| |
|
|
40
|
-
| Phase 3: Iteration Engine (diagnose + fix) |
|
|
41
|
-
| +-- OUTER LOOP (max_iter iterations): |
|
|
42
|
-
| | +-- For each layer with failures: |
|
|
43
|
-
| | | +-- Build diagnosis.csv from failed scenarios |
|
|
44
|
-
| | | +-- Diagnose & Fix (parallel via spawn_agents_on_csv) |
|
|
45
|
-
| | | | +-- Each agent classifies one failure cluster |
|
|
46
|
-
| | | | +-- test_defect: agent provides fix diff |
|
|
47
|
-
| | | | +-- code_defect: agent documents evidence |
|
|
48
|
-
| | | +-- Apply test_defect fixes, re-run layer |
|
|
49
|
-
| | +-- Reflect: analyze trends, log strategy |
|
|
50
|
-
| | +-- Adjust: select next strategy (conservative/aggressive/...) |
|
|
51
|
-
| | +-- Convergence check: >=95% → done |
|
|
52
|
-
| +-- discoveries.ndjson shared across all iterations |
|
|
53
|
-
| |
|
|
54
|
-
| Phase 4: Results & Routing |
|
|
55
|
-
| +-- Export results.csv |
|
|
56
|
-
| +-- Write report.json, state.json, reflection-log.md |
|
|
57
|
-
| +-- Conditional: traceability.md, issue creation |
|
|
58
|
-
| +-- Route to next step based on convergence |
|
|
59
|
-
| |
|
|
60
|
-
+---------------------------------------------------------------------------+
|
|
61
|
-
```
|
|
12
|
+
Topology: layers as waves (L0->L1->L2->L3 sequential, scenarios within layer parallel).
|
|
62
13
|
</purpose>
|
|
63
14
|
|
|
64
15
|
<context>
|
|
65
|
-
|
|
66
|
-
$quality-auto-test "3" # auto-detect source, full iteration
|
|
67
|
-
$quality-auto-test -c 4 "3" # max 4 concurrent test writers per layer
|
|
68
|
-
$quality-auto-test -y "3 --max-iter 1" # single-pass generation only
|
|
69
|
-
$quality-auto-test "3 --dry-run" # plan only, no execution
|
|
70
|
-
$quality-auto-test "3 --re-run" # re-run only previously failed scenarios
|
|
71
|
-
$quality-auto-test "3 --layer L2" # restrict to L2 integration tests
|
|
72
|
-
```
|
|
16
|
+
$ARGUMENTS -- phase number and optional flags.
|
|
73
17
|
|
|
74
18
|
**Flags**:
|
|
75
19
|
- `-y, --yes`: Skip all confirmations
|
|
76
|
-
- `-c, --concurrency N`: Max concurrent agents
|
|
77
|
-
- `--max-iter N`: Max outer iterations (default 5
|
|
78
|
-
- `--layer L`:
|
|
79
|
-
- `--strategy conservative|aggressive|surgical|reflective`: Override starting
|
|
80
|
-
- `--dry-run`: Generate test plan only,
|
|
20
|
+
- `-c, --concurrency N`: Max concurrent agents per layer (default: 5)
|
|
21
|
+
- `--max-iter N`: Max outer iterations (default: 5; 1 = single-pass)
|
|
22
|
+
- `--layer L`: Restrict to specific layer (L0|L1|L2|L3)
|
|
23
|
+
- `--strategy conservative|aggressive|surgical|reflective`: Override starting strategy
|
|
24
|
+
- `--dry-run`: Generate test plan only, no execution
|
|
81
25
|
- `--re-run`: Re-run only previously failed/blocked scenarios
|
|
82
26
|
|
|
83
|
-
**Intelligent routing** (auto-detected
|
|
27
|
+
**Intelligent routing** (auto-detected):
|
|
84
28
|
|
|
85
29
|
| Priority | Condition | Route |
|
|
86
30
|
|----------|-----------|-------|
|
|
87
|
-
| 1 | Active session (state.json
|
|
88
|
-
| 2 | --re-run
|
|
89
|
-
| 3 |
|
|
90
|
-
| 4 |
|
|
31
|
+
| 1 | Active session (state.json running) | Resume |
|
|
32
|
+
| 2 | --re-run + previous failures | Re-run |
|
|
33
|
+
| 3 | REQ-*.md exists | spec (PRD-forward) |
|
|
34
|
+
| 4 | verification.json has gaps | gap (coverage-forward) |
|
|
91
35
|
| 5 | Default | code (exploration-forward) |
|
|
92
36
|
|
|
93
|
-
**Session
|
|
94
|
-
**
|
|
37
|
+
**Session**: `.tests/auto-test/.csv-session/`
|
|
38
|
+
**Output**: scenarios.csv, results.csv, discoveries.ndjson, report.json, state.json, reflection-log.md
|
|
95
39
|
</context>
|
|
96
40
|
|
|
97
41
|
<csv_schema>
|
|
98
42
|
|
|
99
|
-
### scenarios.csv (
|
|
43
|
+
### scenarios.csv (Test Writing Phase)
|
|
100
44
|
|
|
101
45
|
```csv
|
|
102
46
|
id,name,layer,priority,category,target_file,test_file,description,test_cases,fixtures,req_ref,infrastructure_hints,prev_context,status,red_result,findings,error
|
|
103
|
-
"AT-001","Auth token validation","L1","critical","api_contract","src/auth/token.ts","src/auth/__tests__/token.test.ts","Validate JWT
|
|
104
|
-
"AT-002","Login endpoint integration","L2","high","business_rule","src/routes/login.ts","src/routes/__tests__/login.integration.test.ts","POST /api/login returns JWT on valid credentials","valid login returns 200+token;invalid password returns 401;missing email returns 400","user_fixture;credentials_fixture","REQ-002:AC-1","supertest;see src/routes/__tests__/health.test.ts","AT-001 findings: token module exports verifyToken/generateToken","","","",""
|
|
47
|
+
"AT-001","Auth token validation","L1","critical","api_contract","src/auth/token.ts","src/auth/__tests__/token.test.ts","Validate JWT verification","verify valid;verify expired;verify malformed","valid_token;expired_token","REQ-001:AC-1","vitest;describe/it;see hash.test.ts","","","","",""
|
|
105
48
|
```
|
|
106
49
|
|
|
107
|
-
**
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
| `layer` | Input | L1/L2/L3 (determines wave order) |
|
|
114
|
-
| `priority` | Input | critical/high/medium |
|
|
115
|
-
| `category` | Input | api_contract/business_rule/state_transition/user_flow/... |
|
|
116
|
-
| `target_file` | Input | Source file being tested |
|
|
117
|
-
| `test_file` | Input | Target test file path to create |
|
|
118
|
-
| `description` | Input | What this scenario validates |
|
|
119
|
-
| `test_cases` | Input | Semicolon-separated test cases |
|
|
120
|
-
| `fixtures` | Input | Required fixtures/mocks (semicolon-separated) |
|
|
121
|
-
| `req_ref` | Input | Requirement reference (REQ-NNN:AC-N or gap-id or empty) |
|
|
122
|
-
| `infrastructure_hints` | Input | Framework + pattern references from Step 3 |
|
|
123
|
-
| `prev_context` | Computed | Findings from prior layer scenarios (cross-layer propagation) |
|
|
124
|
-
| `status` | Output | pending → written → passed → failed → blocked |
|
|
125
|
-
| `red_result` | Output | expected_fail / unexpected_fail / pass (RED phase result) |
|
|
126
|
-
| `findings` | Output | Implementation notes, patterns discovered (max 500 chars) |
|
|
127
|
-
| `error` | Output | Error message if failed |
|
|
128
|
-
|
|
129
|
-
### diagnosis.csv (Iteration Phase — Failure Diagnosis)
|
|
50
|
+
**scenarios.csv column semantics**:
|
|
51
|
+
- Input: id (AT-NNN), name, layer (L1/L2/L3 = wave order), priority (critical/high/medium), category (api_contract/business_rule/state_transition/user_flow/...), target_file (source file tested), test_file (test file path to create), description (what scenario validates), test_cases (semicolon-sep, each -> one it() block), fixtures (required mocks/fixtures, semicolon-sep), req_ref (REQ-NNN:AC-N or gap-id or empty), infrastructure_hints (framework + pattern refs from infra discovery)
|
|
52
|
+
- Computed: prev_context (findings from prior layer scenarios, cross-layer propagation)
|
|
53
|
+
- Output: status (pending -> written -> passed | failed | blocked), red_result (expected_fail/unexpected_fail/pass), findings (patterns discovered, notes for dependents, max 500 chars), error (error message if failed)
|
|
54
|
+
|
|
55
|
+
### diagnosis.csv (Iteration Phase)
|
|
130
56
|
|
|
131
57
|
```csv
|
|
132
58
|
id,scenario_id,layer,test_file,error_detail,expected,actual,target_file,source_context,classification,fix_code,evidence,error
|
|
133
|
-
"DX-001","AT-003","L1","
|
|
134
|
-
"DX-002","AT-005","L2","src/routes/__tests__/login.test.ts","Expected 200, received 500","POST /login returns 200 with valid credentials","Internal server error: database connection refused","src/routes/login.ts","login.ts calls UserModel.findByEmail","env_issue","","Database not available in test environment",""
|
|
59
|
+
"DX-001","AT-003","L1","token.test.ts","TypeError: not a function","verifyToken returns payload","Not exported","token.ts","exports: generateToken only","test_defect","fix import path","verify-token.ts:15",""
|
|
135
60
|
```
|
|
136
61
|
|
|
137
|
-
**
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|--------|-------|-------------|
|
|
141
|
-
| `id` | Input | Diagnosis ID (DX-NNN) |
|
|
142
|
-
| `scenario_id` | Input | Reference to AT-NNN scenario |
|
|
143
|
-
| `layer` | Input | Layer where failure occurred |
|
|
144
|
-
| `test_file` | Input | Test file that failed |
|
|
145
|
-
| `error_detail` | Input | Full error message/stack trace excerpt |
|
|
146
|
-
| `expected` | Input | Expected behavior from scenario |
|
|
147
|
-
| `actual` | Input | Actual behavior observed |
|
|
148
|
-
| `target_file` | Input | Source file being tested |
|
|
149
|
-
| `source_context` | Input | Relevant source code context (exports, imports) |
|
|
150
|
-
| `classification` | Output | test_defect / code_defect / env_issue |
|
|
151
|
-
| `fix_code` | Output | Fix diff for test_defect (old → new) |
|
|
152
|
-
| `evidence` | Output | file:line references for diagnosis |
|
|
153
|
-
| `error` | Output | Agent error if diagnosis failed |
|
|
154
|
-
|
|
155
|
-
### Session Structure
|
|
156
|
-
|
|
157
|
-
```
|
|
158
|
-
.tests/auto-test/.csv-session/
|
|
159
|
-
+-- scenarios.csv (master state)
|
|
160
|
-
+-- results.csv (final export)
|
|
161
|
-
+-- discoveries.ndjson (shared across iterations)
|
|
162
|
-
+-- layer-L{N}-write.csv (temporary, per-layer write input)
|
|
163
|
-
+-- layer-L{N}-write-results.csv
|
|
164
|
-
+-- diagnosis-iter-{N}.csv (temporary, per-iteration diagnosis)
|
|
165
|
-
+-- diagnosis-iter-{N}-results.csv
|
|
166
|
-
```
|
|
62
|
+
**diagnosis.csv column semantics**:
|
|
63
|
+
- Input: id (DX-NNN), scenario_id (ref to AT-NNN), layer, test_file, error_detail (full error/stack excerpt), expected (from scenario), actual (observed behavior), target_file, source_context (relevant code: exports, imports)
|
|
64
|
+
- Output: classification (test_defect/code_defect/env_issue), fix_code (for test_defect: "old → new" or full replacement; empty for code_defect/env_issue), evidence (file:line references), error
|
|
167
65
|
</csv_schema>
|
|
168
66
|
|
|
169
67
|
<invariants>
|
|
170
|
-
1. **
|
|
171
|
-
2. **
|
|
172
|
-
3. **
|
|
173
|
-
4. **
|
|
174
|
-
5. **
|
|
175
|
-
6. **
|
|
176
|
-
7. **
|
|
177
|
-
8. **
|
|
178
|
-
9. **Convergence Threshold**: 95% pass rate = converged
|
|
179
|
-
10. **DO NOT STOP**: Continuous execution until convergence, max_iter, or all remaining = code_defect
|
|
68
|
+
1. **Layer order sacred**: Never execute L(N+1) before L(N) completes (fail-fast on critical)
|
|
69
|
+
2. **CSV is source of truth**: Master scenarios.csv holds all state
|
|
70
|
+
3. **Context propagation**: prev_context from prior-layer findings in CSV
|
|
71
|
+
4. **Discovery board append-only**: Never modify/delete discoveries.ndjson
|
|
72
|
+
5. **Route auto-detected**: Read state, never ask user for mode
|
|
73
|
+
6. **RED-GREEN methodology**: Tests target real behavior; failing test = bug discovery (never fix source)
|
|
74
|
+
7. **Max 3 inner fix attempts**: Per layer, fix test_defects up to 3 times via diagnosis CSV
|
|
75
|
+
8. **Convergence threshold**: 95% pass rate = converged
|
|
180
76
|
</invariants>
|
|
181
77
|
|
|
182
|
-
<
|
|
183
|
-
|
|
184
|
-
### Session Initialization
|
|
185
|
-
|
|
186
|
-
```
|
|
187
|
-
Parse from $ARGUMENTS:
|
|
188
|
-
AUTO_YES ← --yes | -y
|
|
189
|
-
maxConcurrency ← --concurrency | -c N (default: 5)
|
|
190
|
-
MAX_ITER ← --max-iter N (default: 5)
|
|
191
|
-
layerFilter ← --layer L (default: null = all)
|
|
192
|
-
startStrategy ← --strategy conservative|aggressive|surgical|reflective (default: null = auto)
|
|
193
|
-
dryRun ← --dry-run
|
|
194
|
-
reRun ← --re-run
|
|
195
|
-
phaseArg ← remaining text
|
|
196
|
-
|
|
197
|
-
Derive:
|
|
198
|
-
dateStr ← UTC+8 YYYYMMDD
|
|
199
|
-
sessionFolder ← ".tests/auto-test/.csv-session"
|
|
200
|
-
|
|
201
|
-
mkdir -p {sessionFolder}
|
|
202
|
-
```
|
|
203
|
-
|
|
204
|
-
### Phase 1: Route & Plan → CSV
|
|
205
|
-
|
|
206
|
-
#### Step 0: Parse & Load
|
|
207
|
-
|
|
208
|
-
Resolve phase dir from `state.json` artifact registry (`type='execute'`, matching phase). Error E002 if not found.
|
|
209
|
-
|
|
210
|
-
```
|
|
211
|
-
specs_test = maestro spec load --category test
|
|
212
|
-
specs_arch = maestro spec load --category arch
|
|
213
|
-
```
|
|
214
|
-
|
|
215
|
-
#### Step 1: Read State & Route
|
|
216
|
-
|
|
217
|
-
```
|
|
218
|
-
Priority: Resume > Re-run > Spec > Gap > Code
|
|
219
|
-
|
|
220
|
-
1. RESUME: .csv-session/scenarios.csv exists AND state.json status == "running"
|
|
221
|
-
→ offer resume or restart (resume = reload CSV, jump to current iteration)
|
|
222
|
-
|
|
223
|
-
2. RE-RUN: --re-run flag AND report.json has failed/blocked scenarios
|
|
224
|
-
→ load failed scenarios into CSV with status reset to pending
|
|
225
|
-
|
|
226
|
-
3. SPEC: .workflow/.spec/SPEC-*/requirements/REQ-*.md exists
|
|
227
|
-
→ ROUTE = "spec", SPEC_MODE = "full" | "degraded"
|
|
228
|
-
|
|
229
|
-
4. GAP: verification.json has gaps[] (MISSING/PARTIAL)
|
|
230
|
-
→ ROUTE = "gap"
|
|
231
|
-
|
|
232
|
-
5. CODE: Default fallback → ROUTE = "code"
|
|
233
|
-
```
|
|
234
|
-
|
|
235
|
-
#### Step 2: Source Scenarios
|
|
236
|
-
|
|
237
|
-
Execute route-specific extraction, normalize to unified format.
|
|
238
|
-
|
|
239
|
-
**Route A: spec** — Parse REQ acceptance criteria, classify layers, generate fixtures.
|
|
240
|
-
**Route B: gap** — Read verification/coverage gaps, classify files by type.
|
|
241
|
-
**Route C: code** — Explore module boundaries, API endpoints, integration points.
|
|
242
|
-
|
|
243
|
-
All routes produce unified scenario objects (see csv_schema).
|
|
244
|
-
|
|
245
|
-
#### Step 3: Discover Infrastructure
|
|
246
|
-
|
|
247
|
-
Detect framework, read 2-3 existing tests for patterns. Build `infrastructure_hints` string per scenario.
|
|
248
|
-
|
|
249
|
-
#### Step 4: Build scenarios.csv & Confirm
|
|
78
|
+
<state_machine>
|
|
250
79
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
80
|
+
<states>
|
|
81
|
+
S_PARSE -- Parse arguments, detect route PERSIST: --
|
|
82
|
+
S_SOURCE -- Extract scenarios (spec/gap/code route) PERSIST: --
|
|
83
|
+
S_INFRA -- Discover test infrastructure (framework/patterns) PERSIST: --
|
|
84
|
+
S_CSV_GEN -- Generate scenarios.csv PERSIST: scenarios.csv
|
|
85
|
+
S_L0 -- Static analysis (tsc + eslint, no CSV) PERSIST: L0 results
|
|
86
|
+
S_LAYER_EXEC -- Per-layer write + run (L1->L2->L3) PERSIST: test files + scenarios.csv
|
|
87
|
+
S_ITERATE -- Diagnose failures + fix loop PERSIST: diagnosis CSV + reflection-log.md
|
|
88
|
+
S_REPORT -- Output report, route to next step PERSIST: report.json + state.json + results.csv
|
|
89
|
+
</states>
|
|
254
90
|
|
|
255
|
-
|
|
256
|
-
```
|
|
257
|
-
=== AUTO-TEST PLAN ===
|
|
258
|
-
来源: {ROUTE} | 阶段: {phase_name} | Spec: {spec_ref or "N/A"}
|
|
91
|
+
<transitions>
|
|
259
92
|
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
L2 Integration: {N} scenarios ({X} critical, {Y} high)
|
|
263
|
-
L3 E2E: {N} scenarios ({X} critical, {Y} high)
|
|
93
|
+
S_PARSE:
|
|
94
|
+
-> S_SOURCE DO: resolve phase dir, detect route (resume/re-run/spec/gap/code)
|
|
264
95
|
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
96
|
+
S_SOURCE:
|
|
97
|
+
-> S_INFRA DO: extract scenarios per route, normalize to unified format
|
|
98
|
+
Route A (spec): Parse REQ-*.md acceptance criteria, classify layers, generate fixtures
|
|
99
|
+
Route B (gap): Read verification/coverage gaps, classify files by type
|
|
100
|
+
Route C (code): Explore module boundaries, API endpoints, integration points
|
|
268
101
|
|
|
269
|
-
|
|
102
|
+
S_INFRA:
|
|
103
|
+
-> S_CSV_GEN DO: detect framework, read 2-3 existing tests, build infrastructure_hints
|
|
270
104
|
|
|
271
|
-
|
|
105
|
+
S_CSV_GEN:
|
|
106
|
+
-> S_L0 DO: build scenarios.csv, set cross-layer prev_context
|
|
107
|
+
-> END WHEN: --dry-run (plan only)
|
|
272
108
|
|
|
273
|
-
|
|
109
|
+
S_L0:
|
|
110
|
+
-> S_LAYER_EXEC WHEN: L0 passes
|
|
111
|
+
-> END WHEN: L0 fails (stop, do not proceed)
|
|
274
112
|
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
113
|
+
S_LAYER_EXEC:
|
|
114
|
+
-> S_ITERATE WHEN: failures exist AND max_iter > 1 DO: A_PER_LAYER_WRITE_RUN
|
|
115
|
+
-> S_REPORT WHEN: all pass OR max_iter == 1 DO: A_PER_LAYER_WRITE_RUN
|
|
278
116
|
|
|
279
|
-
|
|
117
|
+
S_ITERATE:
|
|
118
|
+
-> S_REPORT WHEN: converged (>=95%) OR max_iter reached OR all remaining = code_defect
|
|
119
|
+
-> S_ITERATE WHEN: more iterations needed DO: A_ITERATE_LOOP
|
|
280
120
|
|
|
281
|
-
|
|
121
|
+
S_REPORT:
|
|
122
|
+
-> END DO: A_REPORT
|
|
282
123
|
|
|
283
|
-
|
|
124
|
+
</transitions>
|
|
284
125
|
|
|
285
|
-
|
|
126
|
+
<actions>
|
|
286
127
|
|
|
287
|
-
|
|
128
|
+
### A_PER_LAYER_WRITE_RUN
|
|
288
129
|
|
|
289
|
-
|
|
130
|
+
For each layer L1->L3 (sequential, respecting --layer filter):
|
|
290
131
|
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
output_csv_path: `${sessionFolder}/layer-L${N}-write-results.csv`,
|
|
299
|
-
output_schema: { id, status: [written|failed], red_result: [expected_fail|unexpected_fail|pass], findings, error }
|
|
300
|
-
})
|
|
301
|
-
```
|
|
132
|
+
1. Extract layer rows from scenarios.csv (status==pending)
|
|
133
|
+
2. Populate prev_context from completed prior-layer findings in master CSV
|
|
134
|
+
3. Write layer-L{N}-write.csv -> `spawn_agents_on_csv` for parallel test writing
|
|
135
|
+
4. Merge write-results -> scenarios.csv
|
|
136
|
+
5. Run full layer test suite: `{run_command} --testPathPattern="{layer_pattern}"`
|
|
137
|
+
6. Record per-scenario pass/fail
|
|
138
|
+
7. Fail-fast: any critical-priority failed -> stop layer progression
|
|
302
139
|
|
|
303
|
-
**Test Writer Agent Instruction** (
|
|
140
|
+
**Test Writer Agent Instruction** (injected into spawn_agents_on_csv):
|
|
304
141
|
```
|
|
305
142
|
You are a test writer. Write ONE test file for the given scenario.
|
|
306
143
|
|
|
307
144
|
## Task
|
|
308
|
-
- Read
|
|
145
|
+
- Read target_file to understand module under test
|
|
309
146
|
- Write test file at test_file path following infrastructure_hints patterns
|
|
310
|
-
- Each
|
|
147
|
+
- Each test_case in test_cases -> one it() block
|
|
311
148
|
- Use fixtures from fixtures column (infer from source if empty)
|
|
312
149
|
- Include scenario id in describe: describe("AT-NNN: {name}", ...)
|
|
313
|
-
- Run
|
|
150
|
+
- Run test file once after writing
|
|
314
151
|
|
|
315
152
|
## RED-GREEN Rules
|
|
316
|
-
-
|
|
317
|
-
-
|
|
318
|
-
-
|
|
153
|
+
- Test PASSES immediately: note "pass" — may need strengthening
|
|
154
|
+
- Test FAILS as expected (tests real behavior): note "expected_fail" — good
|
|
155
|
+
- Test FAILS unexpectedly (setup/import error): fix test setup, note "unexpected_fail"
|
|
319
156
|
- NEVER modify source code — only write/fix test files
|
|
320
157
|
|
|
321
158
|
## Output
|
|
322
|
-
- status: "written" if
|
|
159
|
+
- status: "written" if created, "failed" if unable
|
|
323
160
|
- red_result: the RED phase outcome
|
|
324
161
|
- findings: patterns discovered, notes for dependent scenarios (max 500 chars)
|
|
325
162
|
- error: only if status == "failed"
|
|
@@ -330,224 +167,89 @@ You are a test writer. Write ONE test file for the given scenario.
|
|
|
330
167
|
- Append to discoveries.ndjson if you find reusable patterns
|
|
331
168
|
```
|
|
332
169
|
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
**5. Run full layer test suite:**
|
|
336
|
-
```bash
|
|
337
|
-
{run_command} --testPathPattern="{layer_pattern}"
|
|
338
|
-
```
|
|
339
|
-
|
|
340
|
-
**6. Record per-scenario results** (pass/fail/blocked with error_detail)
|
|
341
|
-
|
|
342
|
-
**7. Fail-fast check:** If ANY critical-priority scenario failed → do NOT proceed to next layer
|
|
343
|
-
|
|
344
|
-
**If `--max-iter 1`:** After all layers written and run once, jump to Phase 4 (single-pass).
|
|
170
|
+
### A_ITERATE_LOOP
|
|
345
171
|
|
|
346
|
-
### Phase 3: Iteration Engine (Diagnose + Fix)
|
|
347
|
-
|
|
348
|
-
```
|
|
349
172
|
OUTER LOOP (max_iter iterations):
|
|
173
|
+
FOR each layer with failures:
|
|
174
|
+
INNER LOOP (max 3):
|
|
175
|
+
1. Build diagnosis.csv from failed scenarios (exclude code_defect)
|
|
176
|
+
2. `spawn_agents_on_csv` for parallel diagnosis
|
|
177
|
+
3. Diagnosis agent (see instruction below). test_defect -> provide fix. code_defect -> document evidence.
|
|
178
|
+
4. Apply test_defect fixes, re-run layer
|
|
350
179
|
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
INNER LOOP (max 3 per layer):
|
|
354
|
-
|
|
355
|
-
1. Build diagnosis.csv from failed scenarios in master CSV
|
|
356
|
-
(only scenarios with status=failed AND classification != code_defect)
|
|
357
|
-
|
|
358
|
-
2. IF diagnosis rows >= 1:
|
|
359
|
-
spawn_agents_on_csv({
|
|
360
|
-
csv_path: `${sessionFolder}/diagnosis-iter-${iter}.csv`,
|
|
361
|
-
id_column: "id",
|
|
362
|
-
instruction: buildDiagnosisInstruction(infrastructure),
|
|
363
|
-
max_concurrency: maxConcurrency,
|
|
364
|
-
max_runtime_seconds: 1200,
|
|
365
|
-
output_csv_path: `${sessionFolder}/diagnosis-iter-${iter}-results.csv`,
|
|
366
|
-
output_schema: { id, classification, fix_code, evidence, error }
|
|
367
|
-
})
|
|
368
|
-
|
|
369
|
-
3. Merge diagnosis results:
|
|
370
|
-
- test_defect with fix_code → apply fix, update scenario status to "pending"
|
|
371
|
-
- code_defect → mark as confirmed failure (stop retrying)
|
|
372
|
-
- env_issue → mark as blocked
|
|
373
|
-
|
|
374
|
-
4. Re-run ALL scenarios in this layer (catch regressions)
|
|
375
|
-
5. IF no test_defects remain: break inner loop
|
|
376
|
-
|
|
377
|
-
END INNER
|
|
378
|
-
|
|
379
|
-
Record final layer results
|
|
380
|
-
IF critical code_defects: stop layer progression (fail-fast)
|
|
381
|
-
|
|
382
|
-
END FOR
|
|
383
|
-
|
|
384
|
-
REFLECT:
|
|
385
|
-
Analyze: pass rate delta, failure clusters, strategy effectiveness
|
|
386
|
-
Append to reflection-log.md
|
|
387
|
-
|
|
388
|
-
**Test confidence scoring** (at each REFLECT step):
|
|
389
|
-
Dimensions (5): scenario_coverage, test_quality, diagnostic_accuracy, strategy_effectiveness, infrastructure_fitness. Factors (weights): completeness(.30), pass_rate_trend(.25), classification_accuracy(.20), coverage_breadth(.15), consistency(.10). Enhanced convergence: BOTH pass_rate ≥ threshold AND confidence ≥ 60%. Add confidence to `report.json`.
|
|
390
|
-
|
|
391
|
-
ADJUST (Adaptive Strategy):
|
|
392
|
-
IF startStrategy provided AND iteration == 1: use startStrategy as initial
|
|
393
|
-
OTHERWISE auto-select:
|
|
394
|
-
|
|
395
|
-
| Condition | Strategy |
|
|
396
|
-
|-----------|----------|
|
|
397
|
-
| Iteration 1-2 | Conservative: fix obvious test_defects only |
|
|
398
|
-
| Pass rate >80% | Aggressive: batch-fix related failures |
|
|
399
|
-
| New regressions | Surgical: revert, fix regression only |
|
|
400
|
-
| Stuck 3+ iters | Reflective: re-analyze root cause pattern |
|
|
401
|
-
|
|
402
|
-
CONVERGENCE:
|
|
403
|
-
pass_rate >= 95% → Phase 4 (converged)
|
|
404
|
-
iteration >= max_iter → Phase 4 (max_iter_reached)
|
|
405
|
-
all remaining = code_defect → Phase 4 (confirmed_defects)
|
|
406
|
-
ELSE → next iteration
|
|
407
|
-
|
|
408
|
-
END OUTER
|
|
409
|
-
```
|
|
410
|
-
|
|
411
|
-
**Diagnosis Agent Instruction** (per failure row):
|
|
180
|
+
**Diagnosis Agent Instruction** (injected into spawn_agents_on_csv):
|
|
412
181
|
```
|
|
413
|
-
You are a test failure diagnostician. Classify ONE test failure
|
|
182
|
+
You are a test failure diagnostician. Classify ONE test failure.
|
|
414
183
|
|
|
415
184
|
## Task
|
|
416
|
-
- Read test_file and target_file to understand
|
|
185
|
+
- Read test_file and target_file to understand failure context
|
|
417
186
|
- Analyze error_detail against expected vs actual
|
|
418
|
-
- Classify
|
|
419
|
-
- test_defect: Test
|
|
420
|
-
- code_defect: Source
|
|
187
|
+
- Classify:
|
|
188
|
+
- test_defect: Test is wrong (bad import, wrong endpoint, bad fixture, incorrect assertion)
|
|
189
|
+
- code_defect: Source violates business rule (actual != expected requirement)
|
|
421
190
|
- env_issue: Environment problem (service down, config missing, timeout)
|
|
422
191
|
|
|
423
192
|
## Output
|
|
424
|
-
- classification:
|
|
425
|
-
- fix_code: If test_defect
|
|
426
|
-
|
|
427
|
-
-
|
|
428
|
-
- error: only if you cannot determine classification
|
|
193
|
+
- classification: test_defect / code_defect / env_issue
|
|
194
|
+
- fix_code: If test_defect: "old_line → new_line" or full replacement. Empty for others.
|
|
195
|
+
- evidence: file:line references supporting classification
|
|
196
|
+
- error: only if the classification cannot be determined
|
|
429
197
|
|
|
430
198
|
## Rules
|
|
431
|
-
- NEVER suggest source code changes — only test
|
|
432
|
-
-
|
|
433
|
-
- When uncertain
|
|
199
|
+
- NEVER suggest source code changes — only test fixes for test_defect
|
|
200
|
+
- Test correctly catching a real bug = code_defect, not test_defect
|
|
201
|
+
- When uncertain: prefer code_defect (conservative)
|
|
434
202
|
```
|
|
203
|
+
5. If no test_defects remain: break inner
|
|
204
|
+
REFLECT: analyze trends, log strategy, test confidence scoring (5 dims: scenario_coverage, test_quality, diagnostic_accuracy, strategy_effectiveness, infrastructure_fitness)
|
|
205
|
+
ADJUST: auto-select strategy (conservative during iterations 1-2, aggressive when pass rate >80%, surgical on new regressions, reflective when stuck for 3+ iterations)
|
|
206
|
+
CONVERGENCE: pass rate >=95% -> report; max_iter reached -> report; all remaining failures are code_defect -> report
|
|
435
207
|
|
|
436
|
-
###
|
|
437
|
-
|
|
438
|
-
| Type | Dedup Key | Data Schema |
|
|
439
|
-
|------|-----------|-------------|
|
|
440
|
-
| `test_pattern` | `data.name` | `{name, file, description}` |
|
|
441
|
-
| `mock_setup` | `data.target` | `{target, setup_code, file}` |
|
|
442
|
-
| `fixture` | `data.name` | `{name, schema, file}` |
|
|
443
|
-
| `convention` | singleton | `{describe_style, assertion_lib, import_pattern}` |
|
|
444
|
-
| `blocker` | `data.issue` | `{issue, severity, layer}` |
|
|
445
|
-
|
|
446
|
-
Read before writing tests. Append-only. Dedup by type+key.
|
|
447
|
-
|
|
448
|
-
### Phase 4: Results & Routing
|
|
449
|
-
|
|
450
|
-
1. Export final `scenarios.csv` as `results.csv`
|
|
451
|
-
|
|
452
|
-
2. Write `.tests/auto-test/state.json`:
|
|
453
|
-
```json
|
|
454
|
-
{
|
|
455
|
-
"session_id": "auto-test-{YYYYMMDD-HHmmss}",
|
|
456
|
-
"phase": "{phase}", "phase_dir": "{PHASE_DIR}",
|
|
457
|
-
"source_route": "spec|gap|code|re-run",
|
|
458
|
-
"status": "converged|max_iter_reached|confirmed_defects|single_pass",
|
|
459
|
-
"iteration": N, "strategy": "conservative",
|
|
460
|
-
"strategy_history": [...],
|
|
461
|
-
"threshold": 95, "current_layer": "L2",
|
|
462
|
-
"layer_state": {
|
|
463
|
-
"L0": { "inner_iter": 1, "pass_rate": 100.0, "status": "passed" },
|
|
464
|
-
"L1": { "inner_iter": 2, "pass_rate": 95.0, "status": "passed" },
|
|
465
|
-
"L2": { ... }, "L3": { ... }
|
|
466
|
-
},
|
|
467
|
-
"pass_rate_history": [...],
|
|
468
|
-
"scenario_count": 30,
|
|
469
|
-
"csv_session": ".tests/auto-test/.csv-session/"
|
|
470
|
-
}
|
|
471
|
-
```
|
|
472
|
-
|
|
473
|
-
3. Write `.tests/auto-test/report.json` (same schema as workflow reference)
|
|
208
|
+
### A_REPORT
|
|
474
209
|
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
```
|
|
210
|
+
1. Export results.csv
|
|
211
|
+
2. Write state.json + report.json (with confidence section)
|
|
212
|
+
3. Conditional: traceability.md (spec route), issue creation (code_defect -> issues.jsonl)
|
|
213
|
+
4. Register artifact in state.json (type: test)
|
|
214
|
+
5. Display summary: route, iterations, convergence status, per-layer pass rates, bugs discovered
|
|
215
|
+
6. Route: converged -> maestro-verify; bugs -> quality-debug; pass rate >80% -> quality-test; pass rate <=80% -> quality-debug; single pass all pass -> quality-test
|
|
482
216
|
|
|
483
|
-
|
|
484
|
-
```json
|
|
485
|
-
{ "id": "TST-NNN", "type": "test", "status": "completed|failed" }
|
|
486
|
-
```
|
|
217
|
+
</actions>
|
|
487
218
|
|
|
488
|
-
|
|
489
|
-
```
|
|
490
|
-
=== AUTO-TEST RESULTS ===
|
|
491
|
-
阶段: {phase_name}
|
|
492
|
-
来源: {ROUTE}
|
|
493
|
-
迭代: {N} (策略: {strategy_history})
|
|
494
|
-
收敛: {status} ({final_pass_rate}%)
|
|
495
|
-
|
|
496
|
-
层级结果:
|
|
497
|
-
L0 Static: {pass_rate}% ({passed}/{total})
|
|
498
|
-
L1 Unit/API: {pass_rate}% ({passed}/{total})
|
|
499
|
-
L2 Integration: {pass_rate}% ({passed}/{total})
|
|
500
|
-
L3 E2E: {pass_rate}% ({passed}/{total})
|
|
501
|
-
|
|
502
|
-
场景: {passed} passed, {failed} failed, {blocked} blocked
|
|
503
|
-
Bugs: {N} discovered
|
|
504
|
-
{IF spec: "需求覆盖: {pct}% | 已验证: {n}/{total}"}
|
|
505
|
-
|
|
506
|
-
CSV Session: .tests/auto-test/.csv-session/
|
|
507
|
-
```
|
|
219
|
+
</state_machine>
|
|
508
220
|
|
|
509
|
-
|
|
221
|
+
<discovery_board>
|
|
510
222
|
|
|
511
|
-
|
|
|
512
|
-
|
|
513
|
-
|
|
|
514
|
-
|
|
|
515
|
-
|
|
|
516
|
-
|
|
|
517
|
-
|
|
|
518
|
-
| Coverage still low | `$quality-auto-test "{phase} --layer {missing}"` |
|
|
519
|
-
| Re-run all pass | `$maestro-verify "{phase}"` |
|
|
520
|
-
| Single pass, all pass | `$quality-test "{phase}"` |
|
|
223
|
+
| Type | Dedup Key | Data |
|
|
224
|
+
|------|-----------|------|
|
|
225
|
+
| test_pattern | data.name | {name, file, description} |
|
|
226
|
+
| mock_setup | data.target | {target, setup_code, file} |
|
|
227
|
+
| fixture | data.name | {name, schema, file} |
|
|
228
|
+
| convention | singleton | {describe_style, assertion_lib, import_pattern} |
|
|
229
|
+
| blocker | data.issue | {issue, severity, layer} |
|
|
521
230
|
|
|
522
|
-
|
|
231
|
+
Protocol: read before writing tests, append-only, dedup by type+key.
|
|
232
|
+
</discovery_board>
|
|
523
233
|
|
|
524
234
|
<error_codes>
|
|
525
|
-
|
|
|
526
|
-
|
|
527
|
-
| Phase not found in artifact registry | Abort: "Phase
|
|
528
|
-
| No test framework detected | Abort: E003
|
|
529
|
-
| Agent spawn fails
|
|
235
|
+
| Condition | Recovery |
|
|
236
|
+
|-----------|----------|
|
|
237
|
+
| Phase not found in artifact registry | Abort: "Phase not found" |
|
|
238
|
+
| No test framework detected | Abort: E003, install framework |
|
|
239
|
+
| Agent spawn fails | Retry once, then mark scenario blocked |
|
|
530
240
|
| Convergence not met after max_iter | Report max_iter_reached, suggest debug |
|
|
531
241
|
| All scenarios in layer blocked | Stop layer, report env_issue |
|
|
532
|
-
| CSV parse error | Validate format, show line |
|
|
533
|
-
| discoveries.ndjson corrupt | Ignore malformed lines, continue |
|
|
534
242
|
| Resume: no session found | Start fresh |
|
|
535
243
|
</error_codes>
|
|
536
244
|
|
|
537
245
|
<success_criteria>
|
|
538
|
-
- [ ] Session folder created with valid scenarios.csv
|
|
539
246
|
- [ ] Route auto-selected from project state (spec/gap/code)
|
|
540
|
-
- [ ]
|
|
541
|
-
- [ ] Test writing parallelized via spawn_agents_on_csv
|
|
542
|
-
- [ ]
|
|
543
|
-
- [ ]
|
|
544
|
-
- [ ]
|
|
545
|
-
- [ ] Iteration engine ran (inner: test_defect fix, outer: strategy adjust)
|
|
546
|
-
- [ ] Test confidence scored per iteration with 5-dimension factor model
|
|
547
|
-
- [ ] Convergence check includes confidence >= 60% alongside pass_rate
|
|
548
|
-
- [ ] Confidence section added to report.json
|
|
247
|
+
- [ ] Layers executed in order with fail-fast on critical
|
|
248
|
+
- [ ] Test writing + diagnosis parallelized via spawn_agents_on_csv
|
|
249
|
+
- [ ] Cross-layer context propagation via prev_context
|
|
250
|
+
- [ ] Iteration engine: inner test_defect fix, outer strategy adjust
|
|
251
|
+
- [ ] Test confidence scored per iteration (5-dimension model)
|
|
549
252
|
- [ ] state.json, report.json, reflection-log.md written
|
|
550
|
-
- [ ] If spec: traceability.md
|
|
551
|
-
- [ ] If failures: issues auto-created in issues.jsonl
|
|
552
|
-
- [ ] Next step routed based on convergence status
|
|
253
|
+
- [ ] If spec: traceability.md; if failures: issues auto-created
|
|
553
254
|
</success_criteria>
|
|
255
|
+
</output>
|