create-byan-agent 2.19.2 → 2.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/CHANGELOG.md +148 -0
  2. package/README.md +4 -4
  3. package/install/src/byan-v2/generation/templates/default-agent.md +1 -1
  4. package/install/templates/.claude/CLAUDE.md +1 -1
  5. package/install/templates/.claude/hooks/fd-phase-guard.js +2 -2
  6. package/install/templates/.claude/hooks/mantra-validate.js +16 -8
  7. package/install/templates/.claude/hooks/strict-scope-guard.js +25 -7
  8. package/install/templates/.claude/rules/native-workflows.md +32 -0
  9. package/install/templates/.claude/skills/byan-byan/SKILL.md +5 -5
  10. package/install/templates/.claude/skills/byan-mantra-audit/SKILL.md +53 -0
  11. package/install/templates/.claude/skills/byan-merise-agile/SKILL.md +2 -2
  12. package/install/templates/.claude/skills/byan-native-dev-story/SKILL.md +83 -0
  13. package/install/templates/.claude/workflows/INDEX.md +35 -0
  14. package/install/templates/.claude/workflows/check-implementation-readiness.js +280 -0
  15. package/install/templates/.claude/workflows/code-review.js +179 -0
  16. package/install/templates/.claude/workflows/create-excalidraw-dataflow.js +214 -0
  17. package/install/templates/.claude/workflows/create-excalidraw-diagram.js +188 -0
  18. package/install/templates/.claude/workflows/create-excalidraw-flowchart.js +225 -0
  19. package/install/templates/.claude/workflows/create-excalidraw-wireframe.js +192 -0
  20. package/install/templates/.claude/workflows/create-story.js +216 -0
  21. package/install/templates/.claude/workflows/dev-story.js +100 -0
  22. package/install/templates/.claude/workflows/document-project.js +455 -0
  23. package/install/templates/.claude/workflows/qa-automate.js +169 -0
  24. package/install/templates/.claude/workflows/quick-dev.js +273 -0
  25. package/install/templates/.claude/workflows/sprint-planning.js +261 -0
  26. package/install/templates/.claude/workflows/testarch-atdd.js +287 -0
  27. package/install/templates/.claude/workflows/testarch-automate.js +229 -0
  28. package/install/templates/.claude/workflows/testarch-ci.js +184 -0
  29. package/install/templates/.claude/workflows/testarch-framework.js +267 -0
  30. package/install/templates/.claude/workflows/testarch-nfr.js +316 -0
  31. package/install/templates/.claude/workflows/testarch-test-design.js +293 -0
  32. package/install/templates/.claude/workflows/testarch-test-review.js +321 -0
  33. package/install/templates/.claude/workflows/testarch-trace.js +316 -0
  34. package/install/templates/.githooks/pre-commit +49 -15
  35. package/install/templates/_byan/config.yaml +15 -5
  36. package/install/templates/_byan/mcp/byan-mcp-server/bin/byan-build-workflows.js +20 -0
  37. package/install/templates/_byan/mcp/byan-mcp-server/bin/byan-lint-workflows.js +57 -0
  38. package/install/templates/_byan/mcp/byan-mcp-server/lib/native-loop.js +39 -0
  39. package/install/templates/_byan/mcp/byan-mcp-server/lib/workflows-generator.js +149 -0
  40. package/install/templates/_byan/mcp/byan-mcp-server/lib/workflows-lint.js +113 -0
  41. package/install/templates/_byan/workflow/simple/byan/feature-workflow.md +14 -11
  42. package/install/templates/docs/native-workflows-contract.md +84 -0
  43. package/package.json +2 -2
  44. package/src/byan-v2/data/agent-scopes.json +46 -0
  45. package/src/byan-v2/data/mantras.json +194 -8
  46. package/src/byan-v2/generation/mantra-audit.js +147 -0
  47. package/src/byan-v2/generation/mantra-validator.js +56 -6
  48. package/src/byan-v2/generation/scope-resolver.js +102 -0
  49. package/src/byan-v2/generation/templates/default-agent.md +1 -1
@@ -0,0 +1,287 @@
1
/**
 * Descriptor for the native `testarch-atdd` workflow: its name, a one-paragraph
 * description, and the ordered phases (title + detail) that the script below
 * announces through phase().
 */
export const meta = {
  name: 'testarch-atdd',
  description:
    'Native port of the BYAN testarch-atdd workflow (create mode): generate FAILING acceptance tests before implementation (TDD red phase). Preflight+context, generation mode, test strategy, parallel API+E2E failing-test generation, aggregate+verify red-phase compliance, then return a structured verdict for the orchestrating skill to present at the human gate.',
  phases: [
    {
      title: 'PREFLIGHT',
      detail: 'verify prerequisites and load story, framework, and knowledge base',
    },
    {
      title: 'MODE',
      detail: 'choose AI generation vs recording mode',
    },
    {
      title: 'STRATEGY',
      detail: 'map acceptance criteria to test levels and P0-P3 priorities',
    },
    {
      title: 'GENERATE',
      detail: 'parallel fan-out: FAILING API tests and FAILING E2E tests (red phase)',
    },
    {
      title: 'AGGREGATE',
      detail: 'aggregate subprocess outputs, verify TDD red-phase compliance, build infra + checklist',
    },
    {
      title: 'VALIDATE',
      detail: 'validate against checklist and return a completion verdict',
    },
  ],
}
13
+
14
+ // ---------------------------------------------------------------------------
15
+ // FD / STRICT STATE CONTRACT (re-asserted inline — enforcement-bridge).
16
+ //
17
+ // The in-CLI Workflow tool runs this script OUTSIDE the conversation turn, so
18
+ // BYAN's main-thread hooks (fd-phase-guard, strict-scope-guard, strict-stop-
19
+ // guard, mantra-validate) DO NOT fire here. This script therefore:
20
+ // - NEVER imports/requires _byan/.../lib/fd-state.js and NEVER writes
21
+ // fd-state.json directly (enforced by byan-lint-workflows.js).
22
+ // - uses NO wall-clock and NO randomness primitive (clock reads and RNG
23
+ // both break resume); any timestamp/id arrives via `args`.
24
+ // - returns DATA only. The orchestrating skill is the human-gated conductor;
25
+ // IT records FD/strict state via the byan_fd_* / byan_strict_* MCP tools
26
+ // AT the gate. The ATDD test files + checklist on disk are the workflow's
27
+ // product (written by the generate/aggregate leaves), not platform state.
28
+ // ---------------------------------------------------------------------------
29
+
30
// Workflow inputs, both optional. Source step-04 ("Prepare Subprocess Inputs")
// names its /tmp files with a timestamp read from the clock. The sandbox
// forbids wall-clock access, so the orchestrator passes the id in through
// args.runId; a fixed fallback keeps temp-file naming reproducible.
const story = args && args.story ? args.story : 'next approved story with clear acceptance criteria'
const runId = args && args.runId ? args.runId : 'atdd-run'
36
+
37
// Result contract shared by both generation subprocesses. Source step-04c
// ("Verify TDD Red Phase Compliance") requires every generated test to carry
// test.skip(), to assert EXPECTED behavior (no expect(true).toBe(true)
// placeholders), and to be flagged expected_to_fail. Expressing that prose gate
// as a schema makes it a structured, validated subprocess contract.

// Shape of one entry in the `tests` array.
const GEN_TEST_ENTRY_SCHEMA = {
  type: 'object',
  required: ['file', 'uses_test_skip', 'expected_to_fail', 'placeholder_assertions'],
  properties: {
    file: { type: 'string' },
    uses_test_skip: {
      type: 'boolean',
      description: 'true if every test in the file uses test.skip()',
    },
    expected_to_fail: { type: 'boolean' },
    placeholder_assertions: {
      type: 'boolean',
      description: 'true if any expect(true).toBe(true) placeholders exist (must be false)',
    },
    acceptance_criteria_covered: { type: 'array', items: { type: 'string' } },
  },
}

const GEN_SCHEMA = {
  type: 'object',
  required: ['success', 'tdd_phase', 'tests', 'test_count'],
  properties: {
    success: { type: 'boolean' },
    subprocess: { type: 'string' },
    tdd_phase: { type: 'string', description: "must be 'RED' for ATDD" },
    tests: { type: 'array', items: GEN_TEST_ENTRY_SCHEMA },
    fixture_needs: { type: 'array', items: { type: 'string' } },
    test_count: { type: 'integer' },
    summary: { type: 'string' },
  },
}
67
+
68
/**
 * Source step-04c red-phase gate, applied to one subprocess result object.
 *
 * @param {object} out - subprocess result; reads `success`, `tdd_phase` and `tests`.
 * @returns {boolean} true only when `success` is true, `tdd_phase` is 'RED',
 *   `tests` is a non-empty array, and every entry has uses_test_skip === true,
 *   expected_to_fail === true and placeholder_assertions === false.
 */
function redPhaseCompliant(out) {
  if (!out || out.success !== true || out.tdd_phase !== 'RED') return false
  const tests = out.tests
  // A malformed result (tests missing or not an array) is reported as
  // non-compliant rather than throwing on tests.every().
  if (!Array.isArray(tests) || tests.length === 0) return false
  return tests.every(
    (t) => Boolean(t) && t.uses_test_skip === true && t.expected_to_fail === true && t.placeholder_assertions === false
  )
}
76
+
77
// === Step 1: Preflight & Context Loading (steps-c/step-01) ===
phase('PREFLIGHT')

// Structured answer expected from the preflight agent; only `proceed` is required.
const PREFLIGHT_RESULT_SCHEMA = {
  type: 'object',
  required: ['proceed'],
  properties: {
    proceed: { type: 'boolean', description: 'false if any hard prerequisite is missing (HALT)' },
    missing: { type: 'array', items: { type: 'string' } },
    acceptance_criteria: { type: 'array', items: { type: 'string' } },
    framework: { type: 'string', description: 'playwright | cypress | unknown' },
    use_playwright_utils: { type: 'boolean' },
    use_mcp_enhancements: { type: 'boolean' },
    notes: { type: 'string' },
  },
}

// The prompt is assembled from fragments; joining with '' keeps the text
// exactly as a plain concatenation would.
const preflightPrompt = [
  `You are the Master Test Architect running the ATDD workflow (create mode), step 1 (preflight & context). `,
  `Read the real source step file _byan/workflow/simple/testarch/atdd/steps-c/step-01-preflight-and-context.md. `,
  `Target story: ${JSON.stringify(story)}.\n`,
  `1) Verify HARD prerequisites: story approved with clear, testable acceptance criteria; a test framework is configured `,
  `(playwright.config.ts or cypress.config.ts); a dev environment is available. If any is missing, HALT: set proceed=false and list what is missing.\n`,
  `2) Load story context: read the story markdown, extract acceptance criteria + constraints, identify affected components and integrations.\n`,
  `3) Load framework + existing patterns: inspect the tests/ dir for fixtures/helpers; read TEA config flags tea_use_playwright_utils and tea_use_mcp_enhancements.\n`,
  `4) Load knowledge-base fragments (data-factories, component-tdd, test-quality, test-healing-patterns, selector-resilience, timing-debugging; plus playwright-utils OR traditional fixture-architecture/network-first per the utils flag).\n`,
  `Report a concise summary of loaded inputs.`,
].join('')

const preflight = await agent(preflightPrompt, {
  label: 'preflight',
  phase: 'PREFLIGHT',
  schema: PREFLIGHT_RESULT_SCHEMA,
})
107
+
108
+ // Hard prerequisite gate (source step-01: "If any are missing: HALT").
109
+ if (!preflight.proceed) {
110
+ return {
111
+ workflow: 'testarch-atdd',
112
+ story,
113
+ status: 'halted-prerequisites',
114
+ summary: 'Preflight HALT: hard prerequisites missing (acceptance criteria / framework / env).',
115
+ missing: (preflight && preflight.missing) || [],
116
+ steps: 1,
117
+ needsHumanGate: true,
118
+ }
119
+ }
120
+
121
// === Step 2: Generation Mode Selection (steps-c/step-02) ===
phase('MODE')

// Preflight facts quoted in the prompt, with the same fallbacks as before.
const detectedFramework = (preflight && preflight.framework) || 'unknown'
const mcpEnabled = Boolean(preflight && preflight.use_mcp_enhancements)

// Structured answer expected from the mode agent; only `mode` is required.
const MODE_RESULT_SCHEMA = {
  type: 'object',
  required: ['mode'],
  properties: {
    mode: { type: 'string', description: 'ai-generation | recording' },
    rationale: { type: 'string' },
  },
}

const modePrompt = [
  `ATDD step 2 (generation mode). Read _byan/workflow/simple/testarch/atdd/steps-c/step-02-generation-mode.md. `,
  `Context: framework=${detectedFramework}, use_mcp_enhancements=${mcpEnabled}.\n`,
  `Default to AI generation when acceptance criteria are clear and scenarios are standard (CRUD/auth/API/navigation). `,
  `Choose recording mode ONLY for complex UI (drag/drop, multi-step wizards) when tea_use_mcp_enhancements is true AND Playwright MCP tools are available. `,
  `State the chosen mode and why.`,
].join('')

const mode = await agent(modePrompt, {
  label: 'generation-mode',
  phase: 'MODE',
  schema: MODE_RESULT_SCHEMA,
})
142
+
143
// === Step 3: Test Strategy (steps-c/step-03) ===
phase('STRATEGY')

// Acceptance criteria reported by preflight; an empty list when absent.
const acceptanceCriteria = (preflight && preflight.acceptance_criteria) || []

// Structured answer expected from the strategy agent: the scenario plan split
// by test level.
const STRATEGY_RESULT_SCHEMA = {
  type: 'object',
  required: ['apiScenarios', 'e2eScenarios'],
  properties: {
    apiScenarios: { type: 'array', items: { type: 'string' } },
    e2eScenarios: { type: 'array', items: { type: 'string' } },
    redPhaseConfirmed: { type: 'boolean' },
    notes: { type: 'string' },
  },
}

const strategyPrompt = [
  `ATDD step 3 (test strategy). Read _byan/workflow/simple/testarch/atdd/steps-c/step-03-test-strategy.md. `,
  `Acceptance criteria: ${JSON.stringify(acceptanceCriteria)}.\n`,
  `1) Convert each acceptance criterion into test scenarios, including negative/edge cases where risk is high. `,
  `2) Select the best level per scenario: E2E for critical journeys, API for business logic/service contracts, Component for UI behavior; avoid duplicate coverage across levels. `,
  `3) Assign P0-P3 priorities by risk + business impact. `,
  `4) Confirm every test is designed to FAIL before implementation (TDD red phase). `,
  `Output the prioritized scenario plan split into apiScenarios and e2eScenarios.`,
].join('')

const strategy = await agent(strategyPrompt, {
  label: 'test-strategy',
  phase: 'STRATEGY',
  schema: STRATEGY_RESULT_SCHEMA,
})
168
+
169
// === Step 4: Orchestrate Parallel FAILING Test Generation (steps-c/step-04 + 04a + 04b) ===
// The source requires TWO subprocesses started in PARALLEL and waits for BOTH:
// 04a produces FAILING API tests, 04b produces FAILING E2E tests. The two
// generators are independent, so they are handed to parallel() as two tasks.
phase('GENERATE')

// Subprocess 4A: FAILING API tests.
const generateApiRed = () => {
  const apiPrompt = [
    `ATDD subprocess 4A (FAILING API tests, TDD RED phase). Read _byan/workflow/simple/testarch/atdd/steps-c/step-04a-subprocess-api-failing.md. `,
    `Temp output id: ${JSON.stringify(runId)}.\n`,
    `From the API scenarios ${JSON.stringify((strategy && strategy.apiScenarios) || [])}, generate FAILING API test files under tests/api/. `,
    `Each test MUST use test.skip() (intentional red phase), assert EXPECTED request/response contracts + status codes + error cases (NO expect(true).toBe(true) placeholders), `,
    `use realistic data via factories, and carry priority tags [P0]-[P3]. Track fixture needs (do not build them yet). Do NOT generate E2E tests. Do NOT run tests.`,
  ].join('')
  return agent(apiPrompt, { label: 'gen-api-red', phase: 'GENERATE', schema: GEN_SCHEMA })
}

// Subprocess 4B: FAILING E2E tests.
const generateE2eRed = () => {
  const e2ePrompt = [
    `ATDD subprocess 4B (FAILING E2E tests, TDD RED phase). Read _byan/workflow/simple/testarch/atdd/steps-c/step-04b-subprocess-e2e-failing.md. `,
    `Temp output id: ${JSON.stringify(runId)}.\n`,
    `From the E2E scenarios ${JSON.stringify((strategy && strategy.e2eScenarios) || [])}, generate FAILING E2E test files under tests/e2e/. `,
    `Each test MUST use test.skip() (intentional red phase), assert EXPECTED UI behavior with resilient selectors (getByRole/getByText/getByLabel), follow network-first, use deterministic waits (no hard sleeps), `,
    `cover complete user journeys, and carry priority tags [P0]-[P3]. Track fixture needs (do not build them yet). Do NOT generate API tests. Do NOT run tests.`,
  ].join('')
  return agent(e2ePrompt, { label: 'gen-e2e-red', phase: 'GENERATE', schema: GEN_SCHEMA })
}

const [apiOut, e2eOut] = await parallel([generateApiRed, generateE2eRed])
194
+
195
+ // === Step 4C: Aggregate + verify TDD red-phase compliance (steps-c/step-04c) ===
196
+ phase('AGGREGATE')
197
+ // Source step-04c: if either subprocess failed OR red-phase compliance fails,
198
+ // "report error and stop (don't proceed)". Surface the gap as a verdict; the
199
+ // orchestrating skill decides at the human gate (gap is not a silent cut).
200
+ const apiOk = redPhaseCompliant(apiOut)
201
+ const e2eOk = redPhaseCompliant(e2eOut)
202
+ log(`red-phase compliance: api=${apiOk} e2e=${e2eOk}`)
203
+
204
+ if (!apiOk || !e2eOk) {
205
+ const blocking = []
206
+ if (!apiOk) blocking.push('API subprocess failed red-phase compliance (success/RED/test.skip/expected_to_fail/no-placeholder)')
207
+ if (!e2eOk) blocking.push('E2E subprocess failed red-phase compliance (success/RED/test.skip/expected_to_fail/no-placeholder)')
208
+ return {
209
+ workflow: 'testarch-atdd',
210
+ story,
211
+ status: 'red-phase-violation',
212
+ summary: 'Aggregation stopped: at least one subprocess did not produce compliant FAILING tests.',
213
+ blocking,
214
+ apiTestCount: (apiOut && apiOut.test_count) || 0,
215
+ e2eTestCount: (e2eOut && e2eOut.test_count) || 0,
216
+ steps: 5,
217
+ needsHumanGate: true,
218
+ }
219
+ }
220
+
221
// Aggregation leaf (source step-04c): writes the generated tests, the minimal
// fixture infrastructure and the ATDD checklist, then reports counts and paths.
const AGGREGATE_RESULT_SCHEMA = {
  type: 'object',
  required: ['filesWritten', 'totalTests', 'checklistPath'],
  properties: {
    filesWritten: { type: 'array', items: { type: 'string' } },
    totalTests: { type: 'integer' },
    apiTests: { type: 'integer' },
    e2eTests: { type: 'integer' },
    fixturesCreated: { type: 'integer' },
    checklistPath: { type: 'string' },
    acceptanceCriteriaCovered: { type: 'array', items: { type: 'string' } },
    notes: { type: 'string' },
  },
}

const aggregatePrompt = [
  `ATDD step 4C (aggregate). Read _byan/workflow/simple/testarch/atdd/steps-c/step-04c-aggregate.md. `,
  `Both subprocesses are red-phase compliant. API result: ${JSON.stringify(apiOut)}. E2E result: ${JSON.stringify(e2eOut)}.\n`,
  `1) Write all generated test files to disk (tests/api/*, tests/e2e/*) keeping every test.skip(). `,
  `2) Aggregate + dedupe fixture needs and create the minimal red-phase fixture infrastructure (e.g. tests/fixtures/test-data.ts). `,
  `3) Generate the ATDD checklist (story summary, AC coverage, RED-phase status, GREEN-phase next steps: remove test.skip() -> run -> verify pass, implementation guidance for endpoints + UI flows) and save it under the configured output folder as atdd-checklist-<story-id>.md. `,
  `Do NOT remove any test.skip() and do NOT run the tests yet. Report counts and written paths.`,
].join('')

const aggregate = await agent(aggregatePrompt, {
  label: 'aggregate',
  phase: 'AGGREGATE',
  schema: AGGREGATE_RESULT_SCHEMA,
})
247
+
248
// === Step 5: Validate & Complete (steps-c/step-05) ===
phase('VALIDATE')

// Structured answer expected from the validation agent; only `valid` is required.
const VALIDATION_RESULT_SCHEMA = {
  type: 'object',
  required: ['valid'],
  properties: {
    valid: { type: 'boolean', description: 'true only if all checklist completion criteria are satisfied' },
    gaps: { type: 'array', items: { type: 'string' } },
    nextWorkflow: { type: 'string' },
    summary: { type: 'string' },
  },
}

const validationPrompt = [
  `ATDD step 5 (validate & complete). Read _byan/workflow/simple/testarch/atdd/steps-c/step-05-validate-and-complete.md and the workflow checklist.md. `,
  `Aggregation result: ${JSON.stringify(aggregate)}.\n`,
  `Validate against the checklist: prerequisites satisfied; test files created correctly; checklist matches the acceptance criteria; every test is designed to FAIL before implementation (RED phase). `,
  `List any gaps that still need fixing. Then give a completion summary: test files created, checklist output path, key risks/assumptions, and the next recommended workflow (implementation, then 'automate' for the green phase).`,
].join('')

const validation = await agent(validationPrompt, {
  label: 'validate-complete',
  phase: 'VALIDATE',
  schema: VALIDATION_RESULT_SCHEMA,
})
270
+
271
+ // Return DATA only. The orchestrating skill presents this at the human gate
272
+ // and records FD/strict state via MCP. No platform state is written here.
273
+ return {
274
+ workflow: 'testarch-atdd',
275
+ story,
276
+ status: validation.valid ? 'red-phase-ready' : 'gaps-found',
277
+ mode: (mode && mode.mode) || 'ai-generation',
278
+ totalTests: (aggregate && aggregate.totalTests) || 0,
279
+ apiTests: (aggregate && aggregate.apiTests) || (apiOut && apiOut.test_count) || 0,
280
+ e2eTests: (aggregate && aggregate.e2eTests) || (e2eOut && e2eOut.test_count) || 0,
281
+ checklistPath: (aggregate && aggregate.checklistPath) || '',
282
+ gaps: (validation && validation.gaps) || [],
283
+ nextWorkflow: (validation && validation.nextWorkflow) || 'automate',
284
+ steps: 6,
285
+ needsHumanGate: true,
286
+ result: validation,
287
+ }
@@ -0,0 +1,229 @@
1
/**
 * Descriptor for the native `testarch-automate` workflow: its name, a
 * one-paragraph description, and the ordered phases (title + detail) that the
 * script below announces through phase().
 */
export const meta = {
  name: 'testarch-automate',
  description:
    'Native port of the BYAN testarch-automate workflow: expand test automation coverage after implementation (or analyze an existing codebase) by preflighting the framework, identifying targets and levels, generating API + E2E tests in parallel, aggregating into files and fixtures, and validating into an automation summary. Returns a structured verdict for the orchestrating skill to present at the human gate.',
  phases: [
    {
      title: 'PREFLIGHT',
      detail: 'verify framework, determine mode (BMad-integrated vs standalone), load context + TEA knowledge fragments',
    },
    {
      title: 'TARGETS',
      detail: 'identify automation targets, choose test levels (E2E/API/Component/Unit), assign P0-P3 priorities, produce coverage plan',
    },
    {
      title: 'GENERATE',
      detail: 'parallel fan-out: subprocess A generates API tests, subprocess B generates E2E tests',
    },
    {
      title: 'AGGREGATE',
      detail: 'aggregate subprocess outputs, write test files + shared fixtures/factories/helpers, compute summary stats',
    },
    {
      title: 'VALIDATE',
      detail: 'validate against the checklist and produce the automation summary; return the verdict',
    },
  ],
}
12
+
13
+ // ---------------------------------------------------------------------------
14
+ // FD / STRICT STATE CONTRACT (re-asserted inline — enforcement-bridge).
15
+ //
16
+ // The in-CLI Workflow tool runs this script OUTSIDE the conversation turn, so
17
+ // BYAN's main-thread hooks (fd-phase-guard, strict-scope-guard, strict-stop-
18
+ // guard, mantra-validate) DO NOT fire here. This script therefore:
19
+ // - NEVER imports/requires _byan/.../lib/fd-state.js and NEVER writes
20
+ // fd-state.json directly (forbidden by byan-lint-workflows.js).
21
+ // - uses NO wall-clock reads and NO randomness (RNG / crypto sources).
22
+ // The source step-03 derives a temp-file timestamp from a wall-clock
23
+ // read; the sandbox forbids that (it breaks resume), so any id/ts is
24
+ // passed in via `args` instead (args.runId).
25
+ // - returns DATA only. The orchestrating skill is the human-gated conductor;
26
+ // IT records FD/strict state via byan_fd_* / byan_strict_* MCP tools AT the
27
+ // gate. The generated tests + automation summary are the workflow product
28
+ // (written by the leaves), not BYAN platform state.
29
+ // ---------------------------------------------------------------------------
30
+
31
// Inputs follow the workflow.yaml variables. The source workflow is autonomous
// (execution_hints.autonomous=true, interactive=false): it runs without
// prompts unless blocked, so the script is a linear sequence with one agent
// per real source step rather than a retry loop.
const sourceDir = args && args.sourceDir ? args.sourceDir : '{project-root}'
const testDir = args && args.testDir ? args.testDir : '{project-root}/tests'
const coverageTarget = args && args.coverageTarget ? args.coverageTarget : 'critical-paths'
// Optional: restricts the run to one feature or set of files.
const targetFeature = args && args.targetFeature ? args.targetFeature : null
// Timestamp/id supplied by the caller (the sandbox has no clock).
const runId = args && args.runId ? args.runId : 'run'
40
+
41
// Source step 1 HALTs when no test framework is configured. That is a hard
// precondition rather than a human decision, so the preflight result exposes it
// as the `frameworkReady` verdict field instead of letting the script go on to
// invent tests against a missing config.
const PREFLIGHT_FIELDS = {
  frameworkReady: {
    type: 'boolean',
    description: 'true only if playwright.config.ts or cypress.config.ts exists AND package.json has the test deps',
  },
  framework: {
    type: 'string',
    description: 'detected framework (playwright | cypress | none)',
  },
  mode: {
    type: 'string',
    description: 'bmad-integrated (story/tech-spec/test-design found) or standalone (source only)',
  },
  usePlaywrightUtils: {
    type: 'boolean',
    description: 'value of tea_use_playwright_utils from config',
  },
  knowledgeFragments: {
    type: 'array',
    items: { type: 'string' },
    description: 'TEA knowledge fragments loaded',
  },
  notes: { type: 'string' },
}

const PREFLIGHT_SCHEMA = {
  type: 'object',
  required: ['frameworkReady', 'mode'],
  properties: PREFLIGHT_FIELDS,
}
56
+
57
// Coverage-plan contract returned by the TARGETS step; `targets` and `levels`
// are required.
const PLAN_FIELDS = {
  targets: {
    type: 'array',
    items: { type: 'string' },
    description: 'features/files to test',
  },
  levels: {
    type: 'array',
    items: { type: 'string' },
    description: 'selected test levels: e2e, api, component, unit',
  },
  priorities: {
    type: 'object',
    description: 'priority assignment summary (P0-P3)',
  },
  justification: {
    type: 'string',
    description: 'why this coverage scope (critical-paths/comprehensive/selective)',
  },
}

const PLAN_SCHEMA = {
  type: 'object',
  required: ['targets', 'levels'],
  properties: PLAN_FIELDS,
}
67
+
68
// Result contract shared by the two generation subprocesses (API and E2E).
const SUBPROCESS_FIELDS = {
  success: { type: 'boolean' },
  subprocess: {
    type: 'string',
    description: 'api-tests | e2e-tests',
  },
  files: {
    type: 'array',
    items: { type: 'string' },
    description: 'test file paths produced',
  },
  testCount: { type: 'integer' },
  fixtureNeeds: {
    type: 'array',
    items: { type: 'string' },
  },
  summary: { type: 'string' },
}

const SUBPROCESS_SCHEMA = {
  type: 'object',
  required: ['success', 'subprocess', 'testCount'],
  properties: SUBPROCESS_FIELDS,
}
80
+
81
// Verdict contract returned by the VALIDATE step; only `passed` is required.
const VALIDATE_FIELDS = {
  passed: {
    type: 'boolean',
    description: 'true only if all checklist completion criteria are met',
  },
  gaps: {
    type: 'array',
    items: { type: 'string' },
    description: 'unmet checklist items',
  },
  summaryPath: {
    type: 'string',
    description: 'path to the written automation summary',
  },
  nextWorkflow: {
    type: 'string',
    description: 'recommended follow-up workflow (test-review or trace)',
  },
}

const VALIDATE_SCHEMA = {
  type: 'object',
  required: ['passed'],
  properties: VALIDATE_FIELDS,
}
91
+
92
// --- Step 1: Preflight & Context Loading (steps-c/step-01) -----------------
phase('PREFLIGHT')

const preflightInstructions = [
  `You are the Master Test Architect running testarch-automate (autonomous mode: proceed without prompts unless blocked).\n`,
  `STEP 1 - Preflight & Context. Source dir: ${JSON.stringify(sourceDir)}; test dir: ${JSON.stringify(testDir)}.\n`,
  `1. Verify a test framework exists (playwright.config.ts or cypress.config.ts AND test deps in package.json). `,
  `If the framework is completely missing, set frameworkReady=false (the source HALTs here with "Run the framework workflow first") and do NOT fabricate tests.\n`,
  `2. Determine execution mode: bmad-integrated if a story / tech-spec / test-design artifact is present, else standalone (source code only).\n`,
  `3. Load context: framework config, existing test structure under the test dir, existing tests (for coverage gaps). Read tea_use_playwright_utils from config.\n`,
  `4. Load the required TEA knowledge fragments (test-levels-framework, test-priorities-matrix, data-factories, selective-testing, ci-burn-in, test-quality; plus playwright-utils OR traditional fixture/network-first fragments per config).\n`,
  `Report the detected framework, mode, playwright-utils flag, and the fragments loaded.`,
].join('')

const preflight = await agent(preflightInstructions, {
  label: 'preflight',
  phase: 'PREFLIGHT',
  schema: PREFLIGHT_SCHEMA,
})

// Progress line: readiness as a boolean, mode exactly as reported.
const preflightReady = Boolean(preflight && preflight.frameworkReady)
const preflightMode = preflight && preflight.mode
log(`preflight: frameworkReady=${preflightReady} mode=${preflightMode}`)
106
+
107
+ // Hard precondition gate (mirrors step-01 HALT). Return early as a verdict;
108
+ // the human-gated skill decides whether to run the framework workflow first.
109
+ if (!preflight || !preflight.frameworkReady) {
110
+ return {
111
+ workflow: 'testarch-automate',
112
+ status: 'halted-no-framework',
113
+ summary: 'No test framework configured (playwright.config.ts / cypress.config.ts missing). Run the framework workflow first.',
114
+ steps: 1,
115
+ preflight,
116
+ needsHumanGate: true,
117
+ }
118
+ }
119
+
120
// --- Step 2: Identify Automation Targets (steps-c/step-02) -----------------
phase('TARGETS')

// Either the caller-supplied focus (JSON-quoted) or the auto-discovery wording.
const focusClause = targetFeature ? JSON.stringify(targetFeature) : 'auto-discover features in the source dir'

const planInstructions = [
  `STEP 2 - Identify automation targets and build the coverage plan. Mode: ${preflight.mode}. `,
  `Coverage target: ${JSON.stringify(coverageTarget)}. Focus: ${focusClause}.\n`,
  `1. Determine targets: bmad-integrated -> map acceptance criteria to scenarios, check existing ATDD outputs to avoid duplication, expand with edge/negative paths. `,
  `standalone -> focus the given feature/files if provided, else auto-discover; prioritize critical paths, integrations, untested logic.\n`,
  `2. Choose test levels per test-levels-framework: E2E for critical journeys, API for business logic/contracts, Component for UI behavior, Unit for pure logic/edge cases. Avoid duplicate coverage across levels.\n`,
  `3. Assign priorities per test-priorities-matrix (P0 critical+high-risk, P1 important+medium/high-risk, P2 secondary+edge, P3 optional).\n`,
  `4. Produce a concise coverage plan: targets by level, priority assignments, and justification for the ${coverageTarget} scope.`,
].join('')

const plan = await agent(planInstructions, {
  label: 'coverage-plan',
  phase: 'TARGETS',
  schema: PLAN_SCHEMA,
})

// Progress line: number of targets and the comma-joined levels (0 / '' when absent).
const plannedTargetCount = (plan && plan.targets && plan.targets.length) || 0
const plannedLevels = (plan && plan.levels && plan.levels.join(',')) || ''
log(`plan: targets=${plannedTargetCount} levels=${plannedLevels}`)
133
+
134
// --- Step 3: Orchestrate Parallel Test Generation (steps-c/step-03) --------
// The source requires a PARALLEL fan-out: subprocess 3A (API) and 3B (E2E) run
// at the same time and step-03c waits for BOTH before aggregating. parallel()
// is the faithful native shape (NOT sequential — by the source's own rules a
// sequential run is a SYSTEM FAILURE).
phase('GENERATE')
const planJson = JSON.stringify(plan)
// Second prompt line, identical for both subprocesses.
const generationContext = `Coverage plan: ${planJson}. Playwright-utils enabled: ${Boolean(preflight.usePlaywrightUtils)}.\n`

// Subprocess A: API tests ONLY (steps-c/step-03a-subprocess-api).
const apiGenerationTask = () => {
  const apiInstructions = [
    `SUBPROCESS A (run ${runId}) - generate API tests ONLY (no E2E, no fixtures yet, do not run tests).\n`,
    generationContext,
    `From the plan, identify API endpoints, request/response shapes, auth needs, error scenarios. `,
    `For each, create tests/api/[feature].spec.ts: use apiRequest() if playwright-utils enabled (else the request fixture), `,
    `data factories for test data, priority tags [P0]-[P3], happy-path AND error scenarios, proper TS types, deterministic assertions. `,
    `Track (do NOT create) the fixture needs for the aggregation step. Report the files, test count, and fixture needs.`,
  ].join('')
  return agent(apiInstructions, { label: 'gen-api', phase: 'GENERATE', schema: SUBPROCESS_SCHEMA })
}

// Subprocess B: E2E tests ONLY (steps-c/step-03b-subprocess-e2e).
const e2eGenerationTask = () => {
  const e2eInstructions = [
    `SUBPROCESS B (run ${runId}) - generate E2E tests ONLY (no API, no fixtures yet, do not run tests).\n`,
    generationContext,
    `From the plan, identify the critical user journeys. `,
    `For each, create tests/e2e/[feature].spec.ts: fixture-architecture patterns, network-first (intercept BEFORE navigate), `,
    `resilient selectors (getByRole/getByText/getByLabel), priority tags [P0]-[P3], complete journeys (not isolated clicks), `,
    `proper TS types, deterministic waits (no hard sleeps). `,
    `Track (do NOT create) the fixture needs for aggregation. Report the files, test count, and fixture needs.`,
  ].join('')
  return agent(e2eInstructions, { label: 'gen-e2e', phase: 'GENERATE', schema: SUBPROCESS_SCHEMA })
}

const [apiGen, e2eGen] = await parallel([apiGenerationTask, e2eGenerationTask])

// Progress line: success flag and test count per subprocess (count 0 when absent).
const apiGenOk = Boolean(apiGen && apiGen.success)
const apiGenCount = (apiGen && apiGen.testCount) || 0
const e2eGenOk = Boolean(e2eGen && e2eGen.success)
const e2eGenCount = (e2eGen && e2eGen.testCount) || 0
log(`generate: api.success=${apiGenOk} (${apiGenCount}) e2e.success=${e2eGenOk} (${e2eGenCount})`)
170
+
171
+ // Mirror of step-03 / step-03c exit guard: do NOT aggregate if either
172
+ // subprocess failed. This is a precondition, not a human decision.
173
+ const bothSucceeded = Boolean(apiGen && apiGen.success) && Boolean(e2eGen && e2eGen.success)
174
+ if (!bothSucceeded) {
175
+ return {
176
+ workflow: 'testarch-automate',
177
+ status: 'failed-generation',
178
+ summary: 'One or both test-generation subprocesses failed; aggregation skipped (mirrors step-03 exit condition).',
179
+ steps: 3,
180
+ preflight,
181
+ plan,
182
+ generation: { api: apiGen, e2e: e2eGen },
183
+ needsHumanGate: true,
184
+ }
185
+ }
186
+
187
// --- Step 3C: Aggregate (steps-c/step-03c-aggregate) -----------------------
phase('AGGREGATE')

// Both subprocess results are embedded in the prompt as JSON. This call passes
// no schema, unlike the other steps in this script.
const aggregateInstructions = [
  `STEP 3C - Aggregate the two subprocess outputs and complete the test infrastructure (do NOT regenerate tests, do NOT run them).\n`,
  `API subprocess output: ${JSON.stringify(apiGen)}\nE2E subprocess output: ${JSON.stringify(e2eGen)}\n`,
  `1. Write all API and E2E test files to disk.\n`,
  `2. Aggregate the fixture needs from both subprocesses (de-duplicate), categorize them (auth fixtures, data factories, network mocks, helpers).\n`,
  `3. Generate the shared fixture infrastructure: tests/fixtures/auth.ts (test.extend with auto-cleanup), tests/fixtures/data-factories.ts (faker-based, override-able), tests/fixtures/network-mocks.ts, tests/fixtures/helpers.ts.\n`,
  `4. Compute summary statistics: total/api/e2e test counts, fixtures created, per-priority breakdown (P0-P3), knowledge fragments used.\n`,
  `Report the written files and the computed statistics.`,
].join('')

const aggregate = await agent(aggregateInstructions, { label: 'aggregate', phase: 'AGGREGATE' })
199
+
200
// --- Step 4: Validate & Summarize (steps-c/step-04-validate-and-summarize) -
phase('VALIDATE')
// The aggregate step is called without a schema. A string result is passed
// through unchanged; anything else is serialized, because interpolating an
// object directly would put "[object Object]" into the prompt.
// NOTE(review): the return type of a schema-less agent() call is not visible
// in this file — confirm against the Workflow tool contract.
const aggregateReport = typeof aggregate === 'string' ? aggregate : JSON.stringify(aggregate)
const validate = await agent(
  `STEP 4 - Validate the generated outputs against the automate checklist and produce the automation summary.\n` +
    `Aggregation result: ${aggregateReport}\n` +
    `1. Validate per checklist.md: framework readiness, coverage mapping, test quality/structure, fixtures/factories/helpers, no duplicate coverage, priority tags present, network-first applied, deterministic (no hard waits). Fix gaps before completing; report any that remain.\n` +
    `2. Write the automation summary to the output folder (automation-summary.md): coverage plan by level and priority, files created/updated, key assumptions and risks, and the recommended next workflow (test-review or trace).\n` +
    `Set passed=true only if every completion criterion is satisfied.`,
  { label: 'validate-summarize', phase: 'VALIDATE', schema: VALIDATE_SCHEMA }
)
// Progress line: verdict as a boolean and the number of reported gaps (0 when absent).
log(`validate: passed=${Boolean(validate && validate.passed)} gaps=${(validate && validate.gaps && validate.gaps.length) || 0}`)
211
+
212
+ // Return DATA only. The orchestrating skill presents this at the human gate
213
+ // and records FD/strict state via MCP.
214
+ return {
215
+ workflow: 'testarch-automate',
216
+ status: validate && validate.passed ? 'complete' : 'needs-fixes',
217
+ summary:
218
+ `Generated ${(apiGen.testCount || 0) + (e2eGen.testCount || 0)} tests ` +
219
+ `(${apiGen.testCount || 0} API, ${e2eGen.testCount || 0} E2E) for ${coverageTarget} coverage in ${preflight.mode} mode.`,
220
+ steps: 5,
221
+ mode: preflight.mode,
222
+ coverageTarget,
223
+ preflight,
224
+ plan,
225
+ generation: { api: apiGen, e2e: e2eGen },
226
+ aggregation: aggregate,
227
+ validation: validate,
228
+ needsHumanGate: true,
229
+ }