npm - create-byan-agent - Versions diffs - 2.19.1 → 2.20.0 - Mend

create-byan-agent 2.19.1 → 2.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

package/install/templates/.claude/workflows/testarch-test-review.js ADDED Viewed

@@ -0,0 +1,321 @@
+export const meta = {
+  name: 'testarch-test-review',
+  description: 'Autonomous TEA test-quality review: load knowledge, discover tests, fan out 5 parallel quality-dimension checks, aggregate a weighted 0-100 score, and produce a report. Returns a verdict for the human gate.',
+  phases: [ // titles match the phase(...) calls made further down, in the same order
+    { title: 'LOAD-CONTEXT' },
+    { title: 'DISCOVER-TESTS' },
+    { title: 'QUALITY-EVALUATION' },
+    { title: 'AGGREGATE-SCORES' },
+    { title: 'GENERATE-REPORT' }
+  ]
+}
+// FD/STRICT CONTRACT (re-asserted): this script returns data only. It never
+// imports lib/fd-state.js, never writes fd-state.json, and uses no wall-clock
+// or randomness primitive (those break resume in the Workflow sandbox). Any
+// timestamp/id is passed in via args. The orchestrating skill records FD /
+// strict state via MCP at the human gate that lives OUTSIDE this script.
+// The byan-lint-workflows linter forbids fd-state coupling here.
+// --- Inputs (mirror workflow.yaml variables; all passed via args, no defaults that touch the clock) ---
+const testDir = (args && args.test_dir) || './tests'
+const reviewScope = (args && args.review_scope) || 'suite' // single | directory | suite
+const targetPath = (args && args.target_path) || testDir   // for scope=single/directory
+const usePlaywrightUtils = !!(args && args.tea_use_playwright_utils)
+const timestamp = (args && args.timestamp) || 'run' // passed in by orchestrator; never generated here
+const outputFile = (args && args.output_file) || `_byan-output/test-review-${timestamp}.md`
+const knowledgeIndex = (args && args.knowledge_index) || '_byan/connaissance/testarch/tea-index.csv'
+// Step 1: Load Context & Knowledge Base (steps-c/step-01-load-context.md). context.scope is reused by discovery, the report prompt and the final return.
+phase('LOAD-CONTEXT')
+const context = await agent( // first argument is the prompt, second carries label/phase and the expected result shape
+  `You are the Master Test Architect running step 01 (load-context) of the testarch-test-review workflow.
+Read the real source step: _byan/workflow/simple/testarch/test-review/steps-c/step-01-load-context.md.
+Do exactly what it says:
+1. Determine review scope = "${reviewScope}" (single = one file, directory = one folder, suite = whole repo). Target path: ${targetPath}.
+2. Load the TEA knowledge base. Read the index at ${knowledgeIndex} and check tea_use_playwright_utils (=${usePlaywrightUtils}) to pick the fragment set.
+   Core fragments: test-quality.md, data-factories.md, test-levels-framework.md, selective-testing.md, test-healing-patterns.md, selector-resilience.md, timing-debugging.md.
+   ${usePlaywrightUtils
+      ? 'Playwright Utils ENABLED -> also load: overview.md, api-request.md, network-recorder.md, auth-session.md, intercept-network-call.md, recurse.md, log.md, file-utils.md, burn-in.md, network-error-monitor.md, fixtures-composition.md.'
+      : 'Playwright Utils DISABLED -> also load: fixture-architecture.md, network-first.md, playwright-config.md, component-tdd.md, ci-burn-in.md.'}
+3. Gather context artifacts if present: story file (acceptance criteria), test-design doc (priorities), framework config.
+Summarize what was found.`,
+  {
+    label: 'load-context',
+    phase: 'LOAD-CONTEXT',
+    schema: {
+      type: 'object',
+      required: ['scope', 'knowledgeFragmentsLoaded'],
+      properties: {
+        scope: { type: 'string', enum: ['single', 'directory', 'suite'] },
+        knowledgeFragmentsLoaded: { type: 'array', items: { type: 'string' } },
+        playwrightUtils: { type: 'boolean' },
+        storyFile: { type: 'string' },
+        testDesignFile: { type: 'string' },
+        frameworkConfig: { type: 'string' },
+        summary: { type: 'string' }
+      }
+    }
+  }
+)
+log(`scope=${context.scope} fragments=${(context.knowledgeFragmentsLoaded || []).length}`) // the || [] guard is defensive: the schema already lists the field as required
+// Step 2: Discover & Parse Tests (steps-c/step-02-discover-tests.md). The agent reports the file list plus per-file metadata.
+phase('DISCOVER-TESTS')
+const discovery = await agent(
+  `Run step 02 (discover-tests) of testarch-test-review.
+Read the real source step: _byan/workflow/simple/testarch/test-review/steps-c/step-02-discover-tests.md.
+1. Discover test files for scope "${context.scope}":
+   - single: use the provided file path (${targetPath})
+   - directory: glob test files under the selected folder (${targetPath})
+   - suite: glob all tests in the repo (root: ${testDir})
+   HALT (set halted=true, testFileCount=0) if no test files are found.
+2. Per file, parse metadata: file size & line count, detected framework, describe/test block counts,
+   test IDs and priority markers (P0/P1/P2/P3), imports/fixtures/factories/network interception,
+   waits/timeouts and control flow (if/try/catch).`,
+  {
+    label: 'discover-tests',
+    phase: 'DISCOVER-TESTS',
+    schema: {
+      type: 'object',
+      required: ['halted', 'testFileCount'],
+      properties: {
+        halted: { type: 'boolean' },
+        testFileCount: { type: 'integer' },
+        framework: { type: 'string' },
+        testFiles: {
+          type: 'array',
+          items: {
+            type: 'object',
+            properties: {
+              file: { type: 'string' },
+              lineCount: { type: 'integer' },
+              describeBlocks: { type: 'integer' },
+              testBlocks: { type: 'integer' },
+              testIds: { type: 'array', items: { type: 'string' } }
+            }
+          }
+        },
+        summary: { type: 'string' }
+      }
+    }
+  }
+)
+log(`discovered ${discovery.testFileCount} test file(s), halted=${discovery.halted}`)
+// Short-circuit when discovery halted or found zero files: return a halted result and leave the next move to the human gate.
+if (discovery.halted || discovery.testFileCount === 0) { // either signal alone is enough to stop
+  return {
+    workflow: 'testarch-test-review',
+    summary: 'No tests found in scope — review halted at discovery (mirrors step-02 HALT condition).',
+    steps: 2, // presumably the number of steps run before halting (the full run returns 5) — TODO confirm
+    halted: true,
+    scope: context.scope,
+    needsHumanGate: true,
+    result: { reason: 'no-tests-found', testFileCount: 0 } // testFileCount is a literal 0 here, not read from discovery
+  }
+}
+// Step 3: Orchestrate 5 Parallel Quality-Dimension Subprocesses (steps-c/step-03 + 03a-03e)
+// True fan-out in the source ("Launch FIVE subprocesses in PARALLEL") -> parallel() of 5 thunks.
+// Each subprocess is read-only, checks ONE dimension, and returns a 0-100 score with severity-weighted
+// violations (HIGH 10 / MEDIUM 5 / LOW 2); coverage is the exception and uses its own gap-based penalty.
+phase('QUALITY-EVALUATION')
+const dimensionSchema = { // one result shape shared by all five dimension subprocesses
+  type: 'object',
+  required: ['dimension', 'score', 'violations'],
+  properties: {
+    dimension: { type: 'string' },
+    score: { type: 'integer', minimum: 0, maximum: 100 },
+    grade: { type: 'string' },
+    violations: {
+      type: 'array',
+      items: {
+        type: 'object',
+        properties: {
+          file: { type: 'string' },
+          line: { type: 'integer' },
+          severity: { type: 'string', enum: ['HIGH', 'MEDIUM', 'LOW'] },
+          category: { type: 'string' },
+          description: { type: 'string' },
+          suggestion: { type: 'string' }
+        }
+      }
+    },
+    violationSummary: {
+      type: 'object',
+      properties: {
+        HIGH: { type: 'integer' },
+        MEDIUM: { type: 'integer' },
+        LOW: { type: 'integer' }
+      }
+    },
+    recommendations: { type: 'array', items: { type: 'string' } },
+    summary: { type: 'string' }
+  }
+}
+const filesForPrompt = JSON.stringify(discovery.testFiles || []) // testFiles is optional in the discovery schema, hence the [] fallback; serialized once for all five prompts
+const [determinism, isolation, maintainability, coverage, performance] = await parallel([
+  // 3A — Determinism (steps-c/step-03a-subprocess-determinism.md)
+  () => agent(
+    `Subprocess 3A (DETERMINISM only). Read the real source: _byan/workflow/simple/testarch/test-review/steps-c/step-03a-subprocess-determinism.md.
+Read-only analysis of these test files: ${filesForPrompt}.
+Flag non-determinism: HIGH = RNG calls, wall-clock reads/a wall-clock read unmocked, setTimeout/setInterval without waits, unmocked external API calls, random-path FS ops, non-deterministic DB ordering;
+MEDIUM = waitForTimeout hard waits, flaky selectors, race conditions, test-order dependencies; LOW = shared state, unfixed timezone.
+Score = max(0, 100 - penalty) with HIGH=10, MEDIUM=5, LOW=2. Check determinism ONLY.`,
+    { label: 'subprocess-determinism', phase: 'QUALITY-EVALUATION', schema: dimensionSchema }
+  ),
+  // 3B — Isolation (steps-c/step-03b-subprocess-isolation.md)
+  () => agent(
+    `Subprocess 3B (ISOLATION only). Read the real source: _byan/workflow/simple/testarch/test-review/steps-c/step-03b-subprocess-isolation.md.
+Read-only analysis of these test files: ${filesForPrompt}.
+Flag isolation issues: HIGH = global state mutations, test-order dependencies, shared DB records without cleanup, beforeAll/afterAll side effects leaking;
+MEDIUM = missing cleanup, state-mutating shared fixtures, assumed execution order, env vars modified without restore; LOW = shared (non-mutated) data, missing describe grouping.
+Score = max(0, 100 - penalty) with HIGH=10, MEDIUM=5, LOW=2. Check isolation ONLY.`,
+    { label: 'subprocess-isolation', phase: 'QUALITY-EVALUATION', schema: dimensionSchema }
+  ),
+  // 3C — Maintainability (steps-c/step-03c-subprocess-maintainability.md)
+  () => agent(
+    `Subprocess 3C (MAINTAINABILITY only). Read the real source: _byan/workflow/simple/testarch/test-review/steps-c/step-03c-subprocess-maintainability.md.
+Read-only analysis of these test files: ${filesForPrompt}.
+Flag maintainability issues: HIGH = tests >100 lines, no describe grouping, duplicate/copy-paste logic, unclear names (no Given/When/Then), magic numbers/strings;
+MEDIUM = missing comments on complex logic, inconsistent naming, nesting >3 levels, large setup/teardown; LOW = minor style, helper-extraction opportunities, inconsistent assertion styles.
+Score = max(0, 100 - penalty) with HIGH=10, MEDIUM=5, LOW=2. Check maintainability ONLY.`,
+    { label: 'subprocess-maintainability', phase: 'QUALITY-EVALUATION', schema: dimensionSchema }
+  ),
+  // 3D — Coverage (steps-c/step-03d-subprocess-coverage.md) — distinct gap-based scoring
+  () => agent(
+    `Subprocess 3D (COVERAGE only). Read the real source: _byan/workflow/simple/testarch/test-review/steps-c/step-03d-subprocess-coverage.md.
+Read-only analysis of these test files: ${filesForPrompt}.
+Flag coverage gaps: HIGH = critical/P0 paths untested, API endpoints without tests, error handling untested, missing auth/authz tests;
+MEDIUM = uncovered edge cases (boundaries, null/empty), only happy path, missing integration layer, weak assertion coverage; LOW = additional cases, minor edges, incomplete docs.
+Scoring is DISTINCT here: if any HIGH gap exists, score = max(0, 50 - highCount*10); else score = max(0, 100 - totalViolations*5). Check coverage ONLY.`,
+    { label: 'subprocess-coverage', phase: 'QUALITY-EVALUATION', schema: dimensionSchema }
+  ),
+  // 3E — Performance (steps-c/step-03e-subprocess-performance.md)
+  () => agent(
+    `Subprocess 3E (PERFORMANCE only). Read the real source: _byan/workflow/simple/testarch/test-review/steps-c/step-03e-subprocess-performance.md.
+Read-only analysis of these test files: ${filesForPrompt}.
+Flag performance issues: HIGH = unnecessary test.describe.serial (not parallelizable), slow setup/teardown (fresh DB per test), excessive navigation, no fixture reuse;
+MEDIUM = hard waits >2s, inefficient selectors (page.$$ vs locators), large datasets without pagination, missing optimizations; LOW = unused parallelization, minor inefficiencies, excessive logging.
+Score = max(0, 100 - penalty) with HIGH=10, MEDIUM=5, LOW=2. Check performance ONLY.`,
+    { label: 'subprocess-performance', phase: 'QUALITY-EVALUATION', schema: dimensionSchema }
+  )
+])
+log(`dimensions: det=${determinism.score} iso=${isolation.score} maint=${maintainability.score} cov=${coverage.score} perf=${performance.score}`)
+// Step 3F: Aggregate Scores (steps-c/step-03f-aggregate-scores.md)
+// The prompt spells out the weights (0.25/0.25/0.20/0.15/0.15) and grade thresholds A>=90 B>=80 C>=70 D>=60 else F.
+phase('AGGREGATE-SCORES')
+const aggregate = await agent( // the weighted arithmetic is delegated to the agent; the script uses overallScore as returned and does not recompute it
+  `Run step 03F (aggregate-scores) of testarch-test-review.
+Read the real source: _byan/workflow/simple/testarch/test-review/steps-c/step-03f-aggregate-scores.md.
+You are given the 5 quality-dimension results (each already a 0-100 score with violations):
+determinism=${JSON.stringify(determinism)}
+isolation=${JSON.stringify(isolation)}
+maintainability=${JSON.stringify(maintainability)}
+coverage=${JSON.stringify(coverage)}
+performance=${JSON.stringify(performance)}
+Do NOT re-evaluate quality — only aggregate.
+1. Weighted overall score = round( det*0.25 + iso*0.25 + maint*0.20 + cov*0.15 + perf*0.15 ).
+2. Grade: >=90 A, >=80 B, >=70 C, >=60 D, else F.
+3. Aggregate all violations across dimensions; count by severity (HIGH/MEDIUM/LOW) plus total.
+4. Prioritize recommendations (impact HIGH if a dimension score <70), keep the top 10.`,
+  {
+    label: 'aggregate-scores',
+    phase: 'AGGREGATE-SCORES',
+    schema: {
+      type: 'object',
+      required: ['overallScore', 'overallGrade', 'violationSummary'],
+      properties: {
+        overallScore: { type: 'integer', minimum: 0, maximum: 100 },
+        overallGrade: { type: 'string', enum: ['A', 'B', 'C', 'D', 'F'] },
+        dimensionScores: {
+          type: 'object',
+          properties: {
+            determinism: { type: 'integer' },
+            isolation: { type: 'integer' },
+            maintainability: { type: 'integer' },
+            coverage: { type: 'integer' },
+            performance: { type: 'integer' }
+          }
+        },
+        violationSummary: {
+          type: 'object',
+          required: ['total', 'HIGH', 'MEDIUM', 'LOW'],
+          properties: {
+            total: { type: 'integer' },
+            HIGH: { type: 'integer' },
+            MEDIUM: { type: 'integer' },
+            LOW: { type: 'integer' }
+          }
+        },
+        topRecommendations: { type: 'array', items: { type: 'string' } },
+        qualityAssessment: { type: 'string' }
+      }
+    }
+  }
+)
+log(`overall=${aggregate.overallScore}/100 grade=${aggregate.overallGrade} violations=${aggregate.violationSummary.total}`)
+// Step 4: Generate Report & Validate (steps-c/step-04-generate-report.md)
+phase('GENERATE-REPORT')
+const report = await agent(
+  `Run step 04 (generate-report) of testarch-test-review.
+Read the real source: _byan/workflow/simple/testarch/test-review/steps-c/step-04-generate-report.md
+and the template _byan/workflow/simple/testarch/test-review/test-review-template.md.
+Using the aggregated results: ${JSON.stringify(aggregate)}
+and scope=${context.scope}, write the report to ${outputFile} including:
+- Score summary (overall ${aggregate.overallScore}/100, grade ${aggregate.overallGrade})
+- Critical findings with concrete fixes (HIGH-severity violations)
+- Warnings and recommendations (top recommendations)
+- Context references (story / test-design if available)
+Then validate the report against _byan/workflow/simple/testarch/test-review/checklist.md and fix any gaps.
+Report the scope reviewed, overall score, critical blocker count, and the next recommended workflow (e.g. automate or trace).
+Do NOT make the approve/request-changes/block decision — that is the human gate.`,
+  {
+    label: 'generate-report',
+    phase: 'GENERATE-REPORT',
+    schema: {
+      type: 'object',
+      required: ['reportPath', 'checklistValidated'],
+      properties: {
+        reportPath: { type: 'string' },
+        checklistValidated: { type: 'boolean' },
+        criticalBlockers: { type: 'integer' },
+        nextRecommendedWorkflow: { type: 'string' },
+        completionSummary: { type: 'string' }
+      }
+    }
+  }
+)
+log(`report -> ${report.reportPath} checklistValidated=${report.checklistValidated}`)
+// Final verdict object. The decision (Approve / Approve-with-comments / Request-changes / Block)
+// stays OUT of this script — the orchestrating skill presents this at the human gate.
+return {
+  workflow: 'testarch-test-review',
+  summary: `Test-quality review of scope "${context.scope}" across ${discovery.testFileCount} file(s): overall ${aggregate.overallScore}/100 (grade ${aggregate.overallGrade}), ${aggregate.violationSummary.total} violations (${aggregate.violationSummary.HIGH} HIGH).`,
+  steps: 5,
+  halted: false,
+  scope: context.scope,
+  testFileCount: discovery.testFileCount,
+  overallScore: aggregate.overallScore,
+  overallGrade: aggregate.overallGrade,
+  dimensionScores: aggregate.dimensionScores,
+  violationSummary: aggregate.violationSummary,
+  topRecommendations: aggregate.topRecommendations,
+  reportPath: report.reportPath,
+  criticalBlockers: report.criticalBlockers,
+  nextRecommendedWorkflow: report.nextRecommendedWorkflow,
+  needsHumanGate: true,
+  result: {
+    dimensions: { determinism, isolation, maintainability, coverage, performance },
+    aggregate,
+    report
+  }
+}

package/install/templates/.claude/workflows/testarch-trace.js ADDED Viewed

@@ -0,0 +1,316 @@
+export const meta = {
+  name: 'testarch-trace',
+  description: 'Generate a requirements-to-tests traceability matrix, analyze coverage gaps, and apply deterministic quality-gate logic (PASS/CONCERNS/FAIL/WAIVED). Mirrors the BYAN testarch-trace Create-mode steps; returns a verdict for the human gate.',
+  phases: [ // titles match the phase(...) calls made further down, in the same order
+    { title: 'LOAD_CONTEXT' },
+    { title: 'DISCOVER_TESTS' },
+    { title: 'MAP_CRITERIA' },
+    { title: 'ANALYZE_GAPS' },
+    { title: 'GATE_DECISION' }
+  ]
+}
+// FD/STRICT CONTRACT (re-asserted): this script returns data only. It never
+// imports lib/fd-state.js, never writes fd-state.json, and records no platform
+// state. The orchestrating skill presents the gate verdict at the human gate
+// and records FD/strict state via MCP. The byan-lint-workflows linter forbids
+// fd-state coupling, and the sandbox forbids import/require/fs/clock/RNG.
+// Any timestamp or run id must arrive through `args` (no wall-clock / RNG).
+const source = '/home/yan/BYAN/_byan/workflow/simple/testarch/trace'
+const testDir = (args && args.testDir) || '{project-root}/tests'
+const sourceDir = (args && args.sourceDir) || '{project-root}'
+const story = (args && args.story) || '{project-root} story / inline acceptance criteria'
+const coverageLevels = (args && args.coverageLevels) || 'e2e,api,component,unit'
+const gateType = (args && args.gateType) || 'story'
+const decisionMode = (args && args.decisionMode) || 'deterministic'
+const runId = (args && args.runId) || 'trace-run'
+// Step 1 (steps-c/step-01-load-context.md): gather acceptance criteria,
+// priorities, knowledge base and supporting artifacts. The agent signals missing AC via acGate = "HALT".
+phase('LOAD_CONTEXT')
+const context = await agent(
+  [ // prompt fragments are joined with single spaces into one string
+    'You are the Master Test Architect running Step 1 of the testarch-trace workflow.',
+    'Read the real source step at ' + source + '/steps-c/step-01-load-context.md and follow it.',
+    'Goal: gather acceptance criteria (AC), their priorities (P0/P1/P2/P3), and supporting artifacts for traceability.',
+    'Story / AC input: ' + story + '.',
+    'Prerequisite: acceptance criteria MUST be available. If AC are missing, do NOT invent them: set acGate to "HALT".',
+    'Load the tea knowledge base index ({project-root}/_byan/connaissance/testarch/tea-index.csv): test-priorities-matrix, risk-governance, probability-impact, test-quality, selective-testing.',
+    'Load artifacts if present: story file + AC, test design doc (priorities), tech spec / PRD. Summarize what was found.'
+  ].join(' '),
+  {
+    label: 'load-context',
+    phase: 'LOAD_CONTEXT',
+    schema: {
+      type: 'object',
+      required: ['acGate', 'criteria'],
+      properties: {
+        acGate: { type: 'string', enum: ['OK', 'HALT'] },
+        haltReason: { type: 'string' },
+        criteria: {
+          type: 'array',
+          items: {
+            type: 'object',
+            required: ['id', 'priority'],
+            properties: {
+              id: { type: 'string' },
+              description: { type: 'string' },
+              priority: { type: 'string', enum: ['P0', 'P1', 'P2', 'P3'] }
+            }
+          }
+        },
+        artifactsSummary: { type: 'string' }
+      }
+    }
+  }
+)
+if (context.acGate === 'HALT') { // NOTE(review): an empty criteria array with acGate "OK" is not treated as a halt here — confirm that is intended
+  // Mirror the source HALT: AC are a hard prerequisite. Return early and let the
+  // human gate decide how to supply criteria. No state is written here.
+  return {
+    workflow: 'testarch-trace',
+    runId,
+    summary: 'Halted at Step 1: acceptance criteria are required and were not provided.',
+    steps: 5, // same value as the full run returns; completedSteps records how far this run got
+    completedSteps: 1,
+    halted: true,
+    haltReason: context.haltReason || 'Acceptance criteria missing.',
+    needsHumanGate: true,
+    result: context
+  }
+}
+// Step 2 (steps-c/step-02-discover-tests.md): discover tests under test_dir and
+// classify by level (E2E / API / Component / Unit), recording test IDs.
+phase('DISCOVER_TESTS')
+const tests = await agent(
+  [
+    'Step 2 of testarch-trace. Read ' + source + '/steps-c/step-02-discover-tests.md and follow it.',
+    'Search the test directory (' + testDir + ', source: ' + sourceDir + ') for tests relevant to the acceptance criteria from Step 1.',
+    'Match by: explicit test IDs (e.g. 1.3-E2E-001), feature-name matches, and spec patterns (*.spec.*, *.test.*).',
+    'Categorize each discovered test by level among: ' + coverageLevels + ' (E2E, API, Component, Unit).',
+    'Record test IDs, describe blocks, file:line, and any priority markers present.'
+  ].join(' '),
+  {
+    label: 'discover-tests',
+    phase: 'DISCOVER_TESTS',
+    schema: {
+      type: 'object',
+      required: ['tests', 'levelCounts'],
+      properties: {
+        tests: {
+          type: 'array',
+          items: {
+            type: 'object',
+            required: ['testId', 'level'],
+            properties: {
+              testId: { type: 'string' },
+              level: { type: 'string', enum: ['E2E', 'API', 'Component', 'Unit'] },
+              location: { type: 'string' },
+              describeBlock: { type: 'string' }
+            }
+          }
+        },
+        levelCounts: {
+          type: 'object',
+          properties: {
+            E2E: { type: 'integer' },
+            API: { type: 'integer' },
+            Component: { type: 'integer' },
+            Unit: { type: 'integer' }
+          }
+        }
+      }
+    }
+  }
+)
+// Step 3 (steps-c/step-03-map-criteria.md): build the traceability matrix
+// linking each AC to tests with a coverage status; validate P0/P1 coverage and
+// flag unjustified duplicate coverage across levels.
+phase('MAP_CRITERIA')
+const matrix = await agent(
+  [
+    'Step 3 of testarch-trace. Read ' + source + '/steps-c/step-03-map-criteria.md and follow it.',
+    'For EACH acceptance criterion from Step 1, map it to matching tests from Step 2.',
+    'Coverage status of each criterion is one of: FULL, PARTIAL, NONE, UNIT-ONLY, INTEGRATION-ONLY.',
+    'Record the criterion priority and the levels of the mapped tests.',
+    'Validate coverage logic: P0/P1 criteria MUST have coverage; flag duplicate coverage across levels that has no defense-in-depth justification.'
+  ].join(' '),
+  {
+    label: 'map-criteria',
+    phase: 'MAP_CRITERIA',
+    schema: {
+      type: 'object',
+      required: ['traceabilityMatrix'],
+      properties: {
+        traceabilityMatrix: {
+          type: 'array',
+          items: {
+            type: 'object',
+            required: ['id', 'priority', 'coverage'],
+            properties: {
+              id: { type: 'string' },
+              priority: { type: 'string', enum: ['P0', 'P1', 'P2', 'P3'] },
+              coverage: { type: 'string', enum: ['FULL', 'PARTIAL', 'NONE', 'UNIT-ONLY', 'INTEGRATION-ONLY'] },
+              tests: { type: 'array', items: { type: 'string' } }
+            }
+          }
+        },
+        coverageLogicIssues: { type: 'array', items: { type: 'string' } }
+      }
+    }
+  }
+)
+// Step 4 (steps-c/step-04-analyze-gaps.md): Phase 1 final step. Gap analysis by
+// risk priority, recommendations, coverage statistics (overall + per-priority),
+// and the complete coverage matrix. NO gate decision here (that is Step 5).
+phase('ANALYZE_GAPS')
+const coverage = await agent(
+  [
+    'Step 4 of testarch-trace (Phase 1 final). Read ' + source + '/steps-c/step-04-analyze-gaps.md and follow it.',
+    'From the traceability matrix from Step 3, compute the gap analysis and coverage statistics. Do NOT make a gate decision here.',
+    'Gaps: uncovered (coverage=NONE) split into critical=P0, high=P1, medium=P2, low=P3; plus partial-coverage and unit-only items.',
+    'Recommendations: URGENT atdd for P0 gaps, HIGH automate for P1 gaps, MEDIUM complete partial coverage, LOW test-review for quality.',
+    'Statistics: overall coverage percentage = round(fullyCovered / totalRequirements * 100); plus per-priority breakdown (P0/P1/P2/P3 total, covered=FULL, percentage). P0 percentage = round(p0Covered / p0Total * 100).',
+    'Produce the complete Phase 1 coverage matrix object (requirements + coverage_statistics + gap_analysis + recommendations).'
+  ].join(' '),
+  {
+    label: 'analyze-gaps',
+    phase: 'ANALYZE_GAPS',
+    schema: {
+      type: 'object',
+      required: ['phase', 'coverageStatistics', 'gapAnalysis', 'recommendations'],
+      properties: {
+        phase: { type: 'string', enum: ['PHASE_1_COMPLETE'] },
+        coverageStatistics: {
+          type: 'object',
+          required: ['totalRequirements', 'fullyCovered', 'overallCoveragePercentage', 'p0Percentage'],
+          properties: {
+            totalRequirements: { type: 'integer' },
+            fullyCovered: { type: 'integer' },
+            partiallyCovered: { type: 'integer' },
+            uncovered: { type: 'integer' },
+            overallCoveragePercentage: { type: 'integer' },
+            p0Total: { type: 'integer' },
+            p0Covered: { type: 'integer' },
+            p0Percentage: { type: 'integer' }
+          }
+        },
+        gapAnalysis: {
+          type: 'object',
+          properties: {
+            criticalGaps: { type: 'array', items: { type: 'string' } },
+            highGaps: { type: 'array', items: { type: 'string' } },
+            mediumGaps: { type: 'array', items: { type: 'string' } },
+            lowGaps: { type: 'array', items: { type: 'string' } }
+          }
+        },
+        recommendations: {
+          type: 'array',
+          items: {
+            type: 'object',
+            required: ['priority', 'action'],
+            properties: {
+              priority: { type: 'string', enum: ['URGENT', 'HIGH', 'MEDIUM', 'LOW'] },
+              action: { type: 'string' },
+              requirements: { type: 'array', items: { type: 'string' } }
+            }
+          }
+        }
+      }
+    }
+  }
+)
+// Phase 1 gate from the source: Step 5 must not run unless Phase 1 is complete.
+if (coverage.phase !== 'PHASE_1_COMPLETE') {
+  return {
+    workflow: 'testarch-trace',
+    runId,
+    summary: 'Phase 1 (coverage matrix) did not complete; gate decision cannot proceed.',
+    steps: 5,
+    completedSteps: 4,
+    halted: true,
+    haltReason: 'Phase 1 not complete - cannot proceed to gate decision.',
+    needsHumanGate: true,
+    result: { context, tests, matrix, coverage }
+  }
+}
+// Step 5 (steps-c/step-05-gate-decision.md): Phase 2. Apply the DETERMINISTIC
+// gate decision tree from the source over the Phase 1 statistics. The
+// rule-based verdict is computed; the human accept/waive decision stays OUT of
+// this script (returned for the gate). decisionMode=manual is surfaced too.
+phase('GATE_DECISION')
+const gate = await agent(
+  [
+    'Step 5 of testarch-trace (Phase 2 gate). Read ' + source + '/steps-c/step-05-gate-decision.md and follow its deterministic decision tree.',
+    'Inputs from Phase 1: p0Coverage = coverage_statistics.p0Percentage, overallCoverage = coverage_statistics.overallCoveragePercentage, criticalGaps = count of P0 uncovered.',
+    'Apply EXACTLY these rules in order:',
+    '(1) if p0Coverage < 100 -> FAIL (P0 coverage below required 100%, critical requirements uncovered).',
+    '(2) else if overallCoverage >= 90 -> PASS (P0 at 100% and overall meets the 90% target).',
+    '(3) else if overallCoverage >= 75 -> CONCERNS (P0 at 100% but overall below 90% target).',
+    '(4) else -> FAIL (overall below the 75% minimum, significant gaps).',
+    'Gate type: ' + gateType + '. Decision mode: ' + decisionMode + '.',
+    'WAIVED is a manual stakeholder waiver only and is NOT decided here: leave waiverApplicable as a flag for the human gate. Do not self-apply a waiver.',
+    'Return the deterministic decision, the rationale string, and the gate criteria (p0 met?, overall status MET/PARTIAL/NOT MET).'
+  ].join(' '),
+  {
+    label: 'gate-decision',
+    phase: 'GATE_DECISION',
+    schema: {
+      type: 'object',
+      required: ['decision', 'rationale', 'gateCriteria'],
+      properties: {
+        decision: { type: 'string', enum: ['PASS', 'CONCERNS', 'FAIL'] },
+        rationale: { type: 'string' },
+        gateCriteria: {
+          type: 'object',
+          required: ['p0Status', 'overallStatus'],
+          properties: {
+            p0CoverageRequired: { type: 'string' },
+            p0CoverageActual: { type: 'string' },
+            p0Status: { type: 'string', enum: ['MET', 'NOT MET'] },
+            overallCoverageTarget: { type: 'string' },
+            overallCoverageActual: { type: 'string' },
+            overallStatus: { type: 'string', enum: ['MET', 'PARTIAL', 'NOT MET'] }
+          }
+        },
+        waiverApplicable: { type: 'boolean' },
+        uncoveredRequirements: { type: 'array', items: { type: 'string' } }
+      }
+    }
+  }
+)
+// Single top-level return. The deterministic verdict travels to the human gate;
+// the human (with gate_type=' + gateType + ') may PASS-through, demand fixes, or
+// apply a WAIVED override. None of that is decided in-script.
+return {
+  workflow: 'testarch-trace',
+  runId,
+  gateType,
+  decisionMode,
+  summary:
+    'Traceability matrix built and quality gate evaluated deterministically: ' +
+    gate.decision +
+    '. ' +
+    gate.rationale,
+  steps: 5,
+  completedSteps: 5,
+  halted: false,
+  decision: gate.decision,
+  waiverApplicable: gate.waiverApplicable === true,
+  needsHumanGate: true,
+  result: {
+    context,
+    tests,
+    matrix,
+    coverage,
+    gate
+  }
+}