gaia-framework 1.105.0 → 1.127.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/.claude/commands/gaia-bridge-disable.md +18 -0
  2. package/.claude/commands/gaia-bridge-enable.md +18 -0
  3. package/.claude/commands/gaia-fill-test-gaps.md +17 -0
  4. package/CLAUDE.md +64 -1
  5. package/_gaia/_config/gaia-help.csv +2 -0
  6. package/_gaia/_config/global.yaml +14 -1
  7. package/_gaia/_config/lifecycle-sequence.yaml +23 -3
  8. package/_gaia/_config/skill-manifest.csv +1 -0
  9. package/_gaia/_config/workflow-manifest.csv +2 -0
  10. package/_gaia/core/agents/orchestrator.md +1 -1
  11. package/_gaia/core/protocols/review-gate-check.xml +45 -5
  12. package/_gaia/core/validators/test-environment-validator.js +191 -0
  13. package/_gaia/core/workflows/bridge-toggle/checklist.md +11 -0
  14. package/_gaia/core/workflows/bridge-toggle/instructions.xml +69 -0
  15. package/_gaia/core/workflows/bridge-toggle/workflow.yaml +27 -0
  16. package/_gaia/dev/skills/_skill-index.yaml +13 -0
  17. package/_gaia/dev/skills/code-review-standards.md +50 -0
  18. package/_gaia/dev/skills/edge-cases.md +201 -0
  19. package/_gaia/lifecycle/knowledge/brownfield/ci-test-detection.md +194 -0
  20. package/_gaia/lifecycle/knowledge/brownfield/test-execution-scan.md +13 -0
  21. package/_gaia/lifecycle/skills/document-rulesets.md +93 -3
  22. package/_gaia/lifecycle/templates/story-template.md +7 -7
  23. package/_gaia/lifecycle/templates/test-gap-analysis-template.md +221 -0
  24. package/_gaia/lifecycle/workflows/4-implementation/check-review-gate/checklist.md +1 -1
  25. package/_gaia/lifecycle/workflows/4-implementation/check-review-gate/instructions.xml +11 -11
  26. package/_gaia/lifecycle/workflows/4-implementation/code-review/instructions.xml +1 -1
  27. package/_gaia/lifecycle/workflows/4-implementation/create-story/instructions.xml +73 -2
  28. package/_gaia/lifecycle/workflows/4-implementation/dev-story/instructions.xml +25 -2
  29. package/_gaia/lifecycle/workflows/4-implementation/retrospective/instructions.xml +1 -1
  30. package/_gaia/lifecycle/workflows/4-implementation/run-all-reviews/instructions.xml +132 -9
  31. package/_gaia/lifecycle/workflows/4-implementation/security-review/checklist.md +2 -0
  32. package/_gaia/lifecycle/workflows/4-implementation/sprint-planning/instructions.xml +13 -0
  33. package/_gaia/lifecycle/workflows/4-implementation/sprint-planning/workflow.yaml +8 -0
  34. package/_gaia/lifecycle/workflows/4-implementation/validate-story/checklist.md +1 -0
  35. package/_gaia/lifecycle/workflows/4-implementation/validate-story/instructions.xml +11 -0
  36. package/_gaia/lifecycle/workflows/5-deployment/deployment-checklist/instructions.xml +11 -0
  37. package/_gaia/lifecycle/workflows/anytime/brownfield-onboarding/instructions.xml +48 -1
  38. package/_gaia/lifecycle/workflows/anytime/brownfield-onboarding/workflow.yaml +10 -0
  39. package/_gaia/testing/agents/test-architect.md +2 -0
  40. package/_gaia/testing/workflows/ci-setup/instructions.xml +6 -6
  41. package/_gaia/testing/workflows/fill-test-gaps/checklist.md +16 -0
  42. package/_gaia/testing/workflows/fill-test-gaps/instructions.xml +128 -0
  43. package/_gaia/testing/workflows/fill-test-gaps/workflow.yaml +30 -0
  44. package/_gaia/testing/workflows/test-gap-analysis/instructions.xml +47 -14
  45. package/_gaia/testing/workflows/test-gap-analysis/workflow.yaml +1 -0
  46. package/gaia-install.sh +46 -0
  47. package/package.json +3 -3
@@ -0,0 +1,27 @@
1
+ # Bridge Toggle Workflow
2
+ # Enable or disable the Test Execution Bridge via /gaia-bridge-enable or /gaia-bridge-disable.
3
+ # Wraps the manual bridge activation into a single command with idempotency,
4
+ # comment-preserving YAML writes, and a post-toggle summary.
5
+ #
6
+ # Traces: FR-316, ADR-028 §10.20.12
7
+
8
+ name: bridge-toggle
9
+ display_name: "Bridge Toggle"
10
+ description: "Enable or disable the Test Execution Bridge in global.yaml"
11
+ module: core
12
+ agent: orchestrator
13
+
14
+ parameters:
15
+ mode:
16
+ type: string
17
+ required: true
18
+ allowed: [enable, disable]
19
+ description: "Target state — 'enable' sets bridge_enabled to true, 'disable' sets it to false"
20
+
21
+ instructions: "{installed_path}/core/workflows/bridge-toggle/instructions.xml"
22
+ validation: "{installed_path}/core/workflows/bridge-toggle/checklist.md"
23
+
24
+ config_source: "{installed_path}/core/config.yaml"
25
+
26
+ output:
27
+ primary: "{project-root}/_gaia/_config/global.yaml"
@@ -39,6 +39,7 @@ skills:
39
39
  - { id: review-checklist, line_range: [14, 59], description: "Universal code review checklist" }
40
40
  - { id: solid-principles, line_range: [60, 132], description: "SOLID violation detection" }
41
41
  - { id: complexity-metrics, line_range: [133, 226], description: "Cyclomatic and cognitive complexity" }
42
+ - { id: review-gate-completion, line_range: [228, 276], description: "Review gate completion artifacts and review-summary.md hard gate enforcement" }
42
43
 
43
44
  - file: documentation-standards.md
44
45
  sections:
@@ -54,6 +55,18 @@ skills:
54
55
  - { id: secrets-management, line_range: [119, 166], description: "Environment-based secrets" }
55
56
  - { id: cors-csrf, line_range: [167, 230], description: "CORS and CSRF configuration and protection" }
56
57
 
58
+ - file: edge-cases.md
59
+ sections:
60
+ - { id: overview, line_range: [26, 36], description: "Skill purpose, JIT loading mandate, 8K NFR-042 budget" }
61
+ - { id: when-to-invoke, line_range: [37, 50], description: "When create-story and other workflows load this skill" }
62
+ - { id: input-contract, line_range: [51, 68], description: "Required input fields and context token limits" }
63
+ - { id: output-schema, line_range: [69, 105], description: "Structured output: id, scenario, input, expected, category with category enum" }
64
+ - { id: analysis-heuristics, line_range: [106, 121], description: "Enumeration prompts for boundary, input, state, failure, security, timing, resources, idempotency" }
65
+ - { id: token-budget, line_range: [122, 135], description: "NFR-042 8K budget enforcement and truncation rules" }
66
+ - { id: failure-handling, line_range: [136, 152], description: "Non-blocking failure and timeout handling, warn-and-continue contract" }
67
+ - { id: usage-example, line_range: [153, 194], description: "Sample input/output pair" }
68
+ - { id: notes, line_range: [195, 201], description: "Notes on persistence, stack-agnostic usage, and related task file" }
69
+
57
70
  - file: figma-integration.md
58
71
  sections:
59
72
  - { id: detection, line_range: [48, 135], description: "Figma MCP detection probe, failure handling, adapter selection, security guardrails, API scopes, error sanitization" }
@@ -224,3 +224,53 @@ function validate(input) {
224
224
  | Parameter count | > 4 | Suggest object parameter |
225
225
  | Nesting depth | > 3 levels | Require flattening |
226
226
  | Class methods | > 10 public | Suggest decomposition |
227
+
228
+ <!-- SECTION: review-gate-completion -->
229
+ ## Review Gate Completion Requirements
230
+
231
+ Before a story transitions from `review` to `done`, all 6 individual review reports AND the consolidated review-summary.md must exist in the filesystem. This is a **hard gate** — it is enforced structurally by the `review-gate-check` protocol and is not advisory.
232
+
233
+ ### Required Review Artifacts
234
+
235
+ | Artifact | Path | Required When |
236
+ |---|---|---|
237
+ | Code review | `docs/implementation-artifacts/{story_key}-review.md` | Always |
238
+ | Security review | `docs/implementation-artifacts/{story_key}-security-review.md` | Always |
239
+ | QA tests | `docs/test-artifacts/{story_key}-qa-tests.md` | Always |
240
+ | Test automation | `docs/test-artifacts/{story_key}-test-automation.md` | Always |
241
+ | Test review | `docs/test-artifacts/{story_key}-test-review.md` | Always |
242
+ | Performance review | `docs/implementation-artifacts/{story_key}-performance-review.md` | Always |
243
+ | **Review summary** | `docs/implementation-artifacts/{story_key}-review-summary.md` | **Always — enforced hard gate** |
244
+
245
+ ### Enforcement Mechanism (Live)
246
+
247
+ The hard gate is enforced by `_gaia/core/protocols/review-gate-check.xml` — step 2 "Evaluate Gate and Transition". Before invoking `status-sync` to move a story from `review` to `done`, the protocol:
248
+
249
+ 1. Builds the summary file path `{implementation_artifacts}/{story_key}-review-summary.md`
250
+ 2. Checks whether the file exists
251
+ 3. If missing AND any of the 6 individual review reports exist → HALT with: `Review summary missing for {story_key}. Run /gaia-run-all-reviews {story_key} to generate the summary, or create it manually via /gaia-create-review-summary {story_key}.`
252
+ 4. If missing AND all 6 individual review reports are also missing → skip the check (story never entered review)
253
+ 5. If present → gate passes, transition proceeds
254
+
255
+ **This is a live hard gate, not a guidance note.** Stories with missing summaries physically cannot transition to `done` — the protocol will halt.
256
+
257
+ ### Auto-Generation via run-all-reviews
258
+
259
+ `/gaia-run-all-reviews` auto-generates the review-summary.md as the final step of its 6-review pipeline (see `_gaia/lifecycle/workflows/4-implementation/run-all-reviews/instructions.xml` step 8). The summary aggregates the 6 review verdicts (read from the Review Gate table in the story file and from each review's report) — it does not re-run the reviews.
260
+
261
+ ### Manual Generation
262
+
263
+ If auto-generation fails or is skipped, create the summary manually by copying the schema in `run-all-reviews/instructions.xml` step 8 and filling in the verdicts from the individual reports.
264
+
265
+ ### Review Summary Schema
266
+
267
+ ```yaml
268
+ ---
269
+ story_key: {story_key}
270
+ date: {YYYY-MM-DD}
271
+ overall_status: PASSED | FAILED | INCOMPLETE
272
+ reviewers: [code-review, qa-tests, security-review, test-automate, test-review, review-perf]
273
+ ---
274
+ ```
275
+
276
+ Followed by 6 sections (one per review) with verdict + report link + one-line synopsis, then a final aggregate Gate Status table.
@@ -0,0 +1,201 @@
1
+ ---
2
+ name: "edge-cases"
3
+ version: '1.0'
4
+ applicable_agents: [typescript-dev, angular-dev, flutter-dev, java-dev, python-dev, mobile-dev, go-dev]
5
+ test_scenarios:
6
+ - scenario: M+ story receives structured edge case list
7
+ expected: edge_case_results populated with id, scenario, input, expected, category fields
8
+ - scenario: Skill fails or times out
9
+ expected: Caller logs warning, sets edge_case_results=[], continues without blocking
10
+ - scenario: S-sized story
11
+ expected: Skill is not invoked — size gate in create-story excludes S
12
+ - scenario: Input context exceeds 5K tokens
13
+ expected: architecture_excerpt is truncated first, warning logged, proceeds
14
+ - scenario: Output exceeds 3K tokens
15
+ expected: Lower-priority entries dropped, warning logged, partial results returned
16
+ ---
17
+
18
+ # Edge Cases Skill
19
+
20
+ > Structured edge case analysis for M+ stories. Invoked as a mandatory sub-step by `/gaia-create-story` after acceptance criteria are drafted, and available as a standalone skill for any agent or workflow that needs to enumerate edge cases, error scenarios, and boundary conditions.
21
+
22
+ **Traces to:** FR-227, NFR-042, ADR-030 §10.22
23
+
24
+ ---
25
+
26
+ <!-- SECTION: overview -->
27
+ ## Overview
28
+
29
+ The edge-cases skill enumerates scenarios that are *not* the happy path — boundary conditions, error paths, timing issues, input extremes, failure modes, and concurrency hazards — and returns them as a structured list so downstream artifacts (stories, tests, reviews) can trace coverage.
30
+
31
+ This skill is JIT-loaded. It MUST NOT be pre-loaded by any workflow. Token budget for a single invocation is capped at 8K tokens (NFR-042) including the input context and the generated output.
32
+
33
+ When invoked from `/gaia-create-story`, the skill is scoped to a single story's acceptance criteria and runs in-context — no separate workflow invocation, no sub-agent spawn.
34
+
35
+ ---
36
+
37
+ <!-- SECTION: when-to-invoke -->
38
+ ## When to Invoke
39
+
40
+ - `/gaia-create-story` — mandatory for stories with size M, L, or XL (the size gate at Step 4 of create-story instructions.xml enforces this). S-sized stories skip this skill to preserve token budget.
41
+ - `/gaia-edge-cases` — standalone command for ad-hoc edge case brainstorming on existing artifacts.
42
+ - Other workflows MAY load this skill when a step references `edge-cases.md` by name.
43
+
44
+ **Do NOT invoke** this skill for:
45
+ - Stories that are already marked `done` (edge cases should be captured during planning)
46
+ - Purely cosmetic / copy changes (no behavior to enumerate)
47
+ - S-sized stories (gate excludes them by design)
48
+
49
+ ---
50
+
51
+ <!-- SECTION: input-contract -->
52
+ ## Input Contract
53
+
54
+ The caller passes the following context to the skill:
55
+
56
+ | Field | Type | Required | Description |
57
+ |---|---|---|---|
58
+ | `story_key` | string | yes | e.g., `E19-S9` — used as prefix for edge case IDs |
59
+ | `story_title` | string | yes | Human-readable title |
60
+ | `story_description` | string | yes | The user story paragraph (As a / I want / so that) |
61
+ | `acceptance_criteria` | string[] | yes | List of AC strings in Given/When/Then format |
62
+ | `size` | enum | yes | One of `S`, `M`, `L`, `XL` (skill halts if `S`) |
63
+ | `architecture_excerpt` | string | no | Optional relevant ADR or architecture section |
64
+
65
+ Total input context MUST stay under ~5K tokens to leave room for the output inside the 8K budget.
66
+
67
+ ---
68
+
69
+ <!-- SECTION: output-schema -->
70
+ ## Output Schema
71
+
72
+ The skill returns a structured list. Each edge case is an object with exactly these fields:
73
+
74
+ ```yaml
75
+ edge_case_results:
76
+ - id: "EC-1" # string — sequential EC-{N} numbering, unique within a single invocation
77
+ scenario: "..." # string — one-line description of the edge case
78
+ input: "..." # string — specific input / precondition that triggers it
79
+ expected: "..." # string — expected system behavior or output
80
+ category: "..." # enum — see category list below
81
+ ```
82
+
83
+ **Required fields (all five MUST be present on every result):**
84
+ - `id` — `EC-{N}` format (EC-1, EC-2, ...)
85
+ - `scenario` — what the edge case is
86
+ - `input` — the triggering input, state, or precondition
87
+ - `expected` — the expected behavior
88
+ - `category` — one of the categories below
89
+
90
+ **Category enum:**
91
+ - `boundary` — min/max values, empty sets, off-by-one, buffer limits
92
+ - `error` — validation failures, exception paths, invalid inputs
93
+ - `timing` — race conditions, timeouts, retries, rate limits
94
+ - `concurrency` — parallel access, locking, idempotency
95
+ - `integration` — upstream/downstream dependency failures, contract mismatches
96
+ - `security` — authz bypass, injection, privilege escalation
97
+ - `data` — malformed data, encoding, unicode, large payloads
98
+ - `environment` — offline, degraded, platform-specific quirks
99
+
100
+ If no edge cases are identified, return an empty list and log a note — do NOT fabricate edge cases to pad the output.
101
+
102
+ Output format when returned to the caller: YAML-serializable list. The caller (e.g., `/gaia-create-story`) stores this list in the `edge_case_results` variable before writing the story file to disk.
103
+
104
+ ---
105
+
106
+ <!-- SECTION: analysis-heuristics -->
107
+ ## Analysis Heuristics
108
+
109
+ Use these prompts to drive enumeration — one or two results per heuristic are typical; not all heuristics will apply:
110
+
111
+ 1. **Boundary sweep** — for every numeric input, what happens at 0, 1, max, max+1, negative, and fractional? For every collection, what happens empty and full?
112
+ 2. **Input extremes** — what about empty strings, very long strings, unicode, null, missing fields, extra fields, wrong types?
113
+ 3. **State transitions** — can the operation be invoked in an unexpected state? What if it is called twice? What if it is called while another operation is in flight?
114
+ 4. **Failure paths** — what upstream dependencies can fail? What happens on timeout, 5xx, partial response, network partition?
115
+ 5. **Security angles** — can a lower-privileged user trigger it? Can input be injected into a downstream query/command?
116
+ 6. **Time and clock** — what happens across DST transitions, leap seconds, at midnight, with negative clock skew?
117
+ 7. **Resource limits** — what happens under memory pressure, slow disk, saturated queues?
118
+ 8. **Idempotency** — is the operation safe to retry? What if the same request arrives twice with the same id?
119
+
120
+ ---
121
+
122
+ <!-- SECTION: token-budget -->
123
+ ## Token Budget (NFR-042)
124
+
125
+ The skill invocation MUST stay under 8K tokens total. Guidance:
126
+
127
+ - Input context: ≤ 5K tokens (story, ACs, optional architecture excerpt)
128
+ - Output: ≤ 3K tokens (typically 5–15 edge cases)
129
+ - If the caller supplies an input that would exceed 5K, the skill truncates the `architecture_excerpt` first, then the `story_description`, and finally caps the number of ACs considered (with a warning)
130
+ - If the generated output would exceed 3K tokens, the skill truncates the list to the highest-priority edge cases (boundary + error + security first) and logs a warning to Dev Notes: "Edge case output truncated at 3K tokens — N results dropped"
131
+
132
+ The caller is responsible for checking the total token count before persisting the output. When running inside `/gaia-create-story`, token usage is logged to Dev Notes whenever it exceeds 80% of the 8K budget.
133
+
134
+ ---
135
+
136
+ <!-- SECTION: failure-handling -->
137
+ ## Failure and Timeout Handling
138
+
139
+ This skill is **non-blocking**. Callers MUST treat skill failure as a warning, never as a hard error.
140
+
141
+ | Failure mode | Caller behavior |
142
+ |---|---|
143
+ | Skill file not found | Log warning "Edge case skill not loaded — continuing without edge cases", set `edge_case_results = []`, proceed |
144
+ | Skill invocation timeout (> 30s wall clock) | Log warning "Edge case analysis timed out — continuing without edge cases", set `edge_case_results = []`, proceed |
145
+ | Malformed output (missing required fields) | Log warning "Edge case output schema invalid — continuing without edge cases", set `edge_case_results = []`, proceed |
146
+ | Token budget exceeded | Truncate output with warning (see Token Budget section), still return partial results |
147
+ | Empty result (no edge cases found) | Log note "No edge cases identified for {story_key}", set `edge_case_results = []`, proceed normally — this is a valid outcome, NOT a failure |
148
+
149
+ Under no circumstances should an edge-case failure block story creation. The story is written to disk with whatever `edge_case_results` is available, plus a Dev Notes entry describing any degradation.
150
+
151
+ ---
152
+
153
+ <!-- SECTION: usage-example -->
154
+ ## Usage Example
155
+
156
+ Input (from `/gaia-create-story` context):
157
+
158
+ ```yaml
159
+ story_key: "E19-S9"
160
+ story_title: "Edge Case Mandatory Sub-Step"
161
+ size: "M"
162
+ acceptance_criteria:
163
+ - "Given a story of size M, when create-story runs, then edge-cases.md is invoked"
164
+ - "Given the skill times out, when it fails, then story creation continues with a warning"
165
+ ```
166
+
167
+ Output:
168
+
169
+ ```yaml
170
+ edge_case_results:
171
+ - id: "EC-1"
172
+ scenario: "Story size missing from frontmatter"
173
+ input: "size field is null or absent"
174
+ expected: "Default to skip (treat as S), log warning"
175
+ category: "error"
176
+ - id: "EC-2"
177
+ scenario: "Skill file deleted between registry load and invocation"
178
+ input: "edge-cases.md missing from dev/skills/"
179
+ expected: "Caller logs warning, sets edge_case_results=[], continues"
180
+ category: "error"
181
+ - id: "EC-3"
182
+ scenario: "Input context exceeds 5K tokens"
183
+ input: "Very large architecture_excerpt"
184
+ expected: "Truncate architecture_excerpt first, warn, proceed"
185
+ category: "boundary"
186
+ - id: "EC-4"
187
+ scenario: "Two simultaneous invocations for the same story"
188
+ input: "Parallel runs of /gaia-create-story E19-S9"
189
+ expected: "Each invocation is independent; no shared state"
190
+ category: "concurrency"
191
+ ```
192
+
193
+ ---
194
+
195
+ <!-- SECTION: notes -->
196
+ ## Notes
197
+
198
+ - The `edge_case_results` output is captured in the caller's runtime state as a named variable before the story file is written. The create-story workflow stores these results in the story's Dev Notes or Test Scenarios section.
199
+ - This skill does NOT modify files on disk. Callers persist the output.
200
+ - The skill is stack-agnostic — it works for typescript, angular, flutter, java, python, mobile, and go stories.
201
+ - See also: `_gaia/core/tasks/review-edge-case-hunter.xml` for the standalone `/gaia-edge-cases` workflow task that wraps this skill.
@@ -0,0 +1,194 @@
1
+ # CI Test Execution Detection — Brownfield Knowledge Fragment
2
+
3
+ > **Version:** 1.0.0
4
+ > **Story:** E19-S21 (documents implementation from E19-S13)
5
+ > **Traces to:** FR-232, NFR-041
6
+ > **Category:** runtime-behavior
7
+ > **Source of truth:** `Gaia-framework/src/brownfield/ci-test-detector.js`
8
+
9
+ ## Purpose
10
+
11
+ Detect whether a brownfield project actually executes tests in CI by scanning
12
+ its CI configuration files for real test execution steps. This fragment
13
+ documents the detection patterns, the output schema, and the zero-false-positive
14
+ rules that the programmatic module enforces.
15
+
16
+ This knowledge fragment is the companion to `test-execution-scan.md`:
17
+
18
+ - `test-execution-scan.md` covers **local test runner** detection and execution
19
+ (Jest, Vitest, pytest, JUnit, Go test, Flutter, BATS, etc.)
20
+ - `ci-test-detection.md` (this file) covers **CI pipeline** test execution
21
+ detection — whether the project's pipelines actually run those runners on
22
+ every commit.
23
+
24
+ ## Scope
25
+
26
+ The CI test execution detector identifies the **first** CI provider found that
27
+ runs tests. It scans supported providers in priority order and stops on the
28
+ first match. Detection is strictly pattern-based against configuration file
29
+ contents — file presence alone is never sufficient.
30
+
31
+ ### Supported CI providers
32
+
33
+ The implementation in `ci-test-detector.js` (E19-S13) supports **6 providers**:
34
+
35
+ | # | Provider | Enum value | Config file(s) | Field scanned |
36
+ |---|----------------|------------------|------------------------------------|--------------------------|
37
+ | 1 | GitHub Actions | `github-actions` | `.github/workflows/*.yml` / `*.yaml` | `run:` |
38
+ | 2 | GitLab CI | `gitlab` | `.gitlab-ci.yml` | `script:` list items |
39
+ | 3 | CircleCI | `circleci` | `.circleci/config.yml` | `run:` |
40
+ | 4 | Azure Pipelines| `azure` | `azure-pipelines.yml` | `script:` / `bash:` |
41
+ | 5 | Jenkins | `jenkins` | `Jenkinsfile` | `sh '...'` / `sh "..."` |
42
+ | 6 | Bitbucket | `bitbucket` | `bitbucket-pipelines.yml` | `script:` list items |
43
+
44
+ > **Note — Travis CI is intentionally NOT supported.** The original E19-S13
45
+ > story text mentioned `.travis.yml`, but Travis CI is deprecated and the
46
+ > implementation replaced it with `.gitlab-ci.yml` and `bitbucket-pipelines.yml`.
47
+ > This fragment reflects the actual shipped implementation, not the original
48
+ > story wording.
49
+
50
+ ## Test Command Patterns
51
+
52
+ A CI step qualifies as "test execution" only when its command value matches
53
+ one of the following canonical patterns (from `TEST_COMMAND_PATTERNS` in the
54
+ module):
55
+
56
+ - `npm test`
57
+ - `npm run test`
58
+ - `pytest`
59
+ - `./gradlew test` / `./gradlew.bat test`
60
+ - `go test`
61
+ - `bats`
62
+ - `mvn test`
63
+ - `vitest`
64
+ - `npx vitest`
65
+
66
+ Patterns use word-boundary regexes, so `pytest-cov` still matches via `pytest`,
67
+ but embedded mentions inside unrelated tokens do not.
68
+
69
+ ## Detection Algorithm by Provider
70
+
71
+ All YAML scanners share the same pipeline: split the file into lines, skip
72
+ comments, extract the command value from the recognized field, apply the
73
+ false-positive guard, then apply the test command patterns.
74
+
75
+ ### GitHub Actions
76
+
77
+ - **Glob:** `.github/workflows/*.yml` and `*.yaml`
78
+ - **Line pattern:** `/^\s*-?\s*run:\s*(.+)$/`
79
+ - **Behavior:** iterate every workflow file in the directory; collect any `run:`
80
+ field whose value matches a test command pattern.
81
+ - **Stops on first provider with ≥ 1 matching command.**
82
+
83
+ ### GitLab CI
84
+
85
+ - **File:** `.gitlab-ci.yml`
86
+ - **Line pattern:** `/^\s+-\s+(.+)$/` (YAML list items under `script:`)
87
+ - **Behavior:** treat `script:` arrays as the authoritative source. Bare list
88
+ items whose value matches a test command pattern count as test execution.
89
+
90
+ ### CircleCI
91
+
92
+ - **File:** `.circleci/config.yml`
93
+ - **Line pattern:** `/^\s*-?\s*run:\s*(.+)$/`
94
+ - **Behavior:** mirrors the GitHub Actions scanner — `steps[].run.command`
95
+ fields are matched via the `run:` prefix.
96
+
97
+ ### Azure Pipelines
98
+
99
+ - **File:** `azure-pipelines.yml`
100
+ - **Line pattern:** `/^\s*-?\s*(?:script|bash):\s*(.+)$/`
101
+ - **Behavior:** matches both `script:` and `bash:` task values.
102
+
103
+ ### Jenkins
104
+
105
+ - **File:** `Jenkinsfile` (Groovy, not YAML)
106
+ - **Comment skip:** lines starting with `//`
107
+ - **Line pattern:** `/\bsh\s+['"](.+?)['"]/`
108
+ - **Behavior:** matches `sh 'npm test'` and `sh "pytest"` style declarative
109
+ and scripted pipeline steps.
110
+
111
+ ### Bitbucket Pipelines
112
+
113
+ - **File:** `bitbucket-pipelines.yml`
114
+ - **Line pattern:** `/^\s+-\s+(.+)$/` (YAML list items under `script:`)
115
+ - **Behavior:** identical to the GitLab scanner — both providers express steps
116
+ as YAML list items beneath `script:`.
117
+
118
+ ## Output Schema
119
+
120
+ The detector resolves to a single object per project:
121
+
122
+ ```json
123
+ {
124
+ "ci_test_execution": "github-actions" | "gitlab" | "circleci" | "azure" | "jenkins" | "bitbucket" | null,
125
+ "test_commands": ["<matched command line>", "..."]
126
+ }
127
+ ```
128
+
129
+ - `ci_test_execution` — the enum value of the first provider whose config file
130
+ contains at least one matching test command. `null` when no supported
131
+ provider contains a matching command.
132
+ - `test_commands` — the raw command strings that matched the test patterns, in
133
+ discovery order. Empty array when `ci_test_execution` is `null`.
134
+
135
+ Consumers should treat a `null` result as "no CI test execution detected" —
136
+ the project may still have local test runners (see `test-execution-scan.md`),
137
+ but no supported CI pipeline runs them on commit.
138
+
139
+ ## Zero-False-Positive Rules (NFR-041)
140
+
141
+ The detector enforces NFR-041 (zero false positives) through three guards:
142
+
143
+ 1. **Config file presence is not sufficient.** A repo may contain
144
+ `.github/workflows/deploy.yml` that only runs `terraform apply` — it is not
145
+ a test execution pipeline and will produce `null`.
146
+ 2. **Comment skip.** YAML lines matching `/^\s*#/` and Groovy lines matching
147
+ `/^\s*\/\//` are skipped before any command extraction. Test commands
148
+ mentioned inside comments never qualify.
149
+ 3. **False-positive guard.** Extracted command values starting with `echo` are
150
+ rejected via `isFalsePositive`. This filters banners such as
151
+ `- echo "running npm test now"` that look like test execution but are just
152
+ log lines.
153
+
154
+ Pattern matches use word boundaries (`\b`) so `npm-test-utils` or
155
+ `pytestplugin` do not trigger a match against `npm test` / `pytest`. Only
156
+ actual command invocations count.
157
+
158
+ ## Integration Notes
159
+
160
+ - **Companion to test-execution-scan.md.** The two fragments are designed to
161
+ be read together during brownfield onboarding. Local test runner detection
162
+ answers "does this project have tests?"; CI test detection answers "does
163
+ the pipeline actually run them?". A gap exists when the runner detector
164
+ finds a suite but the CI detector returns `null`.
165
+ - **Correlation with E19-S12 runner detection.** The test commands matched
166
+ here (`npm test`, `pytest`, `./gradlew test`, etc.) deliberately mirror the
167
+ runner commands produced by `test-runner-detector.js`. Brownfield workflows
168
+ can cross-reference the two outputs to flag pipelines that run tests for
169
+ only a subset of detected runners (e.g., a monorepo that runs `npm test`
170
+ but skips `pytest`).
171
+ - **First-match semantics.** The detector iterates providers in the fixed
172
+ order (GitHub Actions → GitLab → CircleCI → Azure → Jenkins → Bitbucket)
173
+ and returns on the first match. Projects that use more than one CI provider
174
+ will only surface the first match — callers that need multi-provider
175
+ detection should invoke the individual scanners directly.
176
+
177
+ ## Style and Format
178
+
179
+ This fragment follows the same conventions as its sibling knowledge files in
180
+ `_gaia/lifecycle/knowledge/brownfield/`:
181
+
182
+ - `test-execution-scan.md` — local test runner detection
183
+ - `config-contradiction-scan.md` — configuration contradiction scanning
184
+ - `dead-code-scan.md` — unused code detection
185
+
186
+ ## See Also
187
+
188
+ - [`test-execution-scan.md`](./test-execution-scan.md) — local test runner
189
+ detection and execution (the companion scan that runs tests, while this
190
+ fragment identifies whether CI runs them)
191
+ - `Gaia-framework/src/brownfield/ci-test-detector.js` — the source of truth
192
+ for every pattern and enum value in this fragment
193
+ - `Gaia-framework/src/brownfield/test-runner-detector.js` — the runner
194
+ detector correlated with CI test command matching
@@ -254,3 +254,16 @@ Format:
254
254
 
255
255
  {YAML gap entries here}
256
256
  ```
257
+
258
+ ## See Also
259
+
260
+ - [`ci-test-detection.md`](./ci-test-detection.md) — CI pipeline test execution
261
+ detection. This scan covers **local test runner** detection and execution
262
+ (Jest, Vitest, pytest, JUnit, Go test, Flutter, BATS); `ci-test-detection.md`
263
+ is the companion fragment that covers **CI pipeline** test execution
264
+ detection across GitHub Actions, GitLab CI, CircleCI, Azure Pipelines,
265
+ Jenkins, and Bitbucket Pipelines. Read both together during brownfield
266
+ onboarding: the runner scan answers "does this project have tests?" and the
267
+ CI scan answers "does the pipeline actually run them?".
268
+ - `config-contradiction-scan.md` — configuration contradiction scanning
269
+ - `dead-code-scan.md` — unused code detection
@@ -1,9 +1,9 @@
1
1
  ---
2
2
  name: document-rulesets
3
- version: '1.1'
3
+ version: '1.2'
4
4
  applicable_agents: [validator]
5
- description: 'Document-specific validation rulesets for artifact type detection (path and frontmatter), structural quality checks per artifact type (application, infrastructure, platform PRDs), and two-pass validation logic.'
6
- sections: [type-detection, prd-rules, infra-prd-rules, platform-prd-rules, arch-rules, ux-rules, test-plan-rules, epics-rules, two-pass-logic]
5
+ description: 'Document-specific validation rulesets for artifact type detection (path and frontmatter), structural quality checks per artifact type (application, infrastructure, platform PRDs, gap analysis output), and two-pass validation logic.'
6
+ sections: [type-detection, prd-rules, infra-prd-rules, platform-prd-rules, arch-rules, ux-rules, test-plan-rules, epics-rules, gap-analysis-rules, two-pass-logic]
7
7
  ---
8
8
 
9
9
  <!-- SECTION: type-detection -->
@@ -38,6 +38,7 @@ If no frontmatter match is found, detect the artifact type from the file path ba
38
38
  | `ux-design.md` | ux-rules | UX Design Specification |
39
39
  | `test-plan.md` | test-plan-rules | Test Plan |
40
40
  | `epics-and-stories.md` | epics-rules | Epics and Stories |
41
+ | `test-gap-analysis-*.md` | gap-analysis-rules | Test Gap Analysis Output (E19-S3, FR-223) |
41
42
 
42
43
  ### Path-Based Detection Algorithm
43
44
 
@@ -223,6 +224,95 @@ Verify each story has: key, title, user story (As a/I want/So that), acceptance
223
224
  Verify all `depends_on` and `blocks` references point to existing stories. Check for circular dependencies. Verify priority ordering respects dependency chains. Flag broken references as WARNING.
224
225
  <!-- END SECTION -->
225
226
 
227
+ <!-- SECTION: gap-analysis-rules -->
228
+ ## Gap Analysis Output Validation Rules
229
+
230
+ Structural quality checks for the test gap analysis output artifact produced by `/gaia-test-gap-analysis`. These rules validate conformance to the FR-223 output schema defined by `_gaia/lifecycle/templates/test-gap-analysis-template.md` (E19-S3, ADR-030 §10.22).
231
+
232
+ **Scope:** files matching `docs/test-artifacts/test-gap-analysis-*.md`.
233
+
234
+ **Schema version:** 1.0.0
235
+
236
+ ### YAML Frontmatter — Required Fields
237
+
238
+ The frontmatter block must parse cleanly as YAML and contain all five required fields. Missing any field is a WARNING. A frontmatter parse failure (malformed YAML, missing `---` delimiters, unquoted special characters) is a CRITICAL finding.
239
+
240
+ | Field | Type | Constraint |
241
+ |-------|------|------------|
242
+ | `mode` | enum | must be `coverage` or `verification` |
243
+ | `date` | string | non-empty ISO-8601 date (YYYY-MM-DD) |
244
+ | `project` | string | non-empty |
245
+ | `story_count` | integer | >= 0 |
246
+ | `gap_count` | integer | >= 0 |
247
+
248
+ ### Gap Type Enum (Closed)
249
+
250
+ The `gap_type` field on every Gap Table row must match exactly one of these four values. Any other value is a CRITICAL finding. This enum is closed by design — adding a new gap type is a breaking change and requires a schema version bump.
251
+
252
+ - `missing-test`
253
+ - `unexecuted`
254
+ - `uncovered-ac`
255
+ - `missing-edge-case`
256
+
257
+ ### Severity Enum (Closed)
258
+
259
+ The `severity` field on every Gap Table row must match exactly one of these four values. Any other value is a CRITICAL finding. Like `gap_type`, this enum is closed by design — adding a new severity is a breaking change and requires a schema version bump.
260
+
261
+ - `critical`
262
+ - `high`
263
+ - `medium`
264
+ - `low`
265
+
266
+ ### Required Sections
267
+
268
+ The output must contain these four top-level sections in this order. A missing section, or sections appearing out of order, is a WARNING.
269
+
270
+ 1. `## Executive Summary`
271
+ 2. `## Gap Table`
272
+ 3. `## Per-Story Detail`
273
+ 4. `## Recommendations`
274
+
275
+ ### Gap Table Column Order
276
+
277
+ The Gap Table must declare its columns in this exact order. A table with columns in a different order or with missing columns is a WARNING.
278
+
279
+ 1. `story_key`
280
+ 2. `gap_type`
281
+ 3. `severity`
282
+ 4. `description`
283
+
284
+ ### Cross-Field Consistency
285
+
286
+ - If `gap_count == 0`, the Executive Summary should contain the phrase `No coverage gaps detected` — absence is an INFO finding.
287
+ - `gap_count` should equal the number of data rows in the Gap Table (excluding the header and separator rows) — mismatch is a WARNING.
288
+
289
+ ### Generated vs Executed Tracking (E19-S7, FR-226)
290
+
291
+ Verification-mode outputs must report generated and executed test case
292
+ counts per story and in aggregate. These rules apply only when `mode:
293
+ verification` appears in the frontmatter.
294
+
295
+ - The Executive Summary must contain a `Generated vs Executed` row in the
296
+ format `{total_executed}/{total_generated} ({aggregate_exec_ratio}%)`.
297
+ Missing row is a WARNING.
298
+ - Each Per-Story Detail subsection must declare three fields: `generated`
299
+ (integer >= 0), `executed` (integer >= 0), and `exec_ratio` (percentage
300
+ with one decimal place, e.g., `60.0%`). A missing field on a present
301
+ story subsection is a WARNING.
302
+ - `exec_ratio` must equal `round((executed / generated) * 100, 1)` when
303
+ `generated > 0`. When `generated == 0`, `exec_ratio` must be `0.0%` and
304
+ the subsection should include the note `0/0 (no generated tests)` —
305
+ absence of the note is an INFO finding.
306
+ - Stories with `executed == 0` and `generated > 0` should be flagged as
307
+ HIGH gap priority — absence of the HIGH flag for such stories is a
308
+ WARNING.
309
+ - The aggregate row values must be consistent with the sum of per-story
310
+ counts: `total_generated == sum(story.generated)` and
311
+ `total_executed == sum(story.executed)` — mismatch is a WARNING.
312
+
313
+ **References:** FR-223, FR-226, ADR-030 §10.22, stories E19-S3 and E19-S7, test cases TGA-17–20, TGA-30–32.
314
+ <!-- END SECTION -->
315
+
226
316
  <!-- SECTION: two-pass-logic -->
227
317
  ## Two-Pass Validation Logic
228
318
 
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  template: 'story'
3
- version: 1.2.0
3
+ version: 1.4.0
4
4
  used_by: ['create-story']
5
5
  key: "{story_key}"
6
6
  title: "{story_title}"
@@ -109,12 +109,12 @@ As a {role}, I want to {action}, so that {benefit}.
109
109
 
110
110
  | Review | Status | Report |
111
111
  |--------|--------|--------|
112
- | Code Review | PENDING | — |
113
- | QA Tests | PENDING | — |
114
- | Security Review | PENDING | — |
115
- | Test Automation | PENDING | — |
116
- | Test Review | PENDING | — |
117
- | Performance Review | PENDING | — |
112
+ | Code Review | UNVERIFIED | — |
113
+ | QA Tests | UNVERIFIED | — |
114
+ | Security Review | UNVERIFIED | — |
115
+ | Test Automation | UNVERIFIED | — |
116
+ | Test Review | UNVERIFIED | — |
117
+ | Performance Review | UNVERIFIED | — |
118
118
 
119
119
  > Story moves to `done` only when ALL reviews show PASSED.
120
120