forgecraft-mcp 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +525 -525
- package/dist/artifacts/commit-hooks.d.ts +1 -1
- package/dist/artifacts/commit-hooks.d.ts.map +1 -1
- package/dist/artifacts/commit-hooks.js +2 -0
- package/dist/artifacts/commit-hooks.js.map +1 -1
- package/dist/cli/commands.d.ts +35 -1
- package/dist/cli/commands.d.ts.map +1 -1
- package/dist/cli/commands.js +109 -2
- package/dist/cli/commands.js.map +1 -1
- package/dist/cli/help.d.ts.map +1 -1
- package/dist/cli/help.js +51 -44
- package/dist/cli/help.js.map +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +10 -1
- package/dist/cli.js.map +1 -1
- package/dist/registry/renderer-skeletons.js +92 -92
- package/dist/shared/gs-score-logger.js +6 -6
- package/dist/shared/result-utils.d.ts +27 -0
- package/dist/shared/result-utils.d.ts.map +1 -0
- package/dist/shared/result-utils.js +41 -0
- package/dist/shared/result-utils.js.map +1 -0
- package/dist/tools/add-module.js +123 -123
- package/dist/tools/advice-registry.js +18 -18
- package/dist/tools/check-cascade-report.js +64 -64
- package/dist/tools/close-cycle-helpers.d.ts +21 -2
- package/dist/tools/close-cycle-helpers.d.ts.map +1 -1
- package/dist/tools/close-cycle-helpers.js +66 -10
- package/dist/tools/close-cycle-helpers.js.map +1 -1
- package/dist/tools/close-cycle.d.ts +2 -2
- package/dist/tools/close-cycle.d.ts.map +1 -1
- package/dist/tools/close-cycle.js +1 -1
- package/dist/tools/close-cycle.js.map +1 -1
- package/dist/tools/configure-mcp.d.ts +3 -0
- package/dist/tools/configure-mcp.d.ts.map +1 -1
- package/dist/tools/configure-mcp.js +10 -0
- package/dist/tools/configure-mcp.js.map +1 -1
- package/dist/tools/consolidate-status.d.ts +81 -0
- package/dist/tools/consolidate-status.d.ts.map +1 -0
- package/dist/tools/consolidate-status.js +251 -0
- package/dist/tools/consolidate-status.js.map +1 -0
- package/dist/tools/forgecraft-dispatch.d.ts.map +1 -1
- package/dist/tools/forgecraft-dispatch.js +13 -0
- package/dist/tools/forgecraft-dispatch.js.map +1 -1
- package/dist/tools/forgecraft-router.d.ts +8 -0
- package/dist/tools/forgecraft-router.d.ts.map +1 -1
- package/dist/tools/forgecraft-router.js +21 -1
- package/dist/tools/forgecraft-router.js.map +1 -1
- package/dist/tools/forgecraft-schema-params.d.ts +13 -4
- package/dist/tools/forgecraft-schema-params.d.ts.map +1 -1
- package/dist/tools/forgecraft-schema-params.js +21 -0
- package/dist/tools/forgecraft-schema-params.js.map +1 -1
- package/dist/tools/forgecraft-schema.d.ts +14 -5
- package/dist/tools/forgecraft-schema.d.ts.map +1 -1
- package/dist/tools/forgecraft-schema.js +3 -0
- package/dist/tools/forgecraft-schema.js.map +1 -1
- package/dist/tools/gate-violations.d.ts +59 -0
- package/dist/tools/gate-violations.d.ts.map +1 -0
- package/dist/tools/gate-violations.js +152 -0
- package/dist/tools/gate-violations.js.map +1 -0
- package/dist/tools/generate-session-prompt.d.ts +3 -3
- package/dist/tools/generate-session-prompt.d.ts.map +1 -1
- package/dist/tools/generate-session-prompt.js +57 -15
- package/dist/tools/generate-session-prompt.js.map +1 -1
- package/dist/tools/refresh-output.js +14 -14
- package/dist/tools/roadmap-builder.d.ts.map +1 -1
- package/dist/tools/roadmap-builder.js +19 -9
- package/dist/tools/roadmap-builder.js.map +1 -1
- package/dist/tools/scaffold-spec-stubs.js +115 -115
- package/dist/tools/scaffold-templates.js +62 -62
- package/dist/tools/session-prompt-builders.d.ts.map +1 -1
- package/dist/tools/session-prompt-builders.js +34 -10
- package/dist/tools/session-prompt-builders.js.map +1 -1
- package/dist/tools/setup-artifact-writers.d.ts +30 -0
- package/dist/tools/setup-artifact-writers.d.ts.map +1 -1
- package/dist/tools/setup-artifact-writers.js +120 -8
- package/dist/tools/setup-artifact-writers.js.map +1 -1
- package/dist/tools/setup-phase1.d.ts +3 -0
- package/dist/tools/setup-phase1.d.ts.map +1 -1
- package/dist/tools/setup-phase1.js +79 -35
- package/dist/tools/setup-phase1.js.map +1 -1
- package/dist/tools/setup-phase2.d.ts +2 -0
- package/dist/tools/setup-phase2.d.ts.map +1 -1
- package/dist/tools/setup-phase2.js +10 -1
- package/dist/tools/setup-phase2.js.map +1 -1
- package/dist/tools/setup-project.d.ts +18 -0
- package/dist/tools/setup-project.d.ts.map +1 -1
- package/dist/tools/setup-project.js +77 -1
- package/dist/tools/setup-project.js.map +1 -1
- package/dist/tools/spec-parser-tags.d.ts +9 -0
- package/dist/tools/spec-parser-tags.d.ts.map +1 -1
- package/dist/tools/spec-parser-tags.js +92 -0
- package/dist/tools/spec-parser-tags.js.map +1 -1
- package/package.json +89 -86
- package/templates/analytics/instructions.yaml +37 -37
- package/templates/analytics/mcp-servers.yaml +11 -11
- package/templates/analytics/structure.yaml +25 -25
- package/templates/api/instructions.yaml +231 -231
- package/templates/api/mcp-servers.yaml +22 -13
- package/templates/api/nfr.yaml +23 -23
- package/templates/api/review.yaml +103 -103
- package/templates/api/structure.yaml +34 -34
- package/templates/api/verification.yaml +132 -132
- package/templates/cli/instructions.yaml +31 -31
- package/templates/cli/mcp-servers.yaml +11 -11
- package/templates/cli/review.yaml +53 -53
- package/templates/cli/structure.yaml +16 -16
- package/templates/data-lineage/instructions.yaml +28 -28
- package/templates/data-lineage/mcp-servers.yaml +22 -22
- package/templates/data-pipeline/instructions.yaml +84 -84
- package/templates/data-pipeline/mcp-servers.yaml +13 -13
- package/templates/data-pipeline/nfr.yaml +39 -39
- package/templates/data-pipeline/structure.yaml +23 -23
- package/templates/fintech/hooks.yaml +55 -55
- package/templates/fintech/instructions.yaml +112 -112
- package/templates/fintech/mcp-servers.yaml +13 -13
- package/templates/fintech/nfr.yaml +46 -46
- package/templates/fintech/playbook.yaml +210 -210
- package/templates/fintech/verification.yaml +239 -239
- package/templates/game/instructions.yaml +289 -289
- package/templates/game/mcp-servers.yaml +38 -38
- package/templates/game/nfr.yaml +64 -64
- package/templates/game/playbook.yaml +214 -214
- package/templates/game/review.yaml +97 -97
- package/templates/game/structure.yaml +67 -67
- package/templates/game/verification.yaml +174 -174
- package/templates/healthcare/instructions.yaml +42 -42
- package/templates/healthcare/mcp-servers.yaml +13 -13
- package/templates/healthcare/nfr.yaml +47 -47
- package/templates/hipaa/instructions.yaml +41 -41
- package/templates/hipaa/mcp-servers.yaml +13 -13
- package/templates/infra/instructions.yaml +104 -104
- package/templates/infra/mcp-servers.yaml +20 -20
- package/templates/infra/nfr.yaml +46 -46
- package/templates/infra/review.yaml +65 -65
- package/templates/infra/structure.yaml +25 -25
- package/templates/library/instructions.yaml +36 -36
- package/templates/library/mcp-servers.yaml +20 -20
- package/templates/library/review.yaml +56 -56
- package/templates/library/structure.yaml +19 -19
- package/templates/medallion-architecture/instructions.yaml +41 -41
- package/templates/medallion-architecture/mcp-servers.yaml +22 -22
- package/templates/ml/instructions.yaml +85 -85
- package/templates/ml/mcp-servers.yaml +11 -11
- package/templates/ml/nfr.yaml +39 -39
- package/templates/ml/structure.yaml +25 -25
- package/templates/ml/verification.yaml +156 -156
- package/templates/mobile/instructions.yaml +44 -44
- package/templates/mobile/mcp-servers.yaml +11 -11
- package/templates/mobile/nfr.yaml +49 -49
- package/templates/mobile/structure.yaml +27 -27
- package/templates/mobile/verification.yaml +121 -121
- package/templates/observability-xray/instructions.yaml +40 -40
- package/templates/observability-xray/mcp-servers.yaml +15 -15
- package/templates/realtime/instructions.yaml +42 -42
- package/templates/realtime/mcp-servers.yaml +13 -13
- package/templates/soc2/instructions.yaml +41 -41
- package/templates/soc2/mcp-servers.yaml +24 -24
- package/templates/social/instructions.yaml +43 -43
- package/templates/social/mcp-servers.yaml +24 -24
- package/templates/state-machine/instructions.yaml +42 -42
- package/templates/state-machine/mcp-servers.yaml +11 -11
- package/templates/tools-registry.yaml +164 -164
- package/templates/universal/hooks.yaml +723 -531
- package/templates/universal/instructions.yaml +1692 -1692
- package/templates/universal/mcp-servers.yaml +50 -50
- package/templates/universal/nfr.yaml +197 -197
- package/templates/universal/reference.yaml +326 -326
- package/templates/universal/review.yaml +204 -204
- package/templates/universal/skills.yaml +262 -262
- package/templates/universal/structure.yaml +67 -67
- package/templates/universal/verification.yaml +416 -416
- package/templates/web-react/hooks.yaml +44 -44
- package/templates/web-react/instructions.yaml +207 -207
- package/templates/web-react/mcp-servers.yaml +20 -20
- package/templates/web-react/nfr.yaml +27 -27
- package/templates/web-react/review.yaml +94 -94
- package/templates/web-react/structure.yaml +46 -46
- package/templates/web-react/verification.yaml +126 -126
- package/templates/web-static/instructions.yaml +115 -115
- package/templates/web-static/mcp-servers.yaml +20 -20
- package/templates/web3/instructions.yaml +44 -44
- package/templates/web3/mcp-servers.yaml +11 -11
- package/templates/web3/verification.yaml +159 -159
- package/templates/zero-trust/instructions.yaml +41 -41
- package/templates/zero-trust/mcp-servers.yaml +15 -15
|
@@ -1,416 +1,416 @@
|
|
|
1
|
-
tag: UNIVERSAL
|
|
2
|
-
section: verification
|
|
3
|
-
title: "Contract-First Baseline Verification"
|
|
4
|
-
description: >
|
|
5
|
-
Applicable to every domain. Establishes type contracts, schema validation,
|
|
6
|
-
unit-level assertions, and hardening gates as the completeness floor before
|
|
7
|
-
any domain-specific verification is applied.
|
|
8
|
-
|
|
9
|
-
Phases gate on release_phase: contract-definition + execution + evidence gates
|
|
10
|
-
are always active. pre-release-hardening gates are blocking at pre-release.
|
|
11
|
-
release-candidate gates are blocking at release-candidate.
|
|
12
|
-
deployment-gates and post-deployment gates are blocking at production.
|
|
13
|
-
|
|
14
|
-
S = 0.40 is achievable with the three base phases alone. Hardening phases
|
|
15
|
-
raise the ceiling to 0.90 — domain strategies supply the remaining 0.10.
|
|
16
|
-
cycle_model: |
|
|
17
|
-
ForgeCraft verification maps to the GS paper's 4 loops + 1 independent cycle:
|
|
18
|
-
|
|
19
|
-
LOOP 1 — INITIALIZATION (once per project)
|
|
20
|
-
Runs: spec → architecture → constitution → ADRs → use cases
|
|
21
|
-
Gate: derivability — stateless agent can build system from artifacts alone
|
|
22
|
-
OWASP: N/A (no running service yet)
|
|
23
|
-
|
|
24
|
-
LOOP 2 — INCREMENTAL / SHORT LOOP (per roadmap item, agile-style)
|
|
25
|
-
Runs: partial spec → implement → verify → Status.md → human review
|
|
26
|
-
Gate: tests pass + feature exercised at boundary + cascade complete
|
|
27
|
-
OWASP ASVS Level 1: static checks enforced at every commit (execution.owasp-l1-static)
|
|
28
|
-
Less deterministic — human in the loop guides direction at each iteration
|
|
29
|
-
|
|
30
|
-
LOOP 3 — PRE-RELEASE (before environment promotion)
|
|
31
|
-
Runs: mutation testing + DAST + load test + chaos resilience
|
|
32
|
-
Gate: mutation score (MSI) ≥ 80%, zero HIGH DAST findings, p95 latency ≤ SLA
|
|
33
|
-
OWASP ASVS Level 2: dynamic analysis, systematic verification (pre-release-hardening phase)
|
|
34
|
-
|
|
35
|
-
LOOP 4 — HOTFIX (emergency patches)
|
|
36
|
-
Runs: minimal fix → smoke tests → ADR + cascade immediately after stabilization
|
|
37
|
-
Gate: smoke tests pass; ADR filed within 24h of stabilization
|
|
38
|
-
OWASP ASVS Level 1 minimum; Level 2 if auth/data path affected
|
|
39
|
-
|
|
40
|
-
LOOP 5 — DEPLOYMENT/HARDENING (independent, longer cadence)
|
|
41
|
-
Runs: post-implementation, on its own schedule (weekly/monthly/per-major)
|
|
42
|
-
Gate: pentest, full mutation, compatibility matrix, accessibility audit
|
|
43
|
-
OWASP ASVS Level 3: penetration testing, BOLA/IDOR, session fixation
|
|
44
|
-
Managed independently from feature loops — see deployment-gates and release-candidate phases
|
|
45
|
-
uncertainty_levels:
|
|
46
|
-
- deterministic
|
|
47
|
-
completeness_ceiling: 0.90
|
|
48
|
-
|
|
49
|
-
phases:
|
|
50
|
-
|
|
51
|
-
- id: contract-definition
|
|
52
|
-
title: "Define Contracts Before Code"
|
|
53
|
-
rationale: >
|
|
54
|
-
Every function, module boundary, and API surface must have a machine-checkable
|
|
55
|
-
contract before implementation begins. This transforms implicit intent into
|
|
56
|
-
explicit invariants the verify loop can check automatically.
|
|
57
|
-
steps:
|
|
58
|
-
- id: define-type-contracts
|
|
59
|
-
instruction: >
|
|
60
|
-
For every public function and class, define TypeScript types or Python type
|
|
61
|
-
annotations for all parameters and return values. No `any`, no untyped
|
|
62
|
-
function signatures. Run `tsc --noEmit` or `mypy` before proceeding.
|
|
63
|
-
contract: >
|
|
64
|
-
Zero type errors reported by the compiler. Every exported function has
|
|
65
|
-
explicit parameter and return types.
|
|
66
|
-
tools: ["tsc --noEmit", "mypy --strict"]
|
|
67
|
-
expected_output: "Exit code 0 from type checker with zero errors"
|
|
68
|
-
pass_criterion: "tsc --noEmit (or mypy --strict) exits 0"
|
|
69
|
-
|
|
70
|
-
- id: define-schema-contracts
|
|
71
|
-
instruction: >
|
|
72
|
-
For every external interface (HTTP request/response, config file, event payload,
|
|
73
|
-
CLI flags), define a Zod schema (TypeScript) or Pydantic model (Python) colocated
|
|
74
|
-
with the module that owns it. The schema must be the single source of truth —
|
|
75
|
-
types must be derived from it, not maintained separately.
|
|
76
|
-
contract: >
|
|
77
|
-
All external interfaces have a named Zod schema or Pydantic model that
|
|
78
|
-
is used at the entry point and is importable by tests.
|
|
79
|
-
tools: ["zod", "pydantic"]
|
|
80
|
-
expected_output: "Schema files present and exported from module barrel"
|
|
81
|
-
pass_criterion: >
|
|
82
|
-
grep -r 'z.object\|z.string\|BaseModel' src/ returns ≥1 result per external interface
|
|
83
|
-
|
|
84
|
-
- id: define-error-contracts
|
|
85
|
-
instruction: >
|
|
86
|
-
For every module, define a custom error class that carries: message, operation name,
|
|
87
|
-
and a timestamp. No bare `throw new Error(...)` in business logic. In tests, assert
|
|
88
|
-
on the specific error class, not just the message string.
|
|
89
|
-
contract: >
|
|
90
|
-
Every module directory has an errors file. No bare `new Error()` in business logic
|
|
91
|
-
files (src/ excluding test files).
|
|
92
|
-
tools: ["grep", "eslint"]
|
|
93
|
-
expected_output: "Custom error classes present; eslint no-throw-generic rule passes"
|
|
94
|
-
pass_criterion: "grep -r 'throw new Error' src --include='*.ts' --exclude='*.test.ts' returns 0 matches"
|
|
95
|
-
|
|
96
|
-
- id: execution
|
|
97
|
-
title: "Run Type Checker + Unit Tests"
|
|
98
|
-
rationale: >
|
|
99
|
-
Contracts defined above are verified by automated tooling. This is the
|
|
100
|
-
deterministic verify loop — no human judgment needed at this phase.
|
|
101
|
-
steps:
|
|
102
|
-
- id: run-type-check
|
|
103
|
-
instruction: >
|
|
104
|
-
Run the type checker in strict mode. Treat warnings as errors.
|
|
105
|
-
All type errors must be resolved before running tests.
|
|
106
|
-
contract: "Zero compiler errors in strict mode"
|
|
107
|
-
tools: ["tsc --noEmit --strict", "mypy --strict"]
|
|
108
|
-
expected_output: "Exit code 0"
|
|
109
|
-
pass_criterion: "tsc --noEmit --strict (or mypy --strict) exits 0"
|
|
110
|
-
|
|
111
|
-
- id: run-unit-tests
|
|
112
|
-
instruction: >
|
|
113
|
-
Run the full test suite. All tests must pass. Coverage must meet
|
|
114
|
-
the project's coverage gate (≥80% line coverage overall, ≥90% on changed files).
|
|
115
|
-
contract: "All tests pass; coverage gate met"
|
|
116
|
-
tools: ["jest --runInBand --coverage", "pytest --cov"]
|
|
117
|
-
expected_output: "Test results JSON + coverage report"
|
|
118
|
-
pass_criterion: "Exit code 0; coverage ≥ 80% overall"
|
|
119
|
-
|
|
120
|
-
- id: owasp-l1-static
|
|
121
|
-
instruction: >
|
|
122
|
-
Run static security analysis as part of the development loop.
|
|
123
|
-
OWASP ASVS Level 1 checks that run at commit time:
|
|
124
|
-
(1) Secrets detection — no credentials, tokens, or keys in source (pre-commit-secrets hook)
|
|
125
|
-
(2) Dependency audit — no HIGH/CRITICAL CVEs in direct dependencies (audit command from forgecraft.yaml tools.audit)
|
|
126
|
-
(3) Injection pattern scan — grep for string-concatenated SQL, shell exec with user input, eval() patterns
|
|
127
|
-
(4) Hardcoded config scan — no URLs, IPs, ports, credentials in non-config files
|
|
128
|
-
These run automatically via pre-commit hooks (see .claude/hooks/). No manual action required
|
|
129
|
-
unless a finding is reported.
|
|
130
|
-
contract: >
|
|
131
|
-
Zero secrets detected. Zero HIGH/CRITICAL CVEs in dependencies.
|
|
132
|
-
Zero string-concatenated SQL queries. Zero eval() with dynamic input.
|
|
133
|
-
tools: ["pre-commit-secrets.sh", "forgecraft.yaml tools.audit", "pre-commit-prod-quality.sh"]
|
|
134
|
-
expected_output: "Hook output: PASS or list of violations with file:line"
|
|
135
|
-
pass_criterion: "All pre-commit hooks exit 0 on staged files"
|
|
136
|
-
owasp_asvs_level: 1
|
|
137
|
-
|
|
138
|
-
- id: evidence
|
|
139
|
-
title: "Persist and Interpret Results"
|
|
140
|
-
rationale: >
|
|
141
|
-
Verification evidence must be persisted as artifacts so the verify loop
|
|
142
|
-
can compare pass/fail state across iterations. Without persisted evidence,
|
|
143
|
-
regressions go undetected.
|
|
144
|
-
steps:
|
|
145
|
-
- id: persist-coverage-report
|
|
146
|
-
instruction: >
|
|
147
|
-
Save the coverage report to coverage/ in the project root. If CI is
|
|
148
|
-
configured, upload as a build artifact. Record the overall line coverage
|
|
149
|
-
percentage in the session log.
|
|
150
|
-
contract: "coverage/lcov.info or coverage/coverage.json exists after test run"
|
|
151
|
-
tools: ["jest --coverage", "pytest-cov", "lcov"]
|
|
152
|
-
expected_output: "coverage/ directory with lcov.info or JSON summary"
|
|
153
|
-
pass_criterion: "coverage/ directory non-empty after test run"
|
|
154
|
-
|
|
155
|
-
- id: record-type-error-count
|
|
156
|
-
instruction: >
|
|
157
|
-
Record the number of type errors before and after each verify loop pass.
|
|
158
|
-
If the count does not decrease across passes, the fix prompt is not effective —
|
|
159
|
-
escalate to human review.
|
|
160
|
-
contract: "Type error count is 0 at end of final pass"
|
|
161
|
-
tools: ["tsc --noEmit 2>&1 | grep 'error TS'"]
|
|
162
|
-
expected_output: "Integer error count per pass"
|
|
163
|
-
pass_criterion: "Error count = 0"
|
|
164
|
-
|
|
165
|
-
- id: pre-release-hardening
|
|
166
|
-
title: "Pre-Release Hardening Gates"
|
|
167
|
-
rationale: >
|
|
168
|
-
Before promoting to pre-release (beta/RC-candidate), the implementation
|
|
169
|
-
must survive adversarial conditions it will encounter in production:
|
|
170
|
-
mutant code that slips through tests, external attack vectors, load spikes,
|
|
171
|
-
and infrastructure failures. These gates are advisory during development
|
|
172
|
-
but blocking at release_phase = pre-release.
|
|
173
|
-
release_phase_gate: pre-release
|
|
174
|
-
steps:
|
|
175
|
-
- id: mutation-testing
|
|
176
|
-
instruction: >
|
|
177
|
-
Run mutation testing on changed files using Stryker (JS/TS) or mutmut (Python).
|
|
178
|
-
Mutation score on changed files must be ≥ 80%. A low mutation score means
|
|
179
|
-
tests pass but do not actually catch logic errors — the test suite is checking
|
|
180
|
-
the wrong things.
|
|
181
|
-
Configuration: stryker.config.json or .stryker.conf.json at project root.
|
|
182
|
-
Target only files modified in this release branch to keep runtime reasonable.
|
|
183
|
-
If forgecraft.yaml has tools.mutation configured, run that command directly.
|
|
184
|
-
The pre-commit hook will check mutation score if the tool is configured.
|
|
185
|
-
contract: >
|
|
186
|
-
Mutation score ≥ 80% on changed files. Stryker/mutmut exits with surviving-mutant
|
|
187
|
-
rate at or below the 20% threshold for changed lines.
|
|
188
|
-
tools: ["stryker run", "mutmut run && mutmut results"]
|
|
189
|
-
expected_output: "Stryker HTML report or mutmut results table with mutation score"
|
|
190
|
-
pass_criterion: "Mutation score on changed files ≥ 80%"
|
|
191
|
-
requires_human_review: true
|
|
192
|
-
|
|
193
|
-
- id: dast-scan
|
|
194
|
-
instruction: >
|
|
195
|
-
Run OWASP ZAP dynamic analysis against a running instance of the service.
|
|
196
|
-
Use the API scan for APIs: `docker run -t owasp/zap2docker-stable
|
|
197
|
-
zap-api-scan.py -t <openapi-spec-url> -f openapi`.
|
|
198
|
-
All HIGH severity findings must be resolved before pre-release.
|
|
199
|
-
MEDIUM findings must be triaged (accepted with documented rationale or fixed).
|
|
200
|
-
contract: >
|
|
201
|
-
Zero HIGH severity findings from ZAP API scan.
|
|
202
|
-
Every MEDIUM finding has a documented disposition (fixed or accepted with ADR).
|
|
203
|
-
tools: ["owasp/zap2docker-stable", "zap-api-scan.py", "zap-baseline.py"]
|
|
204
|
-
expected_output: "ZAP HTML or JSON report with findings by severity"
|
|
205
|
-
pass_criterion: "Zero HIGH findings; all MEDIUM findings triaged"
|
|
206
|
-
requires_human_review: true
|
|
207
|
-
owasp_asvs_level: 2
|
|
208
|
-
|
|
209
|
-
- id: load-test
|
|
210
|
-
instruction: >
|
|
211
|
-
Run a load test at 2× peak expected traffic using k6, Locust, or Artillery.
|
|
212
|
-
Peak is defined as the highest observed or projected request rate in the
|
|
213
|
-
production traffic model. Test duration: minimum 10 minutes.
|
|
214
|
-
Acceptance criteria: p95 latency ≤ SLA threshold; error rate < 1%.
|
|
215
|
-
contract: >
|
|
216
|
-
Under 2× peak load: p95 response time ≤ SLA; error rate < 1%;
|
|
217
|
-
no memory leak detected (heap stable across test duration).
|
|
218
|
-
tools: ["k6 run", "locust", "artillery run"]
|
|
219
|
-
expected_output: "k6/Locust/Artillery HTML or JSON summary with p50/p95/p99 and error rate"
|
|
220
|
-
pass_criterion: "p95 ≤ SLA; error rate < 1% sustained over test duration"
|
|
221
|
-
requires_human_review: true
|
|
222
|
-
|
|
223
|
-
- id: chaos-resilience
|
|
224
|
-
instruction: >
|
|
225
|
-
Inject one network failure and one dependency failure using Toxiproxy,
|
|
226
|
-
Chaos Monkey, or AWS Fault Injection Simulator.
|
|
227
|
-
The service must degrade gracefully: return 503 with Retry-After header
|
|
228
|
-
(not 500 or silent hang) when a downstream is unavailable.
|
|
229
|
-
Recovery time from injected failure must be ≤ 30 seconds.
|
|
230
|
-
contract: >
|
|
231
|
-
Service returns structured error response (not 500) when dependency fails.
|
|
232
|
-
Recovers automatically within 30 seconds of dependency restoration.
|
|
233
|
-
No data corruption or stuck transactions during failure window.
|
|
234
|
-
tools: ["toxiproxy-cli", "chaos-monkey", "aws fis"]
|
|
235
|
-
expected_output: "Chaos test report showing failure injection + recovery timeline"
|
|
236
|
-
pass_criterion: "Graceful degradation confirmed; recovery time ≤ 30s"
|
|
237
|
-
requires_human_review: true
|
|
238
|
-
|
|
239
|
-
- id: release-candidate
|
|
240
|
-
title: "Release Candidate Gates"
|
|
241
|
-
rationale: >
|
|
242
|
-
The release candidate is the last checkpoint before production. It must pass
|
|
243
|
-
security penetration testing, accessibility audit (if UI), full compatibility
|
|
244
|
-
matrix validation, and mutation testing at the overall (not just changed-files)
|
|
245
|
-
threshold. Human sign-off is required on all findings.
|
|
246
|
-
release_phase_gate: release-candidate
|
|
247
|
-
steps:
|
|
248
|
-
- id: penetration-testing
|
|
249
|
-
instruction: >
|
|
250
|
-
Perform OWASP Top 10 penetration testing. At minimum cover:
|
|
251
|
-
injection (SQL, command, LDAP), broken authentication (session fixation,
|
|
252
|
-
token leakage, brute-force), sensitive data exposure (TLS config, response
|
|
253
|
-
headers, error messages), broken access control (BOLA/IDOR, privilege
|
|
254
|
-
escalation, JWT algorithm confusion), security misconfiguration (default
|
|
255
|
-
creds, verbose errors), and XXE/XSS if a UI is present.
|
|
256
|
-
Use OWASP ZAP active scan + manual testing for BOLA/IDOR.
|
|
257
|
-
contract: >
|
|
258
|
-
Zero unresolved Critical or High findings (CVSS ≥ 7.0).
|
|
259
|
-
All Medium findings (CVSS 4.0–6.9) have documented disposition.
|
|
260
|
-
OWASP Top 10 categories fully covered in test plan.
|
|
261
|
-
tools: ["owasp/zap2docker-stable", "burpsuite", "sqlmap", "nikto"]
|
|
262
|
-
expected_output: "Penetration test report with CVSS scores, reproduction steps, and remediation status"
|
|
263
|
-
pass_criterion: "Zero unresolved High/Critical; all Medium triaged with ADR or fix"
|
|
264
|
-
requires_human_review: true
|
|
265
|
-
owasp_asvs_level: 3
|
|
266
|
-
|
|
267
|
-
- id: mutation-testing-full
|
|
268
|
-
instruction: >
|
|
269
|
-
Run mutation testing against the full codebase (not just changed files).
|
|
270
|
-
Overall mutation score must be ≥ 80%. This validates that the entire
|
|
271
|
-
test suite is specification-grade, not just the code touched in this release.
|
|
272
|
-
contract: "Overall mutation score ≥ 80% across full codebase"
|
|
273
|
-
tools: ["stryker run --all-files", "mutmut run"]
|
|
274
|
-
expected_output: "Full mutation testing report with per-module scores"
|
|
275
|
-
pass_criterion: "Overall mutation score ≥ 80%"
|
|
276
|
-
requires_human_review: false
|
|
277
|
-
|
|
278
|
-
- id: compatibility-matrix
|
|
279
|
-
instruction: >
|
|
280
|
-
Validate the software against all supported runtime versions specified in
|
|
281
|
-
package.json engines field (Node.js), pyproject.toml (Python), or equivalent.
|
|
282
|
-
Run the full test suite against each supported major version.
|
|
283
|
-
If a database is used, validate against all supported DB major versions.
|
|
284
|
-
contract: >
|
|
285
|
-
Full test suite passes on every supported runtime version in the
|
|
286
|
-
declared compatibility matrix. Every version-specific failure is reported.
|
|
287
|
-
tools: ["nvm use", "pyenv", "matrix CI strategy"]
|
|
288
|
-
expected_output: "CI matrix run report showing pass/fail per runtime version"
|
|
289
|
-
pass_criterion: "All tests pass on all declared runtime versions"
|
|
290
|
-
requires_human_review: false
|
|
291
|
-
|
|
292
|
-
- id: accessibility-audit
|
|
293
|
-
instruction: >
|
|
294
|
-
If the project has a user interface: run axe-core or Lighthouse accessibility
|
|
295
|
-
audit against all primary user journeys. WCAG 2.1 AA compliance required.
|
|
296
|
-
If no UI, mark this step as N/A with justification.
|
|
297
|
-
contract: "Zero WCAG 2.1 AA violations on primary user journeys, or step marked N/A with justification"
|
|
298
|
-
tools: ["axe-core", "lighthouse --only-categories=accessibility", "pa11y"]
|
|
299
|
-
expected_output: "axe/Lighthouse accessibility report or N/A justification"
|
|
300
|
-
pass_criterion: "Zero critical/serious axe violations and Lighthouse accessibility score ≥ 90; or step marked N/A with justification"
|
|
301
|
-
requires_human_review: true
|
|
302
|
-
|
|
303
|
-
- id: deployment-gates
|
|
304
|
-
title: "Deployment Readiness Gates"
|
|
305
|
-
rationale: >
|
|
306
|
-
Before production deployment, the operational configuration must be validated:
|
|
307
|
-
canary or rolling deploy is configured, rollback threshold is defined and
|
|
308
|
-
tested, smoke tests cover the critical path, and observability is confirmed
|
|
309
|
-
active. These gates prevent deploying code that cannot be safely rolled back.
|
|
310
|
-
release_phase_gate: production
|
|
311
|
-
steps:
|
|
312
|
-
- id: canary-config
|
|
313
|
-
instruction: >
|
|
314
|
-
Confirm deployment strategy is configured (canary, blue-green, or rolling).
|
|
315
|
-
For canary: define the traffic split percentage and success threshold before
|
|
316
|
-
promoting to 100%. For rolling: define the max-surge and max-unavailable
|
|
317
|
-
settings. Document the rollback trigger condition (error rate threshold,
|
|
318
|
-
latency threshold, or health check failure count).
|
|
319
|
-
contract: >
|
|
320
|
-
Deployment strategy config present (k8s deployment, railway.json,
|
|
321
|
-
fly.toml, ECS service, or equivalent). Rollback trigger threshold
|
|
322
|
-
documented in runbook or infra config.
|
|
323
|
-
tools: ["kubectl", "fly", "railway", "aws ecs"]
|
|
324
|
-
expected_output: "Deployment config file + documented rollback threshold"
|
|
325
|
-
pass_criterion: "Deploy config present; rollback threshold defined and reachable in runbook"
|
|
326
|
-
requires_human_review: true
|
|
327
|
-
|
|
328
|
-
- id: smoke-tests
|
|
329
|
-
instruction: >
|
|
330
|
-
Define and run smoke tests covering the top 3–5 critical user journeys.
|
|
331
|
-
Smoke tests must be runnable against a live environment in < 60 seconds.
|
|
332
|
-
For web UIs: use Playwright — navigate to the live URL, wait for async
|
|
333
|
-
data to load, take a full-page screenshot, assert critical elements are
|
|
334
|
-
present and non-empty. Playwright catches rendering bugs, broken async
|
|
335
|
-
fetches, blank pages, and layout collapses that API-only tests miss entirely.
|
|
336
|
-
For APIs: use Hurl or Newman for machine-readable output.
|
|
337
|
-
These same tests run automatically post-deploy.
|
|
338
|
-
contract: >
|
|
339
|
-
Smoke test suite exists, runs in < 60 seconds, covers the critical path.
|
|
340
|
-
Web UIs: Playwright script with element assertions + screenshot artifact.
|
|
341
|
-
APIs: Hurl/Newman with status code + response schema assertions.
|
|
342
|
-
All smoke tests pass against staging before deploy to production.
|
|
343
|
-
tools: ["playwright", "newman run", "hurl", "k6 run --vus 1"]
|
|
344
|
-
expected_output: "Smoke test results with pass/fail per check, screenshot for web UIs, total duration"
|
|
345
|
-
pass_criterion: "All checks pass; screenshot confirms page renders correctly; total duration < 60s"
|
|
346
|
-
requires_human_review: false
|
|
347
|
-
|
|
348
|
-
- id: observability-verified
|
|
349
|
-
instruction: >
|
|
350
|
-
Confirm that structured logging, metrics, and alerting are active in the
|
|
351
|
-
target environment. Required: log aggregation (Datadog, Splunk, CloudWatch,
|
|
352
|
-
or equivalent), error rate alert (fires within 5 minutes of >1% error rate),
|
|
353
|
-
and latency alert (fires within 5 minutes of p95 > SLA). Run a synthetic
|
|
354
|
-
error and confirm the alert fires.
|
|
355
|
-
contract: >
|
|
356
|
-
Log aggregation active; error rate alert configured and tested;
|
|
357
|
-
latency alert configured and tested. Alert runbook linked in deploy notes.
|
|
358
|
-
tools: ["datadog", "cloudwatch", "grafana", "pagerduty"]
|
|
359
|
-
expected_output: "Screenshot or API response showing alerts active + test alert fired"
|
|
360
|
-
pass_criterion: "Synthetic error triggers alert within 5 minutes"
|
|
361
|
-
requires_human_review: true
|
|
362
|
-
|
|
363
|
-
- id: post-deployment
|
|
364
|
-
title: "Post-Deployment Health Verification"
|
|
365
|
-
rationale: >
|
|
366
|
-
After production deployment, synthetic probes and observability must confirm
|
|
367
|
-
that the new version is serving traffic correctly. The deployment is not
|
|
368
|
-
complete until post-deployment verification passes. If it fails, rollback
|
|
369
|
-
is the default response — not patching in production.
|
|
370
|
-
release_phase_gate: production
|
|
371
|
-
steps:
|
|
372
|
-
- id: synthetic-health-probes
|
|
373
|
-
instruction: >
|
|
374
|
-
Run the smoke test suite against the production environment within 5 minutes
|
|
375
|
-
of deploy completion. For web UIs: Playwright navigates to the live URL,
|
|
376
|
-
waits for async data, asserts critical elements, and saves a screenshot.
|
|
377
|
-
The screenshot is the proof artifact — view it after every deploy.
|
|
378
|
-
If any check fails, trigger rollback immediately.
|
|
379
|
-
contract: >
|
|
380
|
-
All smoke tests pass against production within 5 minutes of deployment.
|
|
381
|
-
Web UIs: screenshot artifact saved and reviewed. API: response schema valid.
|
|
382
|
-
Rollback is initiated automatically or within 10 minutes if any smoke test fails.
|
|
383
|
-
tools: ["newman run", "hurl", "k6 run --vus 1"]
|
|
384
|
-
expected_output: "Post-deploy smoke test results with timestamp"
|
|
385
|
-
pass_criterion: "All smoke tests pass against production; timestamp within 5 min of deploy"
|
|
386
|
-
requires_human_review: false
|
|
387
|
-
|
|
388
|
-
- id: error-rate-monitoring
|
|
389
|
-
instruction: >
|
|
390
|
-
Monitor the error rate for 30 minutes after deployment. Error rate must stay
|
|
391
|
-
below the rollback threshold. Compare p95 latency against pre-deploy baseline
|
|
392
|
-
(from load test results). If error rate spikes above threshold or latency
|
|
393
|
-
degrades > 20% vs baseline, initiate rollback.
|
|
394
|
-
contract: >
|
|
395
|
-
Error rate stays below rollback threshold for 30 minutes post-deploy.
|
|
396
|
-
p95 latency does not degrade > 20% vs pre-deploy baseline.
|
|
397
|
-
tools: ["datadog", "cloudwatch", "grafana"]
|
|
398
|
-
expected_output: "30-minute error rate and latency time series with baseline comparison"
|
|
399
|
-
pass_criterion: "Error rate below threshold; latency within 20% of baseline for 30 min"
|
|
400
|
-
requires_human_review: true
|
|
401
|
-
|
|
402
|
-
- id: incident-runbook-verified
|
|
403
|
-
instruction: >
|
|
404
|
-
Confirm that the incident runbook for this service is current and accessible.
|
|
405
|
-
The runbook must cover: how to roll back this version, how to escalate,
|
|
406
|
-
how to check the canary status, and how to interpret the health dashboard.
|
|
407
|
-
Have one team member who was not involved in the deployment review and
|
|
408
|
-
confirm the runbook is complete.
|
|
409
|
-
contract: >
|
|
410
|
-
Incident runbook exists at docs/runbooks/<service>.md or equivalent.
|
|
411
|
-
Runbook covers rollback, escalation, canary status, and dashboard links.
|
|
412
|
-
Independent review confirmed by a second team member.
|
|
413
|
-
tools: ["docs/runbooks/"]
|
|
414
|
-
expected_output: "Runbook link + peer review confirmation"
|
|
415
|
-
pass_criterion: "Runbook current; independent reviewer confirms completeness"
|
|
416
|
-
requires_human_review: true
|
|
1
|
+
tag: UNIVERSAL
|
|
2
|
+
section: verification
|
|
3
|
+
title: "Contract-First Baseline Verification"
|
|
4
|
+
description: >
|
|
5
|
+
Applicable to every domain. Establishes type contracts, schema validation,
|
|
6
|
+
unit-level assertions, and hardening gates as the completeness floor before
|
|
7
|
+
any domain-specific verification is applied.
|
|
8
|
+
|
|
9
|
+
Phases gate on release_phase: contract-definition + execution + evidence gates
|
|
10
|
+
are always active. pre-release-hardening gates are blocking at pre-release.
|
|
11
|
+
release-candidate gates are blocking at release-candidate.
|
|
12
|
+
deployment-gates and post-deployment gates are blocking at production.
|
|
13
|
+
|
|
14
|
+
S = 0.40 is achievable with the three base phases alone. Hardening phases
|
|
15
|
+
raise the ceiling to 0.90 — domain strategies supply the remaining 0.10.
|
|
16
|
+
cycle_model: |
|
|
17
|
+
ForgeCraft verification maps to the GS paper's 4 loops + 1 independent cycle:
|
|
18
|
+
|
|
19
|
+
LOOP 1 — INITIALIZATION (once per project)
|
|
20
|
+
Runs: spec → architecture → constitution → ADRs → use cases
|
|
21
|
+
Gate: derivability — stateless agent can build system from artifacts alone
|
|
22
|
+
OWASP: N/A (no running service yet)
|
|
23
|
+
|
|
24
|
+
LOOP 2 — INCREMENTAL / SHORT LOOP (per roadmap item, agile-style)
|
|
25
|
+
Runs: partial spec → implement → verify → Status.md → human review
|
|
26
|
+
Gate: tests pass + feature exercised at boundary + cascade complete
|
|
27
|
+
OWASP ASVS Level 1: static checks enforced at every commit (execution.owasp-l1-static)
|
|
28
|
+
Less deterministic — human in the loop guides direction at each iteration
|
|
29
|
+
|
|
30
|
+
LOOP 3 — PRE-RELEASE (before environment promotion)
|
|
31
|
+
Runs: mutation testing + DAST + load test + chaos resilience
|
|
32
|
+
Gate: MSI ≥ 80%, zero HIGH DAST findings, p95 ≤ SLA
|
|
33
|
+
OWASP ASVS Level 2: dynamic analysis, systematic verification (pre-release-hardening phase)
|
|
34
|
+
|
|
35
|
+
LOOP 4 — HOTFIX (emergency patches)
|
|
36
|
+
Runs: minimal fix → smoke tests → ADR + cascade immediately after stabilization
|
|
37
|
+
Gate: smoke tests pass; ADR filed within 24h of stabilization
|
|
38
|
+
OWASP ASVS Level 1 minimum; Level 2 if auth/data path affected
|
|
39
|
+
|
|
40
|
+
LOOP 5 — DEPLOYMENT/HARDENING (independent, longer cadence)
|
|
41
|
+
Runs: post-implementation, on its own schedule (weekly/monthly/per-major)
|
|
42
|
+
Gate: pentest, full mutation, compatibility matrix, accessibility audit
|
|
43
|
+
OWASP ASVS Level 3: penetration testing, BOLA/IDOR, session fixation
|
|
44
|
+
Managed independently from feature loops — see deployment-gates and release-candidate phases
|
|
45
|
+
uncertainty_levels:
|
|
46
|
+
- deterministic
|
|
47
|
+
completeness_ceiling: 0.90
|
|
48
|
+
|
|
49
|
+
phases:
|
|
50
|
+
|
|
51
|
+
- id: contract-definition
|
|
52
|
+
title: "Define Contracts Before Code"
|
|
53
|
+
rationale: >
|
|
54
|
+
Every function, module boundary, and API surface must have a machine-checkable
|
|
55
|
+
contract before implementation begins. This transforms implicit intent into
|
|
56
|
+
explicit invariants the verify loop can check automatically.
|
|
57
|
+
steps:
|
|
58
|
+
- id: define-type-contracts
|
|
59
|
+
instruction: >
|
|
60
|
+
For every public function and class, define TypeScript types or Python type
|
|
61
|
+
annotations for all parameters and return values. No `any`, no untyped
|
|
62
|
+
function signatures. Run `tsc --noEmit` or `mypy` before proceeding.
|
|
63
|
+
contract: >
|
|
64
|
+
Zero type errors reported by the compiler. Every exported function has
|
|
65
|
+
explicit parameter and return types.
|
|
66
|
+
tools: ["tsc --noEmit", "mypy --strict"]
|
|
67
|
+
expected_output: "Exit code 0 from type checker with zero errors"
|
|
68
|
+
pass_criterion: "tsc --noEmit (or mypy --strict for Python) exits 0"
|
|
69
|
+
|
|
70
|
+
- id: define-schema-contracts
|
|
71
|
+
instruction: >
|
|
72
|
+
For every external interface (HTTP request/response, config file, event payload,
|
|
73
|
+
CLI flags), define a Zod schema (TypeScript) or Pydantic model (Python) colocated
|
|
74
|
+
with the module that owns it. The schema must be the single source of truth —
|
|
75
|
+
types must be derived from it, not maintained separately.
|
|
76
|
+
contract: >
|
|
77
|
+
All external interfaces have a named Zod schema or Pydantic model that
|
|
78
|
+
is used at the entry point and is importable by tests.
|
|
79
|
+
tools: ["zod", "pydantic"]
|
|
80
|
+
expected_output: "Schema files present and exported from module barrel"
|
|
81
|
+
pass_criterion: >
|
|
82
|
+
grep -r 'z.object\|z.string\|BaseModel' src/ returns ≥1 result per external interface
|
|
83
|
+
|
|
84
|
+
- id: define-error-contracts
|
|
85
|
+
instruction: >
|
|
86
|
+
For every module, define a custom error class that carries: message, operation name,
|
|
87
|
+
and a timestamp. No bare `throw new Error(...)` in business logic. In tests, assert
|
|
88
|
+
on the specific error class, not just the message string.
|
|
89
|
+
contract: >
|
|
90
|
+
Every module directory has an errors file. No bare `new Error()` in business logic
|
|
91
|
+
files (src/ excluding test files).
|
|
92
|
+
tools: ["grep", "eslint"]
|
|
93
|
+
expected_output: "Custom error classes present; eslint no-throw-generic rule passes"
|
|
94
|
+
pass_criterion: "grep -r 'throw new Error' src --include='*.ts' --exclude='*.test.ts' returns 0 matches"
|
|
95
|
+
|
|
96
|
+
- id: execution
|
|
97
|
+
title: "Run Type Checker + Unit Tests"
|
|
98
|
+
rationale: >
|
|
99
|
+
Contracts defined above are verified by automated tooling. This is the
|
|
100
|
+
deterministic verify loop — no human judgment needed at this phase.
|
|
101
|
+
steps:
|
|
102
|
+
- id: run-type-check
|
|
103
|
+
instruction: >
|
|
104
|
+
Run the type checker in strict mode. Treat warnings as errors.
|
|
105
|
+
All type errors must be resolved before running tests.
|
|
106
|
+
contract: "Zero compiler errors in strict mode"
|
|
107
|
+
tools: ["tsc --noEmit --strict", "mypy --strict"]
|
|
108
|
+
expected_output: "Exit code 0"
|
|
109
|
+
pass_criterion: "tsc --noEmit --strict (or mypy --strict for Python) exits 0"
|
|
110
|
+
|
|
111
|
+
- id: run-unit-tests
|
|
112
|
+
instruction: >
|
|
113
|
+
Run the full test suite. All tests must pass. Coverage must meet
|
|
114
|
+
the project's coverage gate (≥80% line coverage overall, ≥90% on changed files).
|
|
115
|
+
contract: "All tests pass; coverage gate met"
|
|
116
|
+
tools: ["jest --runInBand --coverage", "pytest --cov"]
|
|
117
|
+
expected_output: "Test results JSON + coverage report"
|
|
118
|
+
pass_criterion: "Exit code 0; coverage ≥ 80% overall"
|
|
119
|
+
|
|
120
|
+
- id: owasp-l1-static
|
|
121
|
+
instruction: >
|
|
122
|
+
Run static security analysis as part of the development loop.
|
|
123
|
+
OWASP ASVS Level 1 checks that run at commit time:
|
|
124
|
+
(1) Secrets detection — no credentials, tokens, or keys in source (pre-commit-secrets hook)
|
|
125
|
+
(2) Dependency audit — no HIGH/CRITICAL CVEs in direct dependencies (audit command from forgecraft.yaml tools.audit)
|
|
126
|
+
(3) Injection pattern scan — grep for string-concatenated SQL, shell exec with user input, eval() patterns
|
|
127
|
+
(4) Hardcoded config scan — no URLs, IPs, ports, credentials in non-config files
|
|
128
|
+
These run automatically via pre-commit hooks (see .claude/hooks/). No manual action required
|
|
129
|
+
unless a finding is reported.
|
|
130
|
+
contract: >
|
|
131
|
+
Zero secrets detected. Zero HIGH/CRITICAL CVEs in dependencies.
|
|
132
|
+
Zero string-concatenated SQL queries. Zero eval() with dynamic input.
|
|
133
|
+
tools: ["pre-commit-secrets.sh", "forgecraft.yaml tools.audit", "pre-commit-prod-quality.sh"]
|
|
134
|
+
expected_output: "Hook output: PASS or list of violations with file:line"
|
|
135
|
+
pass_criterion: "All pre-commit hooks exit 0 on staged files"
|
|
136
|
+
owasp_asvs_level: 1
|
|
137
|
+
|
|
138
|
+
- id: evidence
|
|
139
|
+
title: "Persist and Interpret Results"
|
|
140
|
+
rationale: >
|
|
141
|
+
Verification evidence must be persisted as artifacts so the verify loop
|
|
142
|
+
can compare pass/fail state across iterations. Without persisted evidence,
|
|
143
|
+
regressions go undetected.
|
|
144
|
+
steps:
|
|
145
|
+
- id: persist-coverage-report
|
|
146
|
+
instruction: >
|
|
147
|
+
Save the coverage report to coverage/ in the project root. If CI is
|
|
148
|
+
configured, upload as a build artifact. Record the overall line coverage
|
|
149
|
+
percentage in the session log.
|
|
150
|
+
contract: "coverage/lcov.info or coverage/coverage.json exists after test run"
|
|
151
|
+
tools: ["jest --coverage", "pytest-cov", "lcov"]
|
|
152
|
+
expected_output: "coverage/ directory with lcov.info or JSON summary"
|
|
153
|
+
pass_criterion: "coverage/ directory non-empty after test run"
|
|
154
|
+
|
|
155
|
+
- id: record-type-error-count
|
|
156
|
+
instruction: >
|
|
157
|
+
Record the number of type errors before and after each verify loop pass.
|
|
158
|
+
If the count does not decrease across passes, the fix prompt is not effective —
|
|
159
|
+
escalate to human review.
|
|
160
|
+
contract: "Type error count is 0 at end of final pass"
|
|
161
|
+
tools: ["tsc --noEmit 2>&1 | grep -c 'error TS'"]
|
|
162
|
+
expected_output: "Integer error count per pass"
|
|
163
|
+
pass_criterion: "Error count = 0"
|
|
164
|
+
|
|
165
|
+
- id: pre-release-hardening
|
|
166
|
+
title: "Pre-Release Hardening Gates"
|
|
167
|
+
rationale: >
|
|
168
|
+
Before promoting to pre-release (beta/RC-candidate), the implementation
|
|
169
|
+
must survive adversarial conditions it will encounter in production:
|
|
170
|
+
mutant code that slips through tests, external attack vectors, load spikes,
|
|
171
|
+
and infrastructure failures. These gates are advisory during development
|
|
172
|
+
but blocking at release_phase = pre-release.
|
|
173
|
+
release_phase_gate: pre-release
|
|
174
|
+
steps:
|
|
175
|
+
- id: mutation-testing
|
|
176
|
+
instruction: >
|
|
177
|
+
Run mutation testing on changed files using Stryker (JS/TS) or mutmut (Python).
|
|
178
|
+
Mutation score on changed files must be ≥ 80%. A low mutation score means
|
|
179
|
+
tests pass but do not actually catch logic errors — the test suite is checking
|
|
180
|
+
the wrong things.
|
|
181
|
+
Configuration: stryker.config.json or .stryker.conf.json at project root.
|
|
182
|
+
Target only files modified in this release branch to keep runtime reasonable.
|
|
183
|
+
If forgecraft.yaml has tools.mutation configured, run that command directly.
|
|
184
|
+
The pre-commit hook will check mutation score if the tool is configured.
|
|
185
|
+
contract: >
|
|
186
|
+
Mutation score ≥ 80% on changed files. Stryker/mutmut exits with surviving-mutant
|
|
187
|
+
count below the 20% threshold for changed lines.
|
|
188
|
+
tools: ["stryker run", "mutmut run && mutmut results"]
|
|
189
|
+
expected_output: "Stryker HTML report or mutmut results table with mutation score"
|
|
190
|
+
pass_criterion: "Mutation score on changed files ≥ 80%"
|
|
191
|
+
requires_human_review: true
|
|
192
|
+
|
|
193
|
+
- id: dast-scan
|
|
194
|
+
instruction: >
|
|
195
|
+
Run OWASP ZAP dynamic analysis against a running instance of the service.
|
|
196
|
+
Use the API scan for API services: `docker run -t owasp/zap2docker-stable
|
|
197
|
+
zap-api-scan.py -t <openapi-spec-url> -f openapi`.
|
|
198
|
+
All HIGH severity findings must be resolved before pre-release.
|
|
199
|
+
MEDIUM findings must be triaged (accepted with documented rationale or fixed).
|
|
200
|
+
contract: >
|
|
201
|
+
Zero HIGH severity findings from ZAP API scan.
|
|
202
|
+
Every MEDIUM finding has a documented disposition (fixed or accepted with ADR).
|
|
203
|
+
tools: ["owasp/zap2docker-stable", "zap-api-scan.py", "zap-baseline.py"]
|
|
204
|
+
expected_output: "ZAP HTML or JSON report with findings by severity"
|
|
205
|
+
pass_criterion: "Zero HIGH findings; all MEDIUM findings triaged"
|
|
206
|
+
requires_human_review: true
|
|
207
|
+
owasp_asvs_level: 2
|
|
208
|
+
|
|
209
|
+
- id: load-test
|
|
210
|
+
instruction: >
|
|
211
|
+
Run a load test at 2× peak expected traffic using k6, Locust, or Artillery.
|
|
212
|
+
Peak is defined as the highest observed or projected request rate in the
|
|
213
|
+
production traffic model. Test duration: minimum 10 minutes.
|
|
214
|
+
Accept criteria: p95 latency ≤ SLA threshold; error rate < 1%.
|
|
215
|
+
contract: >
|
|
216
|
+
Under 2× peak load: p95 response time ≤ SLA; error rate < 1%;
|
|
217
|
+
no memory leak detected (heap stable across test duration).
|
|
218
|
+
tools: ["k6 run", "locust", "artillery run"]
|
|
219
|
+
expected_output: "k6/Locust/Artillery HTML or JSON summary with p50/p95/p99 and error rate"
|
|
220
|
+
pass_criterion: "p95 ≤ SLA; error rate < 1% sustained over test duration"
|
|
221
|
+
requires_human_review: true
|
|
222
|
+
|
|
223
|
+
- id: chaos-resilience
|
|
224
|
+
instruction: >
|
|
225
|
+
Inject one network failure and one dependency failure using Toxiproxy,
|
|
226
|
+
Chaos Monkey, or AWS Fault Injection Simulator.
|
|
227
|
+
The service must degrade gracefully: return 503 with Retry-After header
|
|
228
|
+
(not 500 or silent hang) when a downstream is unavailable.
|
|
229
|
+
Recovery time from injected failure must be ≤ 30 seconds.
|
|
230
|
+
contract: >
|
|
231
|
+
Service returns structured error response (not 500) when dependency fails.
|
|
232
|
+
Recovers automatically within 30 seconds of dependency restoration.
|
|
233
|
+
No data corruption or stuck transactions during failure window.
|
|
234
|
+
tools: ["toxiproxy-cli", "chaos-monkey", "aws fis"]
|
|
235
|
+
expected_output: "Chaos test report showing failure injection + recovery timeline"
|
|
236
|
+
pass_criterion: "Graceful degradation confirmed; recovery time ≤ 30s"
|
|
237
|
+
requires_human_review: true
|
|
238
|
+
|
|
239
|
+
- id: release-candidate
|
|
240
|
+
title: "Release Candidate Gates"
|
|
241
|
+
rationale: >
|
|
242
|
+
The release candidate is the last checkpoint before production. It must pass
|
|
243
|
+
security penetration testing, accessibility audit (if UI), full compatibility
|
|
244
|
+
matrix validation, and mutation testing at the overall (not just changed-files)
|
|
245
|
+
threshold. Human sign-off is required on all findings.
|
|
246
|
+
release_phase_gate: release-candidate
|
|
247
|
+
steps:
|
|
248
|
+
- id: penetration-testing
|
|
249
|
+
instruction: >
|
|
250
|
+
Perform OWASP Top 10 penetration testing. At minimum cover:
|
|
251
|
+
injection (SQL, command, LDAP), broken authentication (session fixation,
|
|
252
|
+
token leakage, brute-force), sensitive data exposure (TLS config, response
|
|
253
|
+
headers, error messages), broken access control (BOLA/IDOR, privilege
|
|
254
|
+
escalation, JWT algorithm confusion), security misconfiguration (default
|
|
255
|
+
creds, verbose errors), and XXE/XSS if a UI is present.
|
|
256
|
+
Use OWASP ZAP active scan + manual testing for BOLA/IDOR.
|
|
257
|
+
contract: >
|
|
258
|
+
Zero unresolved Critical or High findings (CVSS ≥ 7.0).
|
|
259
|
+
All Medium findings (CVSS 4.0–6.9) have documented disposition.
|
|
260
|
+
OWASP Top 10 categories fully covered in test plan.
|
|
261
|
+
tools: ["owasp/zap2docker-stable", "burpsuite", "sqlmap", "nikto"]
|
|
262
|
+
expected_output: "Penetration test report with CVSS scores, reproduction steps, and remediation status"
|
|
263
|
+
pass_criterion: "Zero unresolved High/Critical; all Medium triaged with ADR or fix"
|
|
264
|
+
requires_human_review: true
|
|
265
|
+
owasp_asvs_level: 3
|
|
266
|
+
|
|
267
|
+
- id: mutation-testing-full
|
|
268
|
+
instruction: >
|
|
269
|
+
Run mutation testing against the full codebase (not just changed files).
|
|
270
|
+
Overall mutation score must be ≥ 80%. This validates that the entire
|
|
271
|
+
test suite is specification-grade, not just the code touched in this release.
|
|
272
|
+
contract: "Overall mutation score ≥ 80% across full codebase"
|
|
273
|
+
tools: ["stryker run --all-files", "mutmut run"]
|
|
274
|
+
expected_output: "Full mutation testing report with per-module scores"
|
|
275
|
+
pass_criterion: "Overall mutation score ≥ 80%"
|
|
276
|
+
requires_human_review: false
|
|
277
|
+
|
|
278
|
+
- id: compatibility-matrix
|
|
279
|
+
instruction: >
|
|
280
|
+
Validate the software against all supported runtime versions specified in
|
|
281
|
+
package.json engines field (Node.js), pyproject.toml (Python), or equivalent.
|
|
282
|
+
Run the full test suite against each supported major version.
|
|
283
|
+
If a database is used, validate against all supported DB major versions.
|
|
284
|
+
contract: >
|
|
285
|
+
Full test suite passes on every supported runtime version in the
|
|
286
|
+
declared compatibility matrix. No version-specific failure goes unreported.
|
|
287
|
+
tools: ["nvm use", "pyenv", "matrix CI strategy"]
|
|
288
|
+
expected_output: "CI matrix run report showing pass/fail per runtime version"
|
|
289
|
+
pass_criterion: "All tests pass on all declared runtime versions"
|
|
290
|
+
requires_human_review: false
|
|
291
|
+
|
|
292
|
+
- id: accessibility-audit
|
|
293
|
+
instruction: >
|
|
294
|
+
If the project has a user interface: run axe-core or Lighthouse accessibility
|
|
295
|
+
audit against all primary user journeys. WCAG 2.1 AA compliance required.
|
|
296
|
+
If no UI, mark this step as N/A with justification.
|
|
297
|
+
contract: "Zero WCAG 2.1 AA violations on primary user journeys, or step marked N/A with justification"
|
|
298
|
+
tools: ["axe-core", "lighthouse --only-categories=accessibility", "pa11y"]
|
|
299
|
+
expected_output: "axe/Lighthouse accessibility report or N/A justification"
|
|
300
|
+
pass_criterion: "Zero critical/serious axe violations and Lighthouse accessibility score ≥ 90; or step marked N/A with justification"
|
|
301
|
+
requires_human_review: true
|
|
302
|
+
|
|
303
|
+
- id: deployment-gates
|
|
304
|
+
title: "Deployment Readiness Gates"
|
|
305
|
+
rationale: >
|
|
306
|
+
Before production deployment, the operational configuration must be validated:
|
|
307
|
+
canary or rolling deploy is configured, rollback threshold is defined and
|
|
308
|
+
tested, smoke tests cover the critical path, and observability is confirmed
|
|
309
|
+
active. These gates prevent deploying code that cannot be safely rolled back.
|
|
310
|
+
release_phase_gate: production
|
|
311
|
+
steps:
|
|
312
|
+
- id: canary-config
|
|
313
|
+
instruction: >
|
|
314
|
+
Confirm deployment strategy is configured (canary, blue-green, or rolling).
|
|
315
|
+
For canary: define the traffic split percentage and success threshold before
|
|
316
|
+
promoting to 100%. For rolling: define the max-surge and max-unavailable
|
|
317
|
+
settings. Document the rollback trigger condition (error rate threshold,
|
|
318
|
+
latency threshold, or health check failure count).
|
|
319
|
+
contract: >
|
|
320
|
+
Deployment strategy config present (k8s deployment, railway.json,
|
|
321
|
+
fly.toml, ECS service, or equivalent). Rollback trigger threshold
|
|
322
|
+
documented in runbook or infra config.
|
|
323
|
+
tools: ["kubectl", "fly", "railway", "aws ecs"]
|
|
324
|
+
expected_output: "Deployment config file + documented rollback threshold"
|
|
325
|
+
pass_criterion: "Deploy config present; rollback threshold defined and reachable in runbook"
|
|
326
|
+
requires_human_review: true
|
|
327
|
+
|
|
328
|
+
- id: smoke-tests
|
|
329
|
+
instruction: >
|
|
330
|
+
Define and run smoke tests covering the top 3–5 critical user journeys.
|
|
331
|
+
Smoke tests must be runnable against a live environment in < 60 seconds.
|
|
332
|
+
For web UIs: use Playwright — navigate to the live URL, wait for async
|
|
333
|
+
data to load, take a full-page screenshot, assert critical elements are
|
|
334
|
+
present and non-empty. Playwright catches rendering bugs, broken async
|
|
335
|
+
fetches, blank pages, and layout collapses that API-only tests miss entirely.
|
|
336
|
+
For APIs: use Hurl or Newman for machine-readable output.
|
|
337
|
+
These same tests run automatically post-deploy.
|
|
338
|
+
contract: >
|
|
339
|
+
Smoke test suite exists, runs in < 60 seconds, covers the critical path.
|
|
340
|
+
Web UIs: Playwright script with element assertions + screenshot artifact.
|
|
341
|
+
APIs: Hurl/Newman with status code + response schema assertions.
|
|
342
|
+
All smoke tests pass against staging before deploy to production.
|
|
343
|
+
tools: ["playwright", "newman run", "hurl", "k6 run --vus 1"]
|
|
344
|
+
expected_output: "Smoke test results with pass/fail per check, screenshot for web UIs, total duration"
|
|
345
|
+
pass_criterion: "All checks pass; screenshot confirms page renders correctly; total duration < 60s"
|
|
346
|
+
requires_human_review: false
|
|
347
|
+
|
|
348
|
+
- id: observability-verified
|
|
349
|
+
instruction: >
|
|
350
|
+
Confirm that structured logging, metrics, and alerting are active in the
|
|
351
|
+
target environment. Required: log aggregation (Datadog, Splunk, CloudWatch,
|
|
352
|
+
or equivalent), error rate alert (fires within 5 minutes of >1% error rate),
|
|
353
|
+
and latency alert (fires within 5 minutes of p95 > SLA). Run a synthetic
|
|
354
|
+
error and confirm the alert fires.
|
|
355
|
+
contract: >
|
|
356
|
+
Log aggregation active; error rate alert configured and tested;
|
|
357
|
+
latency alert configured and tested. Alert runbook linked in deploy notes.
|
|
358
|
+
tools: ["datadog", "cloudwatch", "grafana", "pagerduty"]
|
|
359
|
+
expected_output: "Screenshot or API response showing alerts active + test alert fired"
|
|
360
|
+
pass_criterion: "Synthetic error triggers alert within 5 minutes"
|
|
361
|
+
requires_human_review: true
|
|
362
|
+
|
|
363
|
+
- id: post-deployment
|
|
364
|
+
title: "Post-Deployment Health Verification"
|
|
365
|
+
rationale: >
|
|
366
|
+
After production deployment, synthetic probes and observability must confirm
|
|
367
|
+
that the new version is serving traffic correctly. The deployment is not
|
|
368
|
+
complete until post-deployment verification passes. If it fails, rollback
|
|
369
|
+
is the default response — not patching in production.
|
|
370
|
+
release_phase_gate: production
|
|
371
|
+
steps:
|
|
372
|
+
- id: synthetic-health-probes
|
|
373
|
+
instruction: >
|
|
374
|
+
Run the smoke test suite against the production environment within 5 minutes
|
|
375
|
+
of deploy completion. For web UIs: Playwright navigates to the live URL,
|
|
376
|
+
waits for async data, asserts critical elements, and saves a screenshot.
|
|
377
|
+
The screenshot is the proof artifact — view it after every deploy.
|
|
378
|
+
If any check fails, trigger rollback immediately.
|
|
379
|
+
contract: >
|
|
380
|
+
All smoke tests pass against production within 5 minutes of deployment.
|
|
381
|
+
Web UIs: screenshot artifact saved and reviewed. API: response schema valid.
|
|
382
|
+
Rollback is initiated automatically or within 10 minutes if any smoke test fails.
|
|
383
|
+
tools: ["playwright", "newman run", "hurl", "k6 run --vus 1"]
|
|
384
|
+
expected_output: "Post-deploy smoke test results with timestamp"
|
|
385
|
+
pass_criterion: "All smoke tests pass against production; timestamp within 5 min of deploy"
|
|
386
|
+
requires_human_review: false
|
|
387
|
+
|
|
388
|
+
- id: error-rate-monitoring
|
|
389
|
+
instruction: >
|
|
390
|
+
Monitor the error rate for 30 minutes after deployment. Error rate must stay
|
|
391
|
+
below the rollback threshold. Compare p95 latency against pre-deploy baseline
|
|
392
|
+
(from load test results). If error rate spikes above threshold or latency
|
|
393
|
+
degrades > 20% vs baseline, initiate rollback.
|
|
394
|
+
contract: >
|
|
395
|
+
Error rate stays below rollback threshold for 30 minutes post-deploy.
|
|
396
|
+
p95 latency does not degrade > 20% vs pre-deploy baseline.
|
|
397
|
+
tools: ["datadog", "cloudwatch", "grafana"]
|
|
398
|
+
expected_output: "30-minute error rate and latency time series with baseline comparison"
|
|
399
|
+
pass_criterion: "Error rate below threshold; latency within 20% of baseline for 30 min"
|
|
400
|
+
requires_human_review: true
|
|
401
|
+
|
|
402
|
+
- id: incident-runbook-verified
|
|
403
|
+
instruction: >
|
|
404
|
+
Confirm that the incident runbook for this service is current and accessible.
|
|
405
|
+
The runbook must cover: how to roll back this version, how to escalate,
|
|
406
|
+
how to check the canary status, and how to interpret the health dashboard.
|
|
407
|
+
Have one team member who was not involved in the deployment review and
|
|
408
|
+
confirm the runbook is complete.
|
|
409
|
+
contract: >
|
|
410
|
+
Incident runbook exists at docs/runbooks/<service>.md or equivalent.
|
|
411
|
+
Runbook covers rollback, escalation, canary status, and dashboard links.
|
|
412
|
+
Independent review confirmed by a second team member.
|
|
413
|
+
tools: ["docs/runbooks/"]
|
|
414
|
+
expected_output: "Runbook link + peer review confirmation"
|
|
415
|
+
pass_criterion: "Runbook current; independent reviewer confirms completeness"
|
|
416
|
+
requires_human_review: true
|