forgecraft-mcp 1.2.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. package/README.md +525 -525
  2. package/dist/cli/help.js +44 -44
  3. package/dist/registry/renderer-skeletons.js +92 -92
  4. package/dist/shared/gs-score-logger.js +6 -6
  5. package/dist/tools/add-module.js +123 -123
  6. package/dist/tools/advice-registry.js +18 -18
  7. package/dist/tools/check-cascade-report.js +64 -64
  8. package/dist/tools/configure-mcp.d.ts +3 -0
  9. package/dist/tools/configure-mcp.d.ts.map +1 -1
  10. package/dist/tools/configure-mcp.js +10 -0
  11. package/dist/tools/configure-mcp.js.map +1 -1
  12. package/dist/tools/forgecraft-dispatch.d.ts.map +1 -1
  13. package/dist/tools/forgecraft-dispatch.js +3 -0
  14. package/dist/tools/forgecraft-dispatch.js.map +1 -1
  15. package/dist/tools/forgecraft-schema-params.d.ts +9 -0
  16. package/dist/tools/forgecraft-schema-params.d.ts.map +1 -1
  17. package/dist/tools/forgecraft-schema-params.js +21 -0
  18. package/dist/tools/forgecraft-schema-params.js.map +1 -1
  19. package/dist/tools/forgecraft-schema.d.ts +9 -0
  20. package/dist/tools/forgecraft-schema.d.ts.map +1 -1
  21. package/dist/tools/refresh-output.js +14 -14
  22. package/dist/tools/scaffold-spec-stubs.js +115 -115
  23. package/dist/tools/scaffold-templates.js +62 -62
  24. package/dist/tools/setup-artifact-writers.d.ts +30 -0
  25. package/dist/tools/setup-artifact-writers.d.ts.map +1 -1
  26. package/dist/tools/setup-artifact-writers.js +120 -8
  27. package/dist/tools/setup-artifact-writers.js.map +1 -1
  28. package/dist/tools/setup-phase1.d.ts +3 -0
  29. package/dist/tools/setup-phase1.d.ts.map +1 -1
  30. package/dist/tools/setup-phase1.js +79 -35
  31. package/dist/tools/setup-phase1.js.map +1 -1
  32. package/dist/tools/setup-phase2.d.ts +2 -0
  33. package/dist/tools/setup-phase2.d.ts.map +1 -1
  34. package/dist/tools/setup-phase2.js +10 -1
  35. package/dist/tools/setup-phase2.js.map +1 -1
  36. package/dist/tools/setup-project.d.ts +18 -0
  37. package/dist/tools/setup-project.d.ts.map +1 -1
  38. package/dist/tools/setup-project.js +77 -1
  39. package/dist/tools/setup-project.js.map +1 -1
  40. package/dist/tools/spec-parser-tags.d.ts +9 -0
  41. package/dist/tools/spec-parser-tags.d.ts.map +1 -1
  42. package/dist/tools/spec-parser-tags.js +92 -0
  43. package/dist/tools/spec-parser-tags.js.map +1 -1
  44. package/package.json +89 -86
  45. package/templates/analytics/instructions.yaml +37 -37
  46. package/templates/analytics/mcp-servers.yaml +11 -11
  47. package/templates/analytics/structure.yaml +25 -25
  48. package/templates/api/instructions.yaml +231 -231
  49. package/templates/api/mcp-servers.yaml +22 -13
  50. package/templates/api/nfr.yaml +23 -23
  51. package/templates/api/review.yaml +103 -103
  52. package/templates/api/structure.yaml +34 -34
  53. package/templates/api/verification.yaml +132 -132
  54. package/templates/cli/instructions.yaml +31 -31
  55. package/templates/cli/mcp-servers.yaml +11 -11
  56. package/templates/cli/review.yaml +53 -53
  57. package/templates/cli/structure.yaml +16 -16
  58. package/templates/data-lineage/instructions.yaml +28 -28
  59. package/templates/data-lineage/mcp-servers.yaml +22 -22
  60. package/templates/data-pipeline/instructions.yaml +84 -84
  61. package/templates/data-pipeline/mcp-servers.yaml +13 -13
  62. package/templates/data-pipeline/nfr.yaml +39 -39
  63. package/templates/data-pipeline/structure.yaml +23 -23
  64. package/templates/fintech/hooks.yaml +55 -55
  65. package/templates/fintech/instructions.yaml +112 -112
  66. package/templates/fintech/mcp-servers.yaml +13 -13
  67. package/templates/fintech/nfr.yaml +46 -46
  68. package/templates/fintech/playbook.yaml +210 -210
  69. package/templates/fintech/verification.yaml +239 -239
  70. package/templates/game/instructions.yaml +289 -289
  71. package/templates/game/mcp-servers.yaml +38 -38
  72. package/templates/game/nfr.yaml +64 -64
  73. package/templates/game/playbook.yaml +214 -214
  74. package/templates/game/review.yaml +97 -97
  75. package/templates/game/structure.yaml +67 -67
  76. package/templates/game/verification.yaml +174 -174
  77. package/templates/healthcare/instructions.yaml +42 -42
  78. package/templates/healthcare/mcp-servers.yaml +13 -13
  79. package/templates/healthcare/nfr.yaml +47 -47
  80. package/templates/hipaa/instructions.yaml +41 -41
  81. package/templates/hipaa/mcp-servers.yaml +13 -13
  82. package/templates/infra/instructions.yaml +104 -104
  83. package/templates/infra/mcp-servers.yaml +20 -20
  84. package/templates/infra/nfr.yaml +46 -46
  85. package/templates/infra/review.yaml +65 -65
  86. package/templates/infra/structure.yaml +25 -25
  87. package/templates/library/instructions.yaml +36 -36
  88. package/templates/library/mcp-servers.yaml +20 -20
  89. package/templates/library/review.yaml +56 -56
  90. package/templates/library/structure.yaml +19 -19
  91. package/templates/medallion-architecture/instructions.yaml +41 -41
  92. package/templates/medallion-architecture/mcp-servers.yaml +22 -22
  93. package/templates/ml/instructions.yaml +85 -85
  94. package/templates/ml/mcp-servers.yaml +11 -11
  95. package/templates/ml/nfr.yaml +39 -39
  96. package/templates/ml/structure.yaml +25 -25
  97. package/templates/ml/verification.yaml +156 -156
  98. package/templates/mobile/instructions.yaml +44 -44
  99. package/templates/mobile/mcp-servers.yaml +11 -11
  100. package/templates/mobile/nfr.yaml +49 -49
  101. package/templates/mobile/structure.yaml +27 -27
  102. package/templates/mobile/verification.yaml +121 -121
  103. package/templates/observability-xray/instructions.yaml +40 -40
  104. package/templates/observability-xray/mcp-servers.yaml +15 -15
  105. package/templates/realtime/instructions.yaml +42 -42
  106. package/templates/realtime/mcp-servers.yaml +13 -13
  107. package/templates/soc2/instructions.yaml +41 -41
  108. package/templates/soc2/mcp-servers.yaml +24 -24
  109. package/templates/social/instructions.yaml +43 -43
  110. package/templates/social/mcp-servers.yaml +24 -24
  111. package/templates/state-machine/instructions.yaml +42 -42
  112. package/templates/state-machine/mcp-servers.yaml +11 -11
  113. package/templates/tools-registry.yaml +164 -164
  114. package/templates/universal/hooks.yaml +531 -531
  115. package/templates/universal/instructions.yaml +1692 -1692
  116. package/templates/universal/mcp-servers.yaml +50 -50
  117. package/templates/universal/nfr.yaml +197 -197
  118. package/templates/universal/reference.yaml +326 -326
  119. package/templates/universal/review.yaml +204 -204
  120. package/templates/universal/skills.yaml +262 -262
  121. package/templates/universal/structure.yaml +67 -67
  122. package/templates/universal/verification.yaml +416 -416
  123. package/templates/web-react/hooks.yaml +44 -44
  124. package/templates/web-react/instructions.yaml +207 -207
  125. package/templates/web-react/mcp-servers.yaml +20 -20
  126. package/templates/web-react/nfr.yaml +27 -27
  127. package/templates/web-react/review.yaml +94 -94
  128. package/templates/web-react/structure.yaml +46 -46
  129. package/templates/web-react/verification.yaml +126 -126
  130. package/templates/web-static/instructions.yaml +115 -115
  131. package/templates/web-static/mcp-servers.yaml +20 -20
  132. package/templates/web3/instructions.yaml +44 -44
  133. package/templates/web3/mcp-servers.yaml +11 -11
  134. package/templates/web3/verification.yaml +159 -159
  135. package/templates/zero-trust/instructions.yaml +41 -41
  136. package/templates/zero-trust/mcp-servers.yaml +15 -15
@@ -1,41 +1,41 @@
1
- tag: HIPAA
2
- section: instructions
3
- blocks:
4
- - id: pii-masking
5
- tier: recommended
6
- title: "PII Masking & Data Protection"
7
- content: |
8
- ## PII Masking & Data Protection
9
-
10
- - Identify all Personally Identifiable Information (PII) fields at design time using the 18 HIPAA identifiers as a baseline.
11
- - Implement masking templates for every PII field: full mask for display, partial mask for verification, tokenized for logging.
12
- - Apply dynamic masking based on user role — clinicians see full data, billing sees partial, analytics sees only de-identified data.
13
- - Never store raw PII in caches, message queues, or temporary files. Use tokenized references that resolve through a secure lookup service.
14
- - Validate masking coverage with automated tests: no PII field should reach a log sink, error report, or analytics pipeline unmasked.
15
- - Maintain a PII field registry as a living document — every new field must be classified before it enters the data model.
16
-
17
- - id: encryption-checks
18
- tier: recommended
19
- title: "Encryption Verification & Key Management"
20
- content: |
21
- ## Encryption Verification & Key Management
22
-
23
- - Enforce AES-256 encryption at rest for all data stores containing PII. Verify encryption status in CI with infrastructure-as-code checks.
24
- - Require TLS 1.2+ for all data in transit. Reject connections using older protocols at the load balancer level.
25
- - Use envelope encryption with a managed KMS (AWS KMS, Azure Key Vault, GCP KMS). Never store encryption keys alongside encrypted data.
26
- - Rotate encryption keys on a defined schedule (90 days minimum). Automate rotation and re-encryption of affected data.
27
- - Include encryption verification in deployment checklists: database encryption enabled, TLS certificates valid, KMS policies correct.
28
- - Test encryption boundaries: verify that data crossing service boundaries remains encrypted and that decryption only occurs in authorized services.
29
-
30
- - id: audit-logging-hipaa
31
- tier: recommended
32
- title: "HIPAA Audit Logging"
33
- content: |
34
- ## HIPAA Audit Logging
35
-
36
- - Log every access to PII: who, when, what data, from where, and the business justification.
37
- - Store audit logs in an append-only, tamper-evident store separate from application databases.
38
- - Retain audit logs for a minimum of 6 years per HIPAA requirements.
39
- - Generate automated audit reports: access frequency per user, unusual patterns, after-hours access.
40
- - Implement real-time alerting for anomalous access: bulk record access, out-of-role access, VIP record access.
41
- - Include a unique correlation ID in every request touching PII for end-to-end traceability across services.
1
+ tag: HIPAA
2
+ section: instructions
3
+ blocks:
4
+ - id: pii-masking
5
+ tier: recommended
6
+ title: "PII Masking & Data Protection"
7
+ content: |
8
+ ## PII Masking & Data Protection
9
+
10
+ - Identify all Personally Identifiable Information (PII) fields at design time using the 18 HIPAA identifiers as a baseline.
11
+ - Implement masking templates for every PII field: full mask for display, partial mask for verification, tokenized for logging.
12
+ - Apply dynamic masking based on user role — clinicians see full data, billing sees partial, analytics sees only de-identified data.
13
+ - Never store raw PII in caches, message queues, or temporary files. Use tokenized references that resolve through a secure lookup service.
14
+ - Validate masking coverage with automated tests: no PII field should reach a log sink, error report, or analytics pipeline unmasked.
15
+ - Maintain a PII field registry as a living document — every new field must be classified before it enters the data model.
16
+
17
+ - id: encryption-checks
18
+ tier: recommended
19
+ title: "Encryption Verification & Key Management"
20
+ content: |
21
+ ## Encryption Verification & Key Management
22
+
23
+ - Enforce AES-256 encryption at rest for all data stores containing PII. Verify encryption status in CI with infrastructure-as-code checks.
24
+ - Require TLS 1.2+ for all data in transit. Reject connections using older protocols at the load balancer level.
25
+ - Use envelope encryption with a managed KMS (AWS KMS, Azure Key Vault, GCP KMS). Never store encryption keys alongside encrypted data.
26
+ - Rotate encryption keys on a defined schedule (at least every 90 days). Automate rotation and re-encryption of affected data.
27
+ - Include encryption verification in deployment checklists: database encryption enabled, TLS certificates valid, KMS policies correct.
28
+ - Test encryption boundaries: verify that data crossing service boundaries remains encrypted and that decryption only occurs in authorized services.
29
+
30
+ - id: audit-logging-hipaa
31
+ tier: recommended
32
+ title: "HIPAA Audit Logging"
33
+ content: |
34
+ ## HIPAA Audit Logging
35
+
36
+ - Log every access to PII: who, when, what data, from where, and the business justification.
37
+ - Store audit logs in an append-only, tamper-evident store separate from application databases.
38
+ - Retain audit logs for a minimum of 6 years per HIPAA requirements.
39
+ - Generate automated audit reports: access frequency per user, unusual patterns, after-hours access.
40
+ - Implement real-time alerting for anomalous access: bulk record access, out-of-role access, VIP record access.
41
+ - Include a unique correlation ID in every request touching PII for end-to-end traceability across services.
@@ -1,13 +1,13 @@
1
- tag: HIPAA
2
- section: mcp-servers
3
- servers:
4
- - name: postgres
5
- description: "PostgreSQL database inspection and schema management — common backend for PHI/PII storage"
6
- command: npx
7
- args: ["-y", "@modelcontextprotocol/server-postgres"]
8
- tags: [HIPAA, HEALTHCARE, API]
9
- category: database
10
- tier: recommended
11
- env:
12
- POSTGRES_CONNECTION_STRING: ""
13
- url: "https://github.com/modelcontextprotocol/servers/tree/main/src/postgres"
1
+ tag: HIPAA
2
+ section: mcp-servers
3
+ servers:
4
+ - name: postgres
5
+ description: "PostgreSQL database inspection and schema management — common backend for PHI/PII storage"
6
+ command: npx
7
+ args: ["-y", "@modelcontextprotocol/server-postgres"]
8
+ tags: [HIPAA, HEALTHCARE, API]
9
+ category: database
10
+ tier: recommended
11
+ env:
12
+ POSTGRES_CONNECTION_STRING: ""
13
+ url: "https://github.com/modelcontextprotocol/servers/tree/main/src/postgres"
@@ -1,104 +1,104 @@
1
- tag: INFRA
2
- section: instructions
3
- blocks:
4
- - id: iac-containers
5
- tier: recommended
6
- title: "Infrastructure as Code & Containers"
7
- content: |
8
- ## Infrastructure as Code & Container Orchestration
9
-
10
- - Define all infrastructure declaratively using IaC tools (Terraform, Pulumi, CloudFormation, CDK). No manual resource creation—if it's not in code, it doesn't exist.
11
- - Version IaC alongside application code. Use separate state files/workspaces per environment (dev, staging, prod) and lock state to prevent concurrent modifications.
12
- - Write minimal, single-purpose Dockerfiles. Use multi-stage builds to keep production images small. Pin base image digests (not just tags) for reproducibility.
13
- - Run containers as non-root users. Drop all Linux capabilities except those explicitly needed. Use read-only root filesystems where possible.
14
- - Use container orchestration (Kubernetes, ECS, Nomad) for production workloads. Define resource requests and limits for every container to prevent noisy-neighbor issues.
15
- - Implement health checks (liveness, readiness, startup probes) for every service. The orchestrator should not route traffic to unhealthy instances.
16
- - Store environment-specific configuration in environment variables or a secrets manager—never bake secrets or environment config into container images.
17
-
18
- - id: cicd-pipelines
19
- tier: recommended
20
- title: "CI/CD & Deployment Patterns"
21
- content: |
22
- ## CI/CD & Deployment
23
-
24
- - Automate the full path from commit to production: lint → test → build → security scan → deploy to staging → integration test → promote to production.
25
- - Keep CI pipelines fast (< 10 minutes for the inner loop). Parallelize test suites, cache dependencies aggressively, and use incremental builds.
26
- - Use blue-green or canary deployments for zero-downtime releases. Automate rollback by monitoring error rates and latency during the canary window.
27
- - Implement GitOps for Kubernetes: declare the desired cluster state in a Git repository and use a reconciliation controller (ArgoCD, Flux) to converge actual state to desired state.
28
- - Scan container images for CVEs in CI using tools like Trivy or Grype. Block deployment of images with critical or high-severity vulnerabilities.
29
- - Tag every deployment artifact with the git SHA, build number, and timestamp. Maintain a deployment log that records what was deployed, when, by whom, and the rollback procedure.
30
-
31
- - id: observability-secrets
32
- tier: recommended
33
- title: "Observability & Secrets Management"
34
- content: |
35
- ## Observability, Monitoring & Secrets
36
-
37
- - Implement the three pillars of observability: structured logs (JSON with correlation IDs), metrics (counters, gauges, histograms via Prometheus/OpenTelemetry), and distributed traces (OpenTelemetry spans).
38
- - Define SLIs (error rate, latency p99, availability) and set SLOs (e.g., 99.9% availability, p99 latency < 200ms). Create error budget policies that slow down releases when the budget is exhausted.
39
- - Set up actionable alerts: every alert must have a runbook link, clear ownership, and a defined severity level. Eliminate noisy alerts ruthlessly—alert fatigue is a reliability risk.
40
- - Use a centralized secrets manager (Vault, AWS Secrets Manager, GCP Secret Manager) for all credentials, API keys, and certificates. Rotate secrets automatically on a schedule.
41
- - Never commit secrets to version control. Use pre-commit hooks (e.g., detect-secrets, gitleaks) to block accidental secret commits. Scan git history periodically for leaked secrets.
42
- - Follow the 12-factor app methodology: externalize config, treat logs as event streams, maximize dev/prod parity, and design for stateless, disposable processes.
43
- - Implement dashboards for each service covering the RED metrics (Rate, Errors, Duration) and the USE metrics (Utilization, Saturation, Errors) for infrastructure resources.
44
-
45
- - id: iac-cdk-patterns
46
- tier: recommended
47
- title: "CDK, Pulumi & IaC Patterns"
48
- content: |
49
- ## CDK, Pulumi & Advanced IaC
50
-
51
- ### AWS CDK
52
- - Define infrastructure in TypeScript/Python — real programming language, not HCL.
53
- Enables loops, conditionals, type checking, and IDE support.
54
- - Organize stacks by lifecycle: networking stack, database stack, application stack.
55
- Stacks that change together should be in the same stack.
56
- - Use L2/L3 Constructs for common patterns (ApplicationLoadBalancedFargateService, etc.).
57
- Drop to L1 (CfnResource) only when L2 doesn't expose what you need.
58
- - `cdk diff` before every deploy. Review change sets. Never deploy blind.
59
- - Use CDK Aspects for compliance: tag enforcement, encryption-at-rest checks, public access blocking.
60
-
61
- ### Pulumi
62
- - Same benefits as CDK (real language) but multi-cloud.
63
- TypeScript, Python, Go, or C#. Choose the team's strongest language.
64
- - Use ComponentResources to create reusable infrastructure modules.
65
- - State in Pulumi Cloud or self-managed S3 backend. Lock state to prevent concurrent modifications.
66
-
67
- ### IaC Best Practices (All Tools)
68
- - **Modules**: Don't repeat resource definitions. Create reusable modules for VPC, ECS service, RDS cluster, etc.
69
- - **Environments**: Same IaC, different configs. Use variables/parameters per environment, not separate code.
70
- - **State**: Remote state with locking. Never local state files in production.
71
- - **Drift detection**: Run periodic plan/diff against production. Alert on manual changes.
72
- - **Cost tagging**: Every resource tagged with `Environment`, `Service`, `Owner`, `CostCenter`.
73
- Untagged resources are flagged and blocked in CI.
74
- - **Blast radius**: Small, independently deployable stacks. A bad deploy to one stack doesn't affect others.
75
-
76
- - id: cloud-platform-guidance
77
- tier: recommended
78
- title: "Cloud Platform & Deployment Targets"
79
- content: |
80
- ## Cloud Platform Guidance
81
-
82
- ### Cloud vs. Local
83
- - **Local development**: Docker Compose for all backing services (DB, cache, queue, storage).
84
- Use LocalStack or MinIO for cloud service emulation when needed.
85
- - **Managed cloud**: Prefer managed services (RDS over self-hosted Postgres, SQS over self-hosted RabbitMQ)
86
- unless cost, latency, or compliance requires self-hosted.
87
- - **Serverless**: Consider for event-driven, bursty, or low-traffic workloads.
88
- Lambda/Cloud Functions: cold start < 1s, function duration < 15 min, stateless.
89
- Not suitable for: long-running processes, WebSockets, high-throughput steady-state.
90
- - **Edge compute**: Cloudflare Workers, Vercel Edge Functions, Lambda@Edge.
91
- Use for: auth, redirects, A/B testing, geo-routing. Keep logic simple and fast.
92
-
93
- ### Multi-Cloud Considerations
94
- - **Default**: single cloud provider. Multi-cloud adds operational complexity.
95
- - **When to go multi-cloud**: regulatory requirements, vendor negotiation leverage, or
96
- specific best-of-breed services (e.g., GCP for ML, AWS for general infra).
97
- - Abstract cloud-specific APIs behind interfaces if multi-cloud is a real possibility.
98
- - Use provider-agnostic services where cost-effective: Terraform, Pulumi, containers, S3-compatible storage.
99
-
100
- ### Cost Control
101
- - Set budget alerts at 50%, 80%, 100% of monthly target.
102
- - Reserved instances / savings plans for steady-state workloads. Spot/preemptible for batch jobs.
103
- - Right-size: review instance utilization monthly. Downsize over-provisioned resources.
104
- - Auto-sleep non-production environments outside business hours.
1
+ tag: INFRA
2
+ section: instructions
3
+ blocks:
4
+ - id: iac-containers
5
+ tier: recommended
6
+ title: "Infrastructure as Code & Containers"
7
+ content: |
8
+ ## Infrastructure as Code & Container Orchestration
9
+
10
+ - Define all infrastructure declaratively using IaC tools (Terraform, Pulumi, CloudFormation, CDK). No manual resource creation—if it's not in code, it doesn't exist.
11
+ - Version IaC alongside application code. Use separate state files/workspaces per environment (dev, staging, prod) and lock state to prevent concurrent modifications.
12
+ - Write minimal, single-purpose Dockerfiles. Use multi-stage builds to keep production images small. Pin base image digests (not just tags) for reproducibility.
13
+ - Run containers as non-root users. Drop all Linux capabilities except those explicitly needed. Use read-only root filesystems where possible.
14
+ - Use container orchestration (Kubernetes, ECS, Nomad) for production workloads. Define resource requests and limits for every container to prevent noisy-neighbor issues.
15
+ - Implement health checks (liveness, readiness, startup probes) for every service. The orchestrator should not route traffic to unhealthy instances.
16
+ - Store environment-specific configuration in environment variables or a secrets manager—never bake secrets or environment config into container images.
17
+
18
+ - id: cicd-pipelines
19
+ tier: recommended
20
+ title: "CI/CD & Deployment Patterns"
21
+ content: |
22
+ ## CI/CD & Deployment
23
+
24
+ - Automate the full path from commit to production: lint → test → build → security scan → deploy to staging → integration test → promote to production.
25
+ - Keep CI pipelines fast (< 10 minutes for the inner loop). Parallelize test suites, cache dependencies aggressively, and use incremental builds.
26
+ - Use blue-green or canary deployments for zero-downtime releases. Automate rollback by monitoring error rates and latency during the canary window.
27
+ - Implement GitOps for Kubernetes: declare the desired cluster state in a Git repository and use a reconciliation controller (ArgoCD, Flux) to converge actual state to desired state.
28
+ - Scan container images for CVEs in CI using tools like Trivy or Grype. Block deployment of images with critical or high-severity vulnerabilities.
29
+ - Tag every deployment artifact with the git SHA, build number, and timestamp. Maintain a deployment log that records what was deployed, when, by whom, and the rollback procedure.
30
+
31
+ - id: observability-secrets
32
+ tier: recommended
33
+ title: "Observability & Secrets Management"
34
+ content: |
35
+ ## Observability, Monitoring & Secrets
36
+
37
+ - Implement the three pillars of observability: structured logs (JSON with correlation IDs), metrics (counters, gauges, histograms via Prometheus/OpenTelemetry), and distributed traces (OpenTelemetry spans).
38
+ - Define SLIs (error rate, latency p99, availability) and set SLOs (e.g., 99.9% availability, p99 latency < 200ms). Create error budget policies that slow down releases when the budget is exhausted.
39
+ - Set up actionable alerts: every alert must have a runbook link, clear ownership, and a defined severity level. Eliminate noisy alerts ruthlessly—alert fatigue is a reliability risk.
40
+ - Use a centralized secrets manager (Vault, AWS Secrets Manager, GCP Secret Manager) for all credentials, API keys, and certificates. Rotate secrets automatically on a schedule.
41
+ - Never commit secrets to version control. Use pre-commit hooks (e.g., detect-secrets, gitleaks) to block accidental secret commits. Scan git history periodically for leaked secrets.
42
+ - Follow the 12-factor app methodology: externalize config, treat logs as event streams, maximize dev/prod parity, and design for stateless, disposable processes.
43
+ - Implement dashboards for each service covering the RED metrics (Rate, Errors, Duration) and the USE metrics (Utilization, Saturation, Errors) for infrastructure resources.
44
+
45
+ - id: iac-cdk-patterns
46
+ tier: recommended
47
+ title: "CDK, Pulumi & IaC Patterns"
48
+ content: |
49
+ ## CDK, Pulumi & Advanced IaC
50
+
51
+ ### AWS CDK
52
+ - Define infrastructure in TypeScript/Python — a real programming language, not HCL.
53
+ Enables loops, conditionals, type checking, and IDE support.
54
+ - Organize stacks by lifecycle: networking stack, database stack, application stack.
55
+ Resources that change together should be in the same stack.
56
+ - Use L2/L3 Constructs for common patterns (ApplicationLoadBalancedFargateService, etc.).
57
+ Drop to L1 (CfnResource) only when L2 doesn't expose what you need.
58
+ - `cdk diff` before every deploy. Review change sets. Never deploy blind.
59
+ - Use CDK Aspects for compliance: tag enforcement, encryption-at-rest checks, public access blocking.
60
+
61
+ ### Pulumi
62
+ - Same benefits as CDK (real language) but multi-cloud.
63
+ TypeScript, Python, Go, or C#. Choose the team's strongest language.
64
+ - Use ComponentResources to create reusable infrastructure modules.
65
+ - State in Pulumi Cloud or self-managed S3 backend. Lock state to prevent concurrent modifications.
66
+
67
+ ### IaC Best Practices (All Tools)
68
+ - **Modules**: Don't repeat resource definitions. Create reusable modules for VPC, ECS service, RDS cluster, etc.
69
+ - **Environments**: Same IaC, different configs. Use variables/parameters per environment, not separate code.
70
+ - **State**: Remote state with locking. Never local state files in production.
71
+ - **Drift detection**: Run periodic plan/diff against production. Alert on manual changes.
72
+ - **Cost tagging**: Every resource tagged with `Environment`, `Service`, `Owner`, `CostCenter`.
73
+ Untagged resources are flagged and blocked in CI.
74
+ - **Blast radius**: Small, independently deployable stacks. A bad deploy to one stack doesn't affect others.
75
+
76
+ - id: cloud-platform-guidance
77
+ tier: recommended
78
+ title: "Cloud Platform & Deployment Targets"
79
+ content: |
80
+ ## Cloud Platform Guidance
81
+
82
+ ### Cloud vs. Local
83
+ - **Local development**: Docker Compose for all backing services (DB, cache, queue, storage).
84
+ Use LocalStack or MinIO for cloud service emulation when needed.
85
+ - **Managed cloud**: Prefer managed services (RDS over self-hosted Postgres, SQS over self-hosted RabbitMQ)
86
+ unless cost, latency, or compliance requires self-hosted.
87
+ - **Serverless**: Consider for event-driven, bursty, or low-traffic workloads.
88
+ Lambda/Cloud Functions: cold start < 1s, function duration < 15 min, stateless.
89
+ Not suitable for: long-running processes, WebSockets, high-throughput steady-state.
90
+ - **Edge compute**: Cloudflare Workers, Vercel Edge Functions, Lambda@Edge.
91
+ Use for: auth, redirects, A/B testing, geo-routing. Keep logic simple and fast.
92
+
93
+ ### Multi-Cloud Considerations
94
+ - **Default**: single cloud provider. Multi-cloud adds operational complexity.
95
+ - **When to go multi-cloud**: regulatory requirements, vendor negotiation leverage, or
96
+ specific best-of-breed services (e.g., GCP for ML, AWS for general infra).
97
+ - Abstract cloud-specific APIs behind interfaces if multi-cloud is a real possibility.
98
+ - Use provider-agnostic services where cost-effective: Terraform, Pulumi, containers, S3-compatible storage.
99
+
100
+ ### Cost Control
101
+ - Set budget alerts at 50%, 80%, 100% of monthly target.
102
+ - Reserved instances / savings plans for steady-state workloads. Spot/preemptible for batch jobs.
103
+ - Right-size: review instance utilization monthly. Downsize over-provisioned resources.
104
+ - Auto-sleep non-production environments outside business hours.
@@ -1,20 +1,20 @@
1
- tag: INFRA
2
- section: mcp-servers
3
- servers:
4
- - name: docker
5
- description: "Docker container and image management via MCP"
6
- command: npx
7
- args: ["-y", "mcp-server-docker"]
8
- tags: [INFRA]
9
- category: deployment
10
- tier: recommended
11
- url: "https://github.com/ckreiling/mcp-server-docker"
12
-
13
- - name: kubernetes
14
- description: "Kubernetes cluster management — pods, deployments, services"
15
- command: npx
16
- args: ["-y", "mcp-server-kubernetes"]
17
- tags: [INFRA]
18
- category: deployment
19
- tier: optional
20
- url: "https://github.com/strowk/mcp-k8s-go"
1
+ tag: INFRA
2
+ section: mcp-servers
3
+ servers:
4
+ - name: docker
5
+ description: "Docker container and image management via MCP"
6
+ command: npx
7
+ args: ["-y", "mcp-server-docker"]
8
+ tags: [INFRA]
9
+ category: deployment
10
+ tier: recommended
11
+ url: "https://github.com/ckreiling/mcp-server-docker"
12
+
13
+ - name: kubernetes
14
+ description: "Kubernetes cluster management — pods, deployments, services"
15
+ command: npx
16
+ args: ["-y", "mcp-server-kubernetes"]
17
+ tags: [INFRA]
18
+ category: deployment
19
+ tier: optional
20
+ url: "https://github.com/Flux159/mcp-server-kubernetes"
@@ -1,46 +1,46 @@
1
- tag: INFRA
2
- section: nfr
3
- blocks:
4
- - id: infra-reliability
5
- tier: recommended
6
- title: "Infrastructure Reliability"
7
- content: |
8
- ## NFR: Infrastructure Reliability
9
-
10
- ### Availability
11
- - Target uptime for infrastructure platform: {{infra_uptime | default: 99.95%}}.
12
- - Multi-AZ deployment for all production workloads. Single-AZ acceptable for dev/staging.
13
- - Automated failover tested quarterly. Document actual recovery time.
14
-
15
- ### Change Management
16
- - All infrastructure changes go through code review (IaC PR).
17
- - Plan/diff output reviewed before apply. No blind applies.
18
- - Rollback procedure documented and tested for every change type.
19
- - Change windows for breaking changes. Emergency changes require post-mortem.
20
-
21
- ### Drift Detection
22
- - Scheduled drift detection (weekly minimum). Alert on any manual changes.
23
- - Drift remediation: either update code to match reality or re-apply to match code.
24
- - No resources outside IaC management in production.
25
-
26
- - id: infra-security
27
- tier: recommended
28
- title: "Infrastructure Security"
29
- content: |
30
- ## NFR: Infrastructure Security
31
-
32
- ### Network
33
- - Least-privilege security groups / firewall rules. No 0.0.0.0/0 ingress except load balancers.
34
- - Private subnets for application and database tiers. Public subnets only for load balancers.
35
- - VPC flow logs enabled. Anomaly detection on network patterns.
36
-
37
- ### Access Control
38
- - IAM roles with minimum required permissions. No wildcard (*) actions on production resources.
39
- - MFA required for all human access to cloud consoles and CI/CD pipelines.
40
- - Service-to-service auth via IAM roles or mTLS, not shared API keys.
41
- - Access reviewed quarterly. Revoke unused permissions.
42
-
43
- ### Secrets
44
- - All secrets in a secrets manager (Vault, AWS Secrets Manager, GCP Secret Manager).
45
- - Secrets rotated on schedule (90 days max). Auto-rotation preferred.
46
- - Pre-commit hooks block secret commits (gitleaks, detect-secrets).
1
+ tag: INFRA
2
+ section: nfr
3
+ blocks:
4
+ - id: infra-reliability
5
+ tier: recommended
6
+ title: "Infrastructure Reliability"
7
+ content: |
8
+ ## NFR: Infrastructure Reliability
9
+
10
+ ### Availability
11
+ - Target uptime for infrastructure platform: {{infra_uptime | default: 99.95%}}.
12
+ - Multi-AZ deployment for all production workloads. Single-AZ acceptable for dev/staging.
13
+ - Automated failover tested quarterly. Document actual recovery time.
14
+
15
+ ### Change Management
16
+ - All infrastructure changes go through code review (IaC PR).
17
+ - Plan/diff output reviewed before apply. No blind applies.
18
+ - Rollback procedure documented and tested for every change type.
19
+ - Change windows for breaking changes. Emergency changes require post-mortem.
20
+
21
+ ### Drift Detection
22
+ - Scheduled drift detection (weekly minimum). Alert on any manual changes.
23
+ - Drift remediation: either update code to match reality or re-apply to match code.
24
+ - No resources outside IaC management in production.
25
+
26
+ - id: infra-security
27
+ tier: recommended
28
+ title: "Infrastructure Security"
29
+ content: |
30
+ ## NFR: Infrastructure Security
31
+
32
+ ### Network
33
+ - Least-privilege security groups / firewall rules. No 0.0.0.0/0 ingress except load balancers.
34
+ - Private subnets for application and database tiers. Public subnets only for load balancers.
35
+ - VPC flow logs enabled. Anomaly detection on network patterns.
36
+
37
+ ### Access Control
38
+ - IAM roles with minimum required permissions. No wildcard (*) actions on production resources.
39
+ - MFA required for all human access to cloud consoles and CI/CD pipelines.
40
+ - Service-to-service auth via IAM roles or mTLS, not shared API keys.
41
+ - Access reviewed quarterly. Revoke unused permissions.
42
+
43
+ ### Secrets
44
+ - All secrets in a secrets manager (Vault, AWS Secrets Manager, GCP Secret Manager).
45
+ - Secrets rotated on schedule (90 days max). Auto-rotation preferred.
46
+ - Pre-commit hooks block secret commits (gitleaks, detect-secrets).
@@ -1,65 +1,65 @@
1
- tag: INFRA
2
- section: review
3
- blocks:
4
- - id: infra-architecture-review
5
- tier: recommended
6
- dimension: architecture
7
- title: "Infrastructure Architecture Review"
8
- description: |
9
- Evaluate IaC patterns, resource organization, and environment management.
10
- checklist:
11
- - id: iac-all-resources
12
- description: "All production resources defined in IaC. No manually created resources."
13
- severity: critical
14
- - id: module-reuse
15
- description: "Common patterns (VPC, database, service) extracted into reusable modules, not copy-pasted."
16
- severity: important
17
- - id: environment-isolation
18
- description: "Environments fully isolated: separate state files, separate accounts/projects where possible."
19
- severity: critical
20
- - id: blast-radius
21
- description: "Stacks/modules scoped to limit blast radius. A bad deploy to one stack doesn't affect others."
22
- severity: important
23
-
24
- - id: infra-security-review
25
- tier: recommended
26
- dimension: code-quality
27
- title: "Infrastructure Security Review"
28
- description: |
29
- Evaluate network security, access control, and secrets management.
30
- checklist:
31
- - id: least-privilege
32
- description: "IAM roles use minimum required permissions. No wildcard actions on production."
33
- severity: critical
34
- - id: no-public-exposure
35
- description: "Databases and application servers in private subnets. Only load balancers are publicly accessible."
36
- severity: critical
37
- - id: secrets-managed
38
- description: "All secrets in a secrets manager with rotation. No hardcoded credentials in IaC files."
39
- severity: critical
40
- - id: encryption-at-rest
41
- description: "Storage volumes, databases, and backups encrypted at rest. KMS keys managed per environment."
42
- severity: important
43
-
44
- - id: infra-ops-review
45
- tier: recommended
46
- dimension: performance
47
- title: "Infrastructure Operations Review"
48
- description: |
49
- Evaluate monitoring, cost management, and operational readiness.
50
- checklist:
51
- - id: resource-limits
52
- description: "CPU and memory limits defined for all containers. No unbounded resource consumption."
53
- severity: critical
54
- - id: auto-scaling
55
- description: "Auto-scaling configured with sensible min/max. Tested under load."
56
- severity: important
57
- - id: cost-tagging
58
- description: "Every resource tagged with Environment, Service, Owner. Untagged resources flagged in CI."
59
- severity: important
60
- - id: monitoring-alerting
61
- description: "Dashboards and alerts for all services. Every alert has a runbook link and clear ownership."
62
- severity: important
63
- - id: backup-tested
64
- description: "Backup and restore procedures tested. RPO/RTO validated against targets."
65
- severity: critical
1
+ tag: INFRA
2
+ section: review
3
+ blocks:
4
+ - id: infra-architecture-review
5
+ tier: recommended
6
+ dimension: architecture
7
+ title: "Infrastructure Architecture Review"
8
+ description: |
9
+ Evaluate IaC patterns, resource organization, and environment management.
10
+ checklist:
11
+ - id: iac-all-resources
12
+ description: "All production resources defined in IaC. No manually created resources."
13
+ severity: critical
14
+ - id: module-reuse
15
+ description: "Common patterns (VPC, database, service) extracted into reusable modules, not copy-pasted."
16
+ severity: important
17
+ - id: environment-isolation
18
+ description: "Environments fully isolated: separate state files, separate accounts/projects where possible."
19
+ severity: critical
20
+ - id: blast-radius
21
+ description: "Stacks/modules scoped to limit blast radius. A bad deploy to one stack doesn't affect others."
22
+ severity: important
23
+
24
+ - id: infra-security-review
25
+ tier: recommended
26
+ dimension: code-quality
27
+ title: "Infrastructure Security Review"
28
+ description: |
29
+ Evaluate network security, access control, and secrets management.
30
+ checklist:
31
+ - id: least-privilege
32
+ description: "IAM roles use minimum required permissions. No wildcard actions on production."
33
+ severity: critical
34
+ - id: no-public-exposure
35
+ description: "Databases and application servers in private subnets. Only load balancers are publicly accessible."
36
+ severity: critical
37
+ - id: secrets-managed
38
+ description: "All secrets in a secrets manager with rotation. No hardcoded credentials in IaC files."
39
+ severity: critical
40
+ - id: encryption-at-rest
41
+ description: "Storage volumes, databases, and backups encrypted at rest. KMS keys managed per environment."
42
+ severity: important
43
+
44
+ - id: infra-ops-review
45
+ tier: recommended
46
+ dimension: performance
47
+ title: "Infrastructure Operations Review"
48
+ description: |
49
+ Evaluate monitoring, cost management, and operational readiness.
50
+ checklist:
51
+ - id: resource-limits
52
+ description: "CPU and memory limits defined for all containers. No unbounded resource consumption."
53
+ severity: critical
54
+ - id: auto-scaling
55
+ description: "Auto-scaling configured with sensible min/max. Tested under load."
56
+ severity: important
57
+ - id: cost-tagging
58
+ description: "Every resource tagged with Environment, Service, Owner. Untagged resources flagged in CI."
59
+ severity: important
60
+ - id: monitoring-alerting
61
+ description: "Dashboards and alerts for all services. Every alert has a runbook link and clear ownership."
62
+ severity: important
63
+ - id: backup-tested
64
+ description: "Backup and restore procedures tested. RPO/RTO validated against targets."
65
+ severity: critical