@jetrabbits/agentic 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (440)
  1. package/AGENTS.md +143 -0
  2. package/README.md +154 -0
  3. package/agentic +1615 -0
  4. package/areas/devops/ci-cd/AGENTS.md +48 -0
  5. package/areas/devops/ci-cd/PROMPTS.md +7 -0
  6. package/areas/devops/ci-cd/prompts/onboard-repo.md +97 -0
  7. package/areas/devops/ci-cd/prompts/pipeline-debug.md +103 -0
  8. package/areas/devops/ci-cd/prompts/release-pipeline.md +115 -0
  9. package/areas/devops/ci-cd/rules/pipeline-standards.md +33 -0
  10. package/areas/devops/ci-cd/rules/quality-gates.md +24 -0
  11. package/areas/devops/ci-cd/rules/supply-chain-security.md +34 -0
  12. package/areas/devops/ci-cd/skills/artifact-management/SKILL.md +157 -0
  13. package/areas/devops/ci-cd/skills/build-optimization/SKILL.md +168 -0
  14. package/areas/devops/ci-cd/skills/github-actions-patterns/SKILL.md +190 -0
  15. package/areas/devops/ci-cd/skills/gitlab-ci-patterns/SKILL.md +169 -0
  16. package/areas/devops/ci-cd/skills/pipeline-security/SKILL.md +161 -0
  17. package/areas/devops/ci-cd/workflows/onboard-repo.md +73 -0
  18. package/areas/devops/ci-cd/workflows/pipeline-debug.md +66 -0
  19. package/areas/devops/ci-cd/workflows/release-pipeline.md +115 -0
  20. package/areas/devops/database-ops/AGENTS.md +47 -0
  21. package/areas/devops/database-ops/prompts/backup-verify.md +83 -0
  22. package/areas/devops/database-ops/prompts/db-incident.md +127 -0
  23. package/areas/devops/database-ops/rules/access-control.md +20 -0
  24. package/areas/devops/database-ops/rules/backup-policy.md +33 -0
  25. package/areas/devops/database-ops/rules/migration-runbook.md +32 -0
  26. package/areas/devops/database-ops/skills/backup-restore/SKILL.md +226 -0
  27. package/areas/devops/database-ops/skills/db-performance/SKILL.md +205 -0
  28. package/areas/devops/database-ops/skills/migration-safety/SKILL.md +155 -0
  29. package/areas/devops/database-ops/skills/postgres-operations/SKILL.md +156 -0
  30. package/areas/devops/database-ops/skills/redis-operations/SKILL.md +174 -0
  31. package/areas/devops/database-ops/workflows/backup-verify.md +107 -0
  32. package/areas/devops/database-ops/workflows/db-incident.md +86 -0
  33. package/areas/devops/devsecops/AGENTS.md +47 -0
  34. package/areas/devops/devsecops/prompts/policy-onboard.md +79 -0
  35. package/areas/devops/devsecops/prompts/security-scan-pipeline.md +131 -0
  36. package/areas/devops/devsecops/rules/container-security.md +22 -0
  37. package/areas/devops/devsecops/rules/policy-as-code.md +37 -0
  38. package/areas/devops/devsecops/rules/shift-left-policy.md +26 -0
  39. package/areas/devops/devsecops/skills/container-hardening/SKILL.md +146 -0
  40. package/areas/devops/devsecops/skills/opa-policies/SKILL.md +188 -0
  41. package/areas/devops/devsecops/skills/sbom-supply-chain/SKILL.md +165 -0
  42. package/areas/devops/devsecops/skills/secret-detection/SKILL.md +190 -0
  43. package/areas/devops/devsecops/skills/sigstore-signing/SKILL.md +184 -0
  44. package/areas/devops/devsecops/workflows/policy-onboard.md +104 -0
  45. package/areas/devops/devsecops/workflows/security-scan-pipeline.md +155 -0
  46. package/areas/devops/infrastructure/AGENTS.md +50 -0
  47. package/areas/devops/infrastructure/prompts/destroy-environment.md +81 -0
  48. package/areas/devops/infrastructure/prompts/drift-remediation.md +71 -0
  49. package/areas/devops/infrastructure/prompts/module-development.md +69 -0
  50. package/areas/devops/infrastructure/prompts/provision-environment.md +121 -0
  51. package/areas/devops/infrastructure/rules/iac-standards.md +80 -0
  52. package/areas/devops/infrastructure/rules/immutability.md +28 -0
  53. package/areas/devops/infrastructure/rules/secret-hygiene.md +53 -0
  54. package/areas/devops/infrastructure/rules/state-management.md +47 -0
  55. package/areas/devops/infrastructure/skills/ansible-playbooks/SKILL.md +174 -0
  56. package/areas/devops/infrastructure/skills/cost-optimization/SKILL.md +177 -0
  57. package/areas/devops/infrastructure/skills/drift-detection/SKILL.md +178 -0
  58. package/areas/devops/infrastructure/skills/state-management/SKILL.md +159 -0
  59. package/areas/devops/infrastructure/skills/terraform-modules/SKILL.md +169 -0
  60. package/areas/devops/infrastructure/workflows/destroy-environment.md +96 -0
  61. package/areas/devops/infrastructure/workflows/drift-remediation.md +66 -0
  62. package/areas/devops/infrastructure/workflows/module-development.md +101 -0
  63. package/areas/devops/infrastructure/workflows/provision-environment.md +96 -0
  64. package/areas/devops/kubernetes/AGENTS.md +57 -0
  65. package/areas/devops/kubernetes/PROMPTS.md +9 -0
  66. package/areas/devops/kubernetes/prompts/cluster-bootstrap.md +67 -0
  67. package/areas/devops/kubernetes/prompts/debug-workload.md +91 -0
  68. package/areas/devops/kubernetes/prompts/onboard-service.md +101 -0
  69. package/areas/devops/kubernetes/prompts/upgrade-cluster.md +63 -0
  70. package/areas/devops/kubernetes/rules/cluster-standards.md +51 -0
  71. package/areas/devops/kubernetes/rules/resource-governance.md +80 -0
  72. package/areas/devops/kubernetes/rules/upgrade-policy.md +52 -0
  73. package/areas/devops/kubernetes/rules/workload-security.md +64 -0
  74. package/areas/devops/kubernetes/skills/cluster-operations/SKILL.md +136 -0
  75. package/areas/devops/kubernetes/skills/helm-charts/SKILL.md +152 -0
  76. package/areas/devops/kubernetes/skills/network-policies/SKILL.md +169 -0
  77. package/areas/devops/kubernetes/skills/pod-troubleshooting/SKILL.md +129 -0
  78. package/areas/devops/kubernetes/skills/rbac-design/SKILL.md +148 -0
  79. package/areas/devops/kubernetes/skills/resource-tuning/SKILL.md +156 -0
  80. package/areas/devops/kubernetes/workflows/cluster-bootstrap.md +194 -0
  81. package/areas/devops/kubernetes/workflows/debug-workload.md +108 -0
  82. package/areas/devops/kubernetes/workflows/onboard-service.md +124 -0
  83. package/areas/devops/kubernetes/workflows/upgrade-cluster.md +165 -0
  84. package/areas/devops/networking/AGENTS.md +47 -0
  85. package/areas/devops/networking/prompts/onboard-ingress.md +119 -0
  86. package/areas/devops/networking/prompts/service-mesh-onboard.md +77 -0
  87. package/areas/devops/networking/rules/ingress-standards.md +17 -0
  88. package/areas/devops/networking/rules/network-segmentation.md +24 -0
  89. package/areas/devops/networking/rules/tls-policy.md +32 -0
  90. package/areas/devops/networking/skills/dns-management/SKILL.md +169 -0
  91. package/areas/devops/networking/skills/ingress-patterns/SKILL.md +165 -0
  92. package/areas/devops/networking/skills/service-mesh/SKILL.md +206 -0
  93. package/areas/devops/networking/skills/tls-termination/SKILL.md +198 -0
  94. package/areas/devops/networking/skills/vpc-design/SKILL.md +132 -0
  95. package/areas/devops/networking/workflows/onboard-ingress.md +64 -0
  96. package/areas/devops/networking/workflows/service-mesh-onboard.md +122 -0
  97. package/areas/devops/observability/AGENTS.md +48 -0
  98. package/areas/devops/observability/prompts/alert-investigation.md +117 -0
  99. package/areas/devops/observability/prompts/observability-stack-setup.md +99 -0
  100. package/areas/devops/observability/prompts/onboard-service-monitoring.md +79 -0
  101. package/areas/devops/observability/rules/alerting-standards.md +36 -0
  102. package/areas/devops/observability/rules/data-retention.md +19 -0
  103. package/areas/devops/observability/rules/golden-signals.md +28 -0
  104. package/areas/devops/observability/skills/distributed-tracing/SKILL.md +149 -0
  105. package/areas/devops/observability/skills/grafana-dashboards/SKILL.md +201 -0
  106. package/areas/devops/observability/skills/log-aggregation/SKILL.md +159 -0
  107. package/areas/devops/observability/skills/prometheus-alertmanager/SKILL.md +188 -0
  108. package/areas/devops/observability/skills/slo-implementation/SKILL.md +189 -0
  109. package/areas/devops/observability/workflows/alert-investigation.md +98 -0
  110. package/areas/devops/observability/workflows/observability-stack-setup.md +156 -0
  111. package/areas/devops/observability/workflows/onboard-service-monitoring.md +83 -0
  112. package/areas/devops/sre/AGENTS.md +48 -0
  113. package/areas/devops/sre/prompts/incident-response.md +129 -0
  114. package/areas/devops/sre/prompts/postmortem.md +101 -0
  115. package/areas/devops/sre/prompts/slo-review.md +125 -0
  116. package/areas/devops/sre/rules/error-budget-policy.md +25 -0
  117. package/areas/devops/sre/rules/on-call-standards.md +25 -0
  118. package/areas/devops/sre/rules/slo-policy.md +31 -0
  119. package/areas/devops/sre/skills/capacity-planning/SKILL.md +162 -0
  120. package/areas/devops/sre/skills/chaos-engineering/SKILL.md +186 -0
  121. package/areas/devops/sre/skills/incident-command/SKILL.md +119 -0
  122. package/areas/devops/sre/skills/postmortem-analysis/SKILL.md +104 -0
  123. package/areas/devops/sre/skills/slo-sli-design/SKILL.md +145 -0
  124. package/areas/devops/sre/workflows/incident-response.md +66 -0
  125. package/areas/devops/sre/workflows/postmortem.md +90 -0
  126. package/areas/devops/sre/workflows/slo-review.md +95 -0
  127. package/areas/software/backend/AGENTS.md +59 -0
  128. package/areas/software/backend/PROMPTS.md +50 -0
  129. package/areas/software/backend/README.md +48 -0
  130. package/areas/software/backend/prompts/add-migration.md +93 -0
  131. package/areas/software/backend/prompts/create-endpoint.md +97 -0
  132. package/areas/software/backend/prompts/debug-issue.md +87 -0
  133. package/areas/software/backend/prompts/develop-epic.md +83 -0
  134. package/areas/software/backend/prompts/develop-feature.md +91 -0
  135. package/areas/software/backend/prompts/refactor-module.md +79 -0
  136. package/areas/software/backend/prompts/test-feature.md +89 -0
  137. package/areas/software/backend/rules/architecture.md +20 -0
  138. package/areas/software/backend/rules/data_access.md +20 -0
  139. package/areas/software/backend/rules/security.md +20 -0
  140. package/areas/software/backend/rules/testing.md +19 -0
  141. package/areas/software/backend/skills/api-design/SKILL.md +170 -0
  142. package/areas/software/backend/skills/async-processing/SKILL.md +152 -0
  143. package/areas/software/backend/skills/database-modeling/SKILL.md +173 -0
  144. package/areas/software/backend/skills/observability/SKILL.md +162 -0
  145. package/areas/software/backend/skills/troubleshooting/SKILL.md +139 -0
  146. package/areas/software/backend/workflows/add-migration.md +79 -0
  147. package/areas/software/backend/workflows/create-endpoint.md +89 -0
  148. package/areas/software/backend/workflows/debug-issue.md +77 -0
  149. package/areas/software/backend/workflows/develop-epic.md +78 -0
  150. package/areas/software/backend/workflows/develop-feature.md +98 -0
  151. package/areas/software/backend/workflows/refactor-module.md +73 -0
  152. package/areas/software/backend/workflows/test-feature.md +67 -0
  153. package/areas/software/data-engineering/AGENTS.md +59 -0
  154. package/areas/software/data-engineering/PROMPTS.md +32 -0
  155. package/areas/software/data-engineering/prompts/backfill-data.md +107 -0
  156. package/areas/software/data-engineering/prompts/data-quality-incident.md +109 -0
  157. package/areas/software/data-engineering/prompts/lineage-trace.md +121 -0
  158. package/areas/software/data-engineering/prompts/new-model.md +117 -0
  159. package/areas/software/data-engineering/prompts/schema-migration.md +111 -0
  160. package/areas/software/data-engineering/rules/data-governance.md +11 -0
  161. package/areas/software/data-engineering/rules/pii-handling.md +19 -0
  162. package/areas/software/data-engineering/rules/pipeline-integrity.md +11 -0
  163. package/areas/software/data-engineering/rules/schema-management.md +21 -0
  164. package/areas/software/data-engineering/skills/data-modeling/SKILL.md +49 -0
  165. package/areas/software/data-engineering/skills/dbt-patterns/SKILL.md +43 -0
  166. package/areas/software/data-engineering/skills/lineage-governance/SKILL.md +38 -0
  167. package/areas/software/data-engineering/skills/orchestration/SKILL.md +35 -0
  168. package/areas/software/data-engineering/skills/quality-checks/SKILL.md +50 -0
  169. package/areas/software/data-engineering/skills/sql-optimization/SKILL.md +47 -0
  170. package/areas/software/data-engineering/skills/streaming-patterns/SKILL.md +48 -0
  171. package/areas/software/data-engineering/workflows/backfill-data.md +59 -0
  172. package/areas/software/data-engineering/workflows/data-quality-incident.md +64 -0
  173. package/areas/software/data-engineering/workflows/lineage-trace.md +56 -0
  174. package/areas/software/data-engineering/workflows/new-model.md +71 -0
  175. package/areas/software/data-engineering/workflows/schema-migration.md +67 -0
  176. package/areas/software/frontend/AGENTS.md +60 -0
  177. package/areas/software/frontend/PROMPTS.md +32 -0
  178. package/areas/software/frontend/prompts/a11y-fix.md +75 -0
  179. package/areas/software/frontend/prompts/bundle-analyze.md +75 -0
  180. package/areas/software/frontend/prompts/release-prep.md +83 -0
  181. package/areas/software/frontend/prompts/scaffold-component.md +69 -0
  182. package/areas/software/frontend/prompts/visual-regression.md +73 -0
  183. package/areas/software/frontend/rules/accessibility.md +16 -0
  184. package/areas/software/frontend/rules/architecture.md +29 -0
  185. package/areas/software/frontend/rules/performance.md +23 -0
  186. package/areas/software/frontend/rules/quality.md +12 -0
  187. package/areas/software/frontend/skills/a11y-audit/SKILL.md +61 -0
  188. package/areas/software/frontend/skills/api-integration/SKILL.md +58 -0
  189. package/areas/software/frontend/skills/component-design/SKILL.md +171 -0
  190. package/areas/software/frontend/skills/css-architecture/SKILL.md +146 -0
  191. package/areas/software/frontend/skills/error-handling/SKILL.md +55 -0
  192. package/areas/software/frontend/skills/performance-tuning/SKILL.md +58 -0
  193. package/areas/software/frontend/skills/state-management/SKILL.md +54 -0
  194. package/areas/software/frontend/skills/testing-patterns/SKILL.md +69 -0
  195. package/areas/software/frontend/workflows/a11y-fix.md +63 -0
  196. package/areas/software/frontend/workflows/bundle-analyze.md +56 -0
  197. package/areas/software/frontend/workflows/release-prep.md +66 -0
  198. package/areas/software/frontend/workflows/scaffold-component.md +67 -0
  199. package/areas/software/frontend/workflows/visual-regression.md +65 -0
  200. package/areas/software/full-stack/AGENTS.md +72 -0
  201. package/areas/software/full-stack/PROMPTS.md +66 -0
  202. package/areas/software/full-stack/prompts/backend-project-full-cycle.md +141 -0
  203. package/areas/software/full-stack/prompts/debug-issue.md +115 -0
  204. package/areas/software/full-stack/prompts/develop-feature.md +119 -0
  205. package/areas/software/full-stack/prompts/feature-implementation-flow.md +137 -0
  206. package/areas/software/full-stack/prompts/testing-ci-pipeline.md +119 -0
  207. package/areas/software/full-stack/rules/api-design-guide.md +24 -0
  208. package/areas/software/full-stack/rules/async-concurrency-guide.md +21 -0
  209. package/areas/software/full-stack/rules/backend-architecture-rule.md +41 -0
  210. package/areas/software/full-stack/rules/background-jobs-guide.md +20 -0
  211. package/areas/software/full-stack/rules/code-quality-guide.md +22 -0
  212. package/areas/software/full-stack/rules/database-access-guide.md +24 -0
  213. package/areas/software/full-stack/rules/database-migrations-guide.md +24 -0
  214. package/areas/software/full-stack/rules/domain-models-guide.md +28 -0
  215. package/areas/software/full-stack/rules/e2e-test-guide.md +18 -0
  216. package/areas/software/full-stack/rules/env-settings-guide.md +34 -0
  217. package/areas/software/full-stack/rules/error-handling-guide.md +20 -0
  218. package/areas/software/full-stack/rules/logging-observability-guide.md +22 -0
  219. package/areas/software/full-stack/rules/project-guide.md +34 -0
  220. package/areas/software/full-stack/rules/python-venv-guide.md +23 -0
  221. package/areas/software/full-stack/rules/security-guide.md +22 -0
  222. package/areas/software/full-stack/rules/svt-test-guide.md +17 -0
  223. package/areas/software/full-stack/rules/testing-ci-guide.md +25 -0
  224. package/areas/software/full-stack/skills/api-design-principles/SKILL.md +125 -0
  225. package/areas/software/full-stack/skills/api-design-principles/assets/api-design-checklist.md +155 -0
  226. package/areas/software/full-stack/skills/api-design-principles/assets/rest-api-template.py +182 -0
  227. package/areas/software/full-stack/skills/api-design-principles/references/graphql-schema-design.md +583 -0
  228. package/areas/software/full-stack/skills/api-design-principles/references/rest-best-practices.md +408 -0
  229. package/areas/software/full-stack/skills/api-design-principles/resources/implementation-playbook.md +513 -0
  230. package/areas/software/full-stack/skills/api-patterns/SKILL.md +81 -0
  231. package/areas/software/full-stack/skills/api-patterns/api-style.md +42 -0
  232. package/areas/software/full-stack/skills/api-patterns/auth.md +24 -0
  233. package/areas/software/full-stack/skills/api-patterns/documentation.md +26 -0
  234. package/areas/software/full-stack/skills/api-patterns/graphql.md +41 -0
  235. package/areas/software/full-stack/skills/api-patterns/rate-limiting.md +31 -0
  236. package/areas/software/full-stack/skills/api-patterns/response.md +37 -0
  237. package/areas/software/full-stack/skills/api-patterns/rest.md +40 -0
  238. package/areas/software/full-stack/skills/api-patterns/scripts/api_validator.py +211 -0
  239. package/areas/software/full-stack/skills/api-patterns/security-testing.md +122 -0
  240. package/areas/software/full-stack/skills/api-patterns/trpc.md +41 -0
  241. package/areas/software/full-stack/skills/api-patterns/versioning.md +22 -0
  242. package/areas/software/full-stack/skills/app-builder/SKILL.md +135 -0
  243. package/areas/software/full-stack/skills/app-builder/agent-coordination.md +71 -0
  244. package/areas/software/full-stack/skills/app-builder/feature-building.md +53 -0
  245. package/areas/software/full-stack/skills/app-builder/project-detection.md +34 -0
  246. package/areas/software/full-stack/skills/app-builder/scaffolding.md +118 -0
  247. package/areas/software/full-stack/skills/app-builder/tech-stack.md +40 -0
  248. package/areas/software/full-stack/skills/app-builder/templates/SKILL.md +39 -0
  249. package/areas/software/full-stack/skills/app-builder/templates/astro-static/TEMPLATE.md +76 -0
  250. package/areas/software/full-stack/skills/app-builder/templates/chrome-extension/TEMPLATE.md +92 -0
  251. package/areas/software/full-stack/skills/app-builder/templates/cli-tool/TEMPLATE.md +88 -0
  252. package/areas/software/full-stack/skills/app-builder/templates/electron-desktop/TEMPLATE.md +88 -0
  253. package/areas/software/full-stack/skills/app-builder/templates/express-api/TEMPLATE.md +83 -0
  254. package/areas/software/full-stack/skills/app-builder/templates/flutter-app/TEMPLATE.md +90 -0
  255. package/areas/software/full-stack/skills/app-builder/templates/monorepo-turborepo/TEMPLATE.md +90 -0
  256. package/areas/software/full-stack/skills/app-builder/templates/nextjs-fullstack/TEMPLATE.md +82 -0
  257. package/areas/software/full-stack/skills/app-builder/templates/nextjs-saas/TEMPLATE.md +100 -0
  258. package/areas/software/full-stack/skills/app-builder/templates/nextjs-static/TEMPLATE.md +106 -0
  259. package/areas/software/full-stack/skills/app-builder/templates/nuxt-app/TEMPLATE.md +101 -0
  260. package/areas/software/full-stack/skills/app-builder/templates/python-fastapi/TEMPLATE.md +83 -0
  261. package/areas/software/full-stack/skills/app-builder/templates/react-native-app/TEMPLATE.md +93 -0
  262. package/areas/software/full-stack/skills/backend-developer/SKILL.md +58 -0
  263. package/areas/software/full-stack/skills/bash-pro/SKILL.md +310 -0
  264. package/areas/software/full-stack/skills/blackbox-test/SKILL.md +84 -0
  265. package/areas/software/full-stack/skills/prompt-project-planner/SKILL.md +130 -0
  266. package/areas/software/full-stack/skills/prompt-project-planner/output.schema.md +68 -0
  267. package/areas/software/full-stack/skills/prompt-project-planner/questions.md +80 -0
  268. package/areas/software/full-stack/skills/python-pro/SKILL.md +158 -0
  269. package/areas/software/full-stack/skills/skill-creator/LICENSE.txt +202 -0
  270. package/areas/software/full-stack/skills/skill-creator/SKILL.md +356 -0
  271. package/areas/software/full-stack/skills/skill-creator/references/output-patterns.md +82 -0
  272. package/areas/software/full-stack/skills/skill-creator/references/workflows.md +28 -0
  273. package/areas/software/full-stack/skills/skill-creator/scripts/init_skill.py +303 -0
  274. package/areas/software/full-stack/skills/skill-creator/scripts/package_skill.py +110 -0
  275. package/areas/software/full-stack/skills/skill-creator/scripts/quick_validate.py +95 -0
  276. package/areas/software/full-stack/workflows/backend-project-full-cycle.md +132 -0
  277. package/areas/software/full-stack/workflows/debug-issue.md +70 -0
  278. package/areas/software/full-stack/workflows/develop-feature.md +85 -0
  279. package/areas/software/full-stack/workflows/feature-implementation-flow.md +78 -0
  280. package/areas/software/full-stack/workflows/testing-ci-pipeline.md +65 -0
  281. package/areas/software/general/AGENTS.md +68 -0
  282. package/areas/software/general/prompts/code-review-workflow.md +87 -0
  283. package/areas/software/general/prompts/development-cycle-workflow.md +83 -0
  284. package/areas/software/general/prompts/project-setup-workflow.md +93 -0
  285. package/areas/software/general/rules/code-style-guide.md +31 -0
  286. package/areas/software/general/rules/docker-compose-guide.md +27 -0
  287. package/areas/software/general/rules/git-workflow-guide.md +27 -0
  288. package/areas/software/general/rules/github-workflow-guide.md +27 -0
  289. package/areas/software/general/rules/gitlab-ci-guide.md +27 -0
  290. package/areas/software/general/rules/lint-format-guide.md +29 -0
  291. package/areas/software/general/rules/makefile-guide.md +34 -0
  292. package/areas/software/general/rules/readme-sync-guide.md +40 -0
  293. package/areas/software/general/rules/sdlc-methodology-guide.md +27 -0
  294. package/areas/software/general/rules/sdlc-role-responsibilities.md +108 -0
  295. package/areas/software/general/skills/general-dev-tools/SKILL.md +324 -0
  296. package/areas/software/general/workflows/code-review-workflow.md +84 -0
  297. package/areas/software/general/workflows/development-cycle-workflow.md +85 -0
  298. package/areas/software/general/workflows/project-setup-workflow.md +94 -0
  299. package/areas/software/mlops/AGENTS.md +57 -0
  300. package/areas/software/mlops/PROMPTS.md +32 -0
  301. package/areas/software/mlops/prompts/champion-challenger.md +87 -0
  302. package/areas/software/mlops/prompts/deploy-endpoint.md +91 -0
  303. package/areas/software/mlops/prompts/evaluate-model.md +87 -0
  304. package/areas/software/mlops/prompts/model-incident.md +87 -0
  305. package/areas/software/mlops/prompts/train-experiment.md +83 -0
  306. package/areas/software/mlops/rules/data-integrity.md +9 -0
  307. package/areas/software/mlops/rules/model-governance.md +9 -0
  308. package/areas/software/mlops/rules/production-safety.md +9 -0
  309. package/areas/software/mlops/rules/reproducibility.md +9 -0
  310. package/areas/software/mlops/skills/experiment-tracking/SKILL.md +29 -0
  311. package/areas/software/mlops/skills/feature-engineering/SKILL.md +44 -0
  312. package/areas/software/mlops/skills/inference-serving/SKILL.md +35 -0
  313. package/areas/software/mlops/skills/model-evaluation/SKILL.md +40 -0
  314. package/areas/software/mlops/skills/model-monitoring/SKILL.md +32 -0
  315. package/areas/software/mlops/workflows/champion-challenger.md +65 -0
  316. package/areas/software/mlops/workflows/deploy-endpoint.md +70 -0
  317. package/areas/software/mlops/workflows/evaluate-model.md +63 -0
  318. package/areas/software/mlops/workflows/model-incident.md +64 -0
  319. package/areas/software/mlops/workflows/train-experiment.md +56 -0
  320. package/areas/software/mobile/AGENTS.md +58 -0
  321. package/areas/software/mobile/PROMPTS.md +32 -0
  322. package/areas/software/mobile/prompts/crash-triage.md +63 -0
  323. package/areas/software/mobile/prompts/device-testing.md +83 -0
  324. package/areas/software/mobile/prompts/ota-update.md +75 -0
  325. package/areas/software/mobile/prompts/release-build.md +67 -0
  326. package/areas/software/mobile/prompts/store-submission.md +79 -0
  327. package/areas/software/mobile/rules/offline-first.md +10 -0
  328. package/areas/software/mobile/rules/performance-budget.md +20 -0
  329. package/areas/software/mobile/rules/platform-compliance.md +17 -0
  330. package/areas/software/mobile/rules/security-mobile.md +9 -0
  331. package/areas/software/mobile/skills/app-store-prep/SKILL.md +27 -0
  332. package/areas/software/mobile/skills/mobile-testing/SKILL.md +36 -0
  333. package/areas/software/mobile/skills/native-modules/SKILL.md +38 -0
  334. package/areas/software/mobile/skills/navigation-patterns/SKILL.md +49 -0
  335. package/areas/software/mobile/skills/push-notifications/SKILL.md +40 -0
  336. package/areas/software/mobile/skills/state-sync/SKILL.md +48 -0
  337. package/areas/software/mobile/workflows/crash-triage.md +63 -0
  338. package/areas/software/mobile/workflows/device-testing.md +54 -0
  339. package/areas/software/mobile/workflows/ota-update.md +54 -0
  340. package/areas/software/mobile/workflows/release-build.md +67 -0
  341. package/areas/software/mobile/workflows/store-submission.md +63 -0
  342. package/areas/software/platform/AGENTS.md +67 -0
  343. package/areas/software/platform/PROMPTS.md +32 -0
  344. package/areas/software/platform/prompts/cost-audit.md +117 -0
  345. package/areas/software/platform/prompts/deploy-production.md +109 -0
  346. package/areas/software/platform/prompts/drift-check.md +107 -0
  347. package/areas/software/platform/prompts/incident-response.md +121 -0
  348. package/areas/software/platform/prompts/provision-env.md +113 -0
  349. package/areas/software/platform/rules/cost-governance.md +11 -0
  350. package/areas/software/platform/rules/immutability.md +17 -0
  351. package/areas/software/platform/rules/reliability.md +19 -0
  352. package/areas/software/platform/rules/security-posture.md +12 -0
  353. package/areas/software/platform/skills/ci-cd-pipelines/SKILL.md +58 -0
  354. package/areas/software/platform/skills/incident-response/SKILL.md +41 -0
  355. package/areas/software/platform/skills/k8s-manifests/SKILL.md +56 -0
  356. package/areas/software/platform/skills/networking/SKILL.md +44 -0
  357. package/areas/software/platform/skills/observability-setup/SKILL.md +49 -0
  358. package/areas/software/platform/skills/secrets-management/SKILL.md +43 -0
  359. package/areas/software/platform/skills/terraform-patterns/SKILL.md +75 -0
  360. package/areas/software/platform/workflows/cost-audit.md +61 -0
  361. package/areas/software/platform/workflows/deploy-production.md +67 -0
  362. package/areas/software/platform/workflows/drift-check.md +61 -0
  363. package/areas/software/platform/workflows/incident-response.md +69 -0
  364. package/areas/software/platform/workflows/provision-env.md +77 -0
  365. package/areas/software/qa/AGENTS.md +58 -0
  366. package/areas/software/qa/PROMPTS.md +32 -0
  367. package/areas/software/qa/prompts/flakiness-investigation.md +61 -0
  368. package/areas/software/qa/prompts/performance-audit.md +65 -0
  369. package/areas/software/qa/prompts/regression-suite.md +61 -0
  370. package/areas/software/qa/prompts/smoke-test.md +65 -0
  371. package/areas/software/qa/prompts/test-coverage-report.md +61 -0
  372. package/areas/software/qa/rules/flakiness-policy.md +12 -0
  373. package/areas/software/qa/rules/quality-gates.md +28 -0
  374. package/areas/software/qa/rules/test-data.md +9 -0
  375. package/areas/software/qa/rules/test-strategy.md +11 -0
  376. package/areas/software/qa/skills/accessibility-testing/SKILL.md +139 -0
  377. package/areas/software/qa/skills/api-testing/SKILL.md +140 -0
  378. package/areas/software/qa/skills/e2e-patterns/SKILL.md +152 -0
  379. package/areas/software/qa/skills/performance-testing/SKILL.md +177 -0
  380. package/areas/software/qa/skills/test-data-management/SKILL.md +161 -0
  381. package/areas/software/qa/skills/test-pyramid/SKILL.md +127 -0
  382. package/areas/software/qa/workflows/flakiness-investigation.md +63 -0
  383. package/areas/software/qa/workflows/performance-audit.md +59 -0
  384. package/areas/software/qa/workflows/regression-suite.md +59 -0
  385. package/areas/software/qa/workflows/smoke-test.md +64 -0
  386. package/areas/software/qa/workflows/test-coverage-report.md +57 -0
  387. package/areas/software/security/AGENTS.md +58 -0
  388. package/areas/software/security/PROMPTS.md +32 -0
  389. package/areas/software/security/prompts/compliance-report.md +113 -0
  390. package/areas/software/security/prompts/pen-test-sim.md +113 -0
  391. package/areas/software/security/prompts/secret-rotation.md +115 -0
  392. package/areas/software/security/prompts/security-scan.md +91 -0
  393. package/areas/software/security/prompts/threat-model-review.md +105 -0
  394. package/areas/software/security/rules/compliance-baseline.md +23 -0
  395. package/areas/software/security/rules/dependency-policy.md +12 -0
  396. package/areas/software/security/rules/secrets-policy.md +22 -0
  397. package/areas/software/security/rules/secure-coding.md +22 -0
  398. package/areas/software/security/skills/auth-patterns/SKILL.md +42 -0
  399. package/areas/software/security/skills/crypto-standards/SKILL.md +42 -0
  400. package/areas/software/security/skills/dependency-audit/SKILL.md +29 -0
  401. package/areas/software/security/skills/sast-dast-interpretation/SKILL.md +33 -0
  402. package/areas/software/security/skills/security-headers/SKILL.md +29 -0
  403. package/areas/software/security/skills/threat-modeling/SKILL.md +36 -0
  404. package/areas/software/security/workflows/compliance-report.md +57 -0
  405. package/areas/software/security/workflows/pen-test-sim.md +63 -0
  406. package/areas/software/security/workflows/secret-rotation.md +67 -0
  407. package/areas/software/security/workflows/security-scan.md +64 -0
  408. package/areas/software/security/workflows/threat-model-review.md +62 -0
  409. package/areas/template/AGENTS-area.tmpl.md +61 -0
  410. package/areas/template/AGENTS.tmpl.md +67 -0
  411. package/areas/template/GUIDE.md +102 -0
  412. package/areas/template/PROMPTS.tmpl.md +29 -0
  413. package/areas/template/README.md +57 -0
  414. package/areas/template/README.tmpl.md +51 -0
  415. package/areas/template/prompt.tmpl.md +101 -0
  416. package/areas/template/rule.tmpl.md +71 -0
  417. package/areas/template/skill.tmpl.md +108 -0
  418. package/areas/template/workflow.tmpl.md +104 -0
  419. package/bin/agentic.js +24 -0
  420. package/extensions/antigravity/GEMINI.md +10 -0
  421. package/extensions/claude/CLAUDE.md +10 -0
  422. package/extensions/codex/AGENTS.override.md +93 -0
  423. package/extensions/gemini/GEMINI.md +10 -0
  424. package/extensions/opencode/agents/designer.md +65 -0
  425. package/extensions/opencode/agents/developer.md +63 -0
  426. package/extensions/opencode/agents/devops-engineer.md +69 -0
  427. package/extensions/opencode/agents/pm.md +61 -0
  428. package/extensions/opencode/agents/product-owner.md +76 -0
  429. package/extensions/opencode/agents/qa.md +66 -0
  430. package/extensions/opencode/agents/team-lead.md +67 -0
  431. package/extensions/opencode/commands/feature.md +75 -0
  432. package/extensions/opencode/opencode.json +93 -0
  433. package/extensions/opencode/plugins/model-checker.json +14 -0
  434. package/extensions/opencode/plugins/model-checker.ts +279 -0
  435. package/extensions/opencode/plugins/sound-notification.ts +13 -0
  436. package/extensions/opencode/plugins/telegram-notification.ts +86 -0
  437. package/extensions/opencode/skills/code_review_expert/SKILL.md +144 -0
  438. package/extensions/opencode/skills/design_expert/SKILL.md +42 -0
  439. package/extensions/opencode/skills/qa_expert/SKILL.md +116 -0
  440. package/package.json +19 -0
@@ -0,0 +1,156 @@
1
+ ---
2
+ name: observability-stack-setup
3
+ type: workflow
4
+ trigger: /observability-stack-setup
5
+ description: Deploy the full observability stack (Prometheus + Loki + Tempo + Grafana) to a Kubernetes cluster from scratch.
6
+ inputs:
7
+ - cluster_name
8
+ - storage_class
9
+ - retention_days_metrics
10
+ - retention_days_logs
11
+ outputs:
12
+ - running_observability_stack
13
+ - grafana_url
14
+ - setup_report
15
+ roles:
16
+ - devops-engineer
17
+ execution:
18
+ initiator: developer
19
+ related-rules:
20
+ - golden-signals.md
21
+ - alerting-standards.md
22
+ - data-retention.md
23
+ uses-skills:
24
+ - prometheus-alertmanager
25
+ - grafana-dashboards
26
+ - log-aggregation
27
+ - distributed-tracing
28
+ - slo-implementation
29
+ quality-gates:
30
+ - all components healthy (Prometheus targets UP)
31
+ - sample alert fires and reaches Alertmanager
32
+ - Grafana shows data from all three pillars (metrics/logs/traces)
33
+ ---
34
+
35
+ ## Steps
36
+
37
+ ### 1. Namespace & Prerequisites — `@devops-engineer`
38
+ ```bash
39
+ kubectl create namespace monitoring
40
+ kubectl create namespace logging
41
+ kubectl create namespace tracing
42
+
43
+ # Add Helm repos
44
+ helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
45
+ helm repo add grafana https://grafana.github.io/helm-charts
+ helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts
46
+ helm repo update
47
+ ```
48
+
49
+ ### 2. kube-prometheus-stack (Prometheus + Grafana + Alertmanager) — `@devops-engineer`
50
+ ```bash
51
+ helm upgrade --install kube-prometheus-stack \
52
+ prometheus-community/kube-prometheus-stack \
53
+ -n monitoring \
54
+ -f infra/observability/prometheus-values.yaml \
55
+ --create-namespace
56
+
57
+ # prometheus-values.yaml (key sections)
58
+ # prometheus:
59
+ # prometheusSpec:
60
+ # retention: 15d
61
+ # storageSpec:
62
+ # volumeClaimTemplate:
63
+ # spec:
64
+ # storageClassName: longhorn
65
+ # resources: { requests: { storage: 50Gi } }
66
+ # alertmanager:
67
+ # config: <alertmanager routing config>
68
+ # grafana:
69
+ # adminPassword: <from Vault>
70
+ # persistence: { enabled: true, storageClassName: longhorn, size: 10Gi }
71
+ ```
72
+ - Verify: `kubectl get pods -n monitoring` — all Running
73
+ - Check Prometheus targets: `kubectl port-forward svc/kube-prometheus-stack-prometheus 9090:9090 -n monitoring`
74
+
75
+ ### 3. Loki + Promtail (Logs) — `@devops-engineer`
76
+ ```bash
77
+ helm upgrade --install loki grafana/loki-stack \
78
+ -n logging \
79
+ -f infra/observability/loki-values.yaml \
80
+ --create-namespace
81
+
82
+ # loki-values.yaml key settings:
83
+ # loki.config.limits_config.retention_period: 720h (30d)
84
+ # promtail.config.clients[0].url: http://loki.logging:3100/loki/api/v1/push
85
+ ```
86
+ - Add Loki datasource in Grafana: `http://loki.logging:3100`
87
+ - Verify: `{namespace="logging"}` returns logs in Grafana Explore
88
+
89
+ ### 4. Tempo (Traces) — `@devops-engineer`
90
+ ```bash
91
+ helm upgrade --install tempo grafana/tempo \
92
+ -n tracing \
93
+ -f infra/observability/tempo-values.yaml \
94
+ --create-namespace
95
+
96
+ # tempo-values.yaml key settings:
97
+ # tempo.retention: 168h (7d)
98
+ # persistence.enabled: true
99
+ # tempo.receivers.otlp.protocols.grpc.endpoint: 0.0.0.0:4317
100
+ ```
101
+ - Add Tempo datasource in Grafana: `http://tempo.tracing:3100`
102
+ - Configure log-to-trace correlation: add a Loki derived field that extracts `traceID` and links it to the Tempo datasource
103
+
104
+ ### 5. OpenTelemetry Collector (DaemonSet) — `@devops-engineer`
105
+ ```bash
106
+ helm upgrade --install otel-collector open-telemetry/opentelemetry-collector \
107
+ -n monitoring \
108
+ -f infra/observability/otel-collector-values.yaml
109
+ ```
110
+ - Accepts OTLP from apps (port 4317 gRPC, 4318 HTTP)
111
+ - Forwards to Tempo
112
+
113
+ ### 6. Validate Stack — `@devops-engineer`
114
+ ```bash
115
+ # Test Prometheus query
116
+ kubectl exec -n monitoring sts/prometheus-kube-prometheus-stack-prometheus -c prometheus -- \
117
+ wget -qO- 'http://localhost:9090/api/v1/query?query=up' | jq '.data.result | length'
118
+
119
+ # Test alert routing: create test alert
120
+ kubectl apply -f - << 'YAML'
121
+ apiVersion: monitoring.coreos.com/v1
122
+ kind: PrometheusRule
123
+ metadata:
124
+ name: test-alert
125
+ namespace: monitoring
126
+ spec:
127
+ groups:
128
+ - name: test
129
+ rules:
130
+ - alert: TestAlert
131
+ expr: vector(1) # always fires
132
+ labels: { severity: warning }
133
+ annotations: { summary: "Test alert for stack validation" }
134
+ YAML
135
+ # Check Alertmanager received it: kubectl port-forward svc/kube-prometheus-stack-alertmanager 9093:9093 -n monitoring
136
+
137
+ # Test logs visible
138
+ kubectl port-forward svc/kube-prometheus-stack-grafana 3000:80 -n monitoring
139
+ # Open Grafana → Explore → Loki → {namespace="monitoring"} → should show logs
140
+ ```
141
+
142
+ ### 7. Import Dashboards — `@devops-engineer`
143
+ ```bash
144
+ # Apply standard dashboard ConfigMaps (GitOps)
145
+ kubectl apply -f infra/observability/dashboards/ -n monitoring
146
+
147
+ # Or import via Grafana API
148
+ for dashboard in infra/observability/dashboards/*.json; do
149
+ curl -X POST -u "admin:${GRAFANA_PASS}" http://localhost:3000/api/dashboards/import \
150
+ -H 'Content-Type: application/json' \
151
+ -d "{\"dashboard\": $(cat "$dashboard"), \"overwrite\": true}"
152
+ done
153
+ ```
154
+
155
+ ## Exit
156
+ All 4 components healthy + test alert fired + dashboards showing data = stack deployed.
@@ -0,0 +1,83 @@
1
+ ---
2
+ name: onboard-service-monitoring
3
+ type: workflow
4
+ trigger: /onboard-service-monitoring
5
+ description: Add full observability (metrics, logs, traces, alerts, dashboard) to an existing service.
6
+ inputs:
7
+ - service_name
8
+ - namespace
9
+ - language/framework
10
+ outputs:
11
+ - running_metrics_scrape
12
+ - grafana_dashboard
13
+ - alert_rules_deployed
14
+ roles:
15
+ - devops-engineer
16
+ - developer
17
+ execution:
18
+ initiator: developer
19
+ related-rules:
20
+ - golden-signals.md
21
+ - alerting-standards.md
22
+ uses-skills:
23
+ - prometheus-alertmanager
24
+ - grafana-dashboards
25
+ - distributed-tracing
26
+ - log-aggregation
27
+ quality-gates:
28
+ - all four golden signals visible in Prometheus
29
+ - at least one critical alert deployed with runbook
30
+ - logs visible in Loki with trace_id field
31
+ ---
32
+
33
+ ## Steps
34
+
35
+ ### 1. Metrics Instrumentation — `@developer`
36
+ - Add Prometheus client library to service
37
+ - Expose standard HTTP metrics (requests_total, duration histogram, active_requests)
38
+ - Expose `/metrics` endpoint on port 9090 (or sidecar annotation)
39
+ - **Done when:** `curl http://<pod-ip>:9090/metrics` returns Prometheus format
40
+
41
+ ### 2. ServiceMonitor — `@devops-engineer`
42
+ ```yaml
43
+ apiVersion: monitoring.coreos.com/v1
44
+ kind: ServiceMonitor
45
+ metadata:
46
+ name: ${SERVICE}
47
+ namespace: ${NAMESPACE}
48
+ spec:
49
+ selector:
50
+ matchLabels: { app: ${SERVICE} }
51
+ endpoints:
52
+ - port: metrics
53
+ interval: 15s
54
+ path: /metrics
55
+ ```
56
+ - **Done when:** Prometheus targets page shows service as UP
57
+
58
+ ### 3. Tracing Instrumentation — `@developer`
59
+ - Add OpenTelemetry SDK (or use K8s operator auto-injection)
60
+ - Configure OTLP exporter → otel-collector:4317
61
+ - Verify trace_id appears in application logs
62
+ - **Done when:** traces visible in Tempo; trace_id in logs
63
+
64
+ ### 4. Log Labels — `@devops-engineer`
65
+ - Verify Promtail/Fluent Bit picks up pod logs
66
+ - Confirm JSON parsing works: `{namespace="${NAMESPACE}", app="${SERVICE}"} | json`
67
+ - Add log-based alert if service emits structured error logs
68
+ - **Done when:** logs searchable in Loki with level + trace_id fields
69
+
70
+ ### 5. Alert Rules — `@devops-engineer`
71
+ - Create `PrometheusRule` with golden signal alerts (HighErrorRate, HighP99Latency, PodMemoryPressure)
72
+ - Write runbook for each alert in `docs/runbooks/`
73
+ - Test alert firing: temporarily lower threshold, verify Alertmanager receives it
74
+ - **Done when:** all alerts show in Prometheus rules page; test fire works
75
+
76
+ ### 6. Grafana Dashboard — `@devops-engineer`
77
+ - Import standard service overview dashboard template
78
+ - Customize: add service-specific panels (queue depth, custom business metrics)
79
+ - Link trace panel (Tempo datasource) to request duration panel
80
+ - **Done when:** dashboard saved in `infra/observability/dashboards/`; Grafana shows live data
81
+
82
+ ## Exit
83
+ Golden signals in Prometheus + logs in Loki + traces in Tempo + alerts deployed + dashboard live = service monitored.
@@ -0,0 +1,48 @@
1
+ # SRE — guidance index
2
+
3
+ ## What this area covers
4
+
5
+ Site reliability engineering: SLO/SLI design, error budget policy, chaos engineering, capacity planning, incident command, and post-mortem facilitation. The SRE area treats reliability as a measurable feature with a finite budget — not a vague aspiration.
6
+
7
+ ## Guidance chain
8
+
9
+ 1. Project `.agent/` baseline
10
+ 2. `sre/rules/*` — load all
11
+ 3. `sre/skills/*/SKILL.md` — load matching skill only
12
+ 4. `sre/workflows/*` — load matching workflow
13
+
14
+ ## Cross-cutting constraints
15
+
16
+ - **SLOs drive decisions** — if error budget remains, ship features; if exhausted, halt features and fix reliability.
17
+ - **No heroics** — every repeated manual action is a toil item to automate.
18
+ - **Blameless culture** — incidents indict systems, not people. Post-mortems focus on what the system lacked.
19
+ - **Data before action** — no reliability work starts without a metric showing the problem.
20
+
21
+ ## Spec map
22
+
23
+ ```text
24
+ sre/
25
+ ├── rules/
26
+ │ ├── slo-policy.md ← SLO definition standards, window sizes, target tiers
27
+ │ ├── error-budget-policy.md ← budget consumption thresholds, freeze triggers
28
+ │ └── on-call-standards.md ← rotation design, escalation, response SLAs
29
+ ├── skills/
30
+ │ ├── slo-sli-design/SKILL.md ← SLI selection, SLO target setting, burn-rate alerts
31
+ │ ├── chaos-engineering/SKILL.md ← experiment design, blast radius, rollback gates
32
+ │ ├── capacity-planning/SKILL.md ← demand forecasting, right-sizing, headroom models
33
+ │ ├── incident-command/SKILL.md ← severity classification, role assignment, comms cadence
34
+ │ └── postmortem-analysis/SKILL.md ← 5 Whys, fault trees, systemic action items
35
+ ├── workflows/
36
+ │ ├── incident-response.md ← /incident-response
37
+ │ ├── postmortem.md ← /postmortem
38
+ │ └── slo-review.md ← /slo-review
39
+ └── prompts/
40
+ └── *.md
41
+ ```
42
+
43
+ ## Discovery patterns
44
+
45
+ - `rules/*.md`
46
+ - `skills/*/SKILL.md`
47
+ - `workflows/*.md`
48
+ - `prompts/*.md`
@@ -0,0 +1,129 @@
1
+ ---
2
+ workflow: incident-response
3
+ ---
4
+
5
+ # Prompt: `/incident-response`
6
+
7
+ Use when: actively responding to a production incident or resilience event that demands mitigation, communication, and a clear escalation path.
8
+
9
+ ---
10
+
11
+ ## Example 1 — P0 service outage
12
+
13
+ **EN:**
14
+ ```
15
+ /incident-response
16
+
17
+ Severity: P0
18
+ Service: order-service / Namespace: production
19
+ Symptom: complete service outage — 100% error rate since 15:42 UTC
20
+ Affected: all checkout flows; estimated 2,000 users/min impact
21
+ IC: @me (on-call)
22
+ Available data:
23
+ - Alert fired: HighErrorRate 100% for order-service
24
+ - Recent deploy: order-service v3.1.0 at 15:38 UTC (4 min before incident)
25
+ - Prometheus shows: all pods Running but 0 successful responses
26
+ - Logs show: "connection refused" to postgres-primary:5432
27
+ Actions needed:
28
+ 1. Immediate mitigation options (rollback, feature flag, scale)
29
+ 2. Status page template for this incident
30
+ 3. Slack communication template for #incidents
31
+ 4. Scribe doc started
32
+ ```
33
+
34
+ **RU:**
35
+ ```
36
+ /incident-response
37
+
38
+ Severity: P0
39
+ Сервис: order-service / Namespace: production
40
+ Симптом: полный отказ сервиса — 100% error rate с 15:42 UTC
41
+ Затронуто: все checkout flow; ~2,000 пользователей/мин
42
+ IC: @я (on-call)
43
+ Доступные данные:
44
+ - Алерт: HighErrorRate 100% для order-service
45
+ - Последний деплой: order-service v3.1.0 в 15:38 UTC (за 4 мин до инцидента)
46
+ - Prometheus: все поды Running но 0 успешных ответов
47
+ - Логи: "connection refused" на postgres-primary:5432
48
+ Необходимые действия:
49
+ 1. Варианты немедленной митигации (откат, feature flag, масштабирование)
50
+ 2. Шаблон для status page по этому инциденту
51
+ 3. Шаблон Slack сообщения для #incidents
52
+ 4. Начат scribe doc
53
+ ```
54
+
55
+ ---
56
+
57
+ ## Example 2 — P1 performance degradation
58
+
59
+ **EN:**
60
+ ```
61
+ /incident-response
62
+
63
+ Severity: P1
64
+ Service: payment-service / Namespace: production
65
+ Symptom: p99 latency spiked from 200ms to 4.2s; error rate 0.8% (below 1% threshold but rising)
66
+ Affected: ~15% of payment requests timing out; no complete outage
67
+ Recent changes: database index migration ran at 03:00 UTC (8h ago)
68
+ Metrics: CPU normal, memory normal, DB connections at 89% pool capacity
69
+ Action needed:
70
+ 1. Classify: is this trending toward P0? (burn rate calculation)
71
+ 2. Identify: DB connection exhaustion vs slow query vs external dependency
72
+ 3. Quick mitigation options that don't require deploy
73
+ 4. Comms: notify on Slack (not status page yet — degraded only)
74
+ ```
75
+
76
+ **RU:**
77
+ ```
78
+ /incident-response
79
+
80
+ Severity: P1
81
+ Сервис: payment-service / Namespace: production
82
+ Симптом: p99 latency вырос с 200ms до 4.2s; error rate 0.8% (ниже порога 1% но растёт)
83
+ Затронуто: ~15% запросов на оплату с таймаутом; полного отказа нет
84
+ Недавние изменения: миграция индекса БД в 03:00 UTC (8ч назад)
85
+ Метрики: CPU в норме, память в норме, DB connections на 89% от pool capacity
86
+ Необходимые действия:
87
+ 1. Классификация: движется ли это к P0? (расчёт burn rate)
88
+ 2. Определить: исчерпание DB connections vs медленный запрос vs внешняя зависимость
89
+ 3. Варианты быстрой митигации без деплоя
90
+ 4. Коммуникация: уведомить в Slack (не status page пока — только деградация)
91
+ ```
92
+
93
+ ---
94
+
95
+ ## Example 3 — Network partition: database failover test
96
+
97
+ **EN:**
98
+ ```
99
+ /incident-response
100
+
101
+ System under test: payment-service → postgres-primary (CloudNativePG cluster)
102
+ Hypothesis: "payment-service automatically reconnects within 30s after postgres primary failover, with < 500ms added latency per request during reconnect"
103
+ Experiment:
104
+ - Inject NetworkChaos: block traffic from payment-service pods to postgres-primary for 90s
105
+ - CloudNativePG should auto-promote replica to primary during network partition
106
+ - Monitor: payment-service error rate, connection pool exhaustion (pgbouncer stats), reconnect time
107
+ - Verify: after partition heals, service recovers automatically (no manual intervention)
108
+ Pre-conditions:
109
+ - Confirm postgres replica is healthy before starting
110
+ - Confirm pgbouncer server_login_retry / server_connect_timeout are set appropriately
111
+ - Run at 10% of normal traffic (k6 load generator)
112
+ ```
113
+
114
+ **RU:**
115
+ ```
116
+ /incident-response
117
+
118
+ Система под тестом: payment-service → postgres-primary (CloudNativePG кластер)
119
+ Гипотеза: "payment-service автоматически переподключается в течение 30с после failover postgres primary, с задержкой < 500ms на запрос во время переподключения"
120
+ Эксперимент:
121
+ - Инжектировать NetworkChaos: блокировать трафик от подов payment-service к postgres-primary на 90с
122
+ - CloudNativePG должен автоматически назначить реплику primary во время сетевого раздела
123
+ - Мониторинг: error rate payment-service, исчерпание connection pool (статистика pgbouncer), время переподключения
124
+ - Проверить: после восстановления раздела сервис восстанавливается автоматически (без ручного вмешательства)
125
+ Предусловия:
126
+ - Убедиться что postgres реплика здорова перед началом
127
+ - Убедиться что pgbouncer server_login_retry / server_connect_timeout настроены правильно
128
+ - Запустить при 10% от нормального трафика (k6 генератор нагрузки)
129
+ ```
@@ -0,0 +1,101 @@
1
+ ---
2
+ workflow: postmortem
3
+ ---
4
+
5
+ # Prompt: `/postmortem`
6
+
7
+ Use when: writing or facilitating a blameless postmortem after a P0/P1 incident.
8
+
9
+ ---
10
+
11
+ ## Example 1 — Full postmortem from incident data
12
+
13
+ **EN:**
14
+ ```
15
+ /postmortem
16
+
17
+ Incident: INC-2024-112 / Service: payment-service / Severity: P1
18
+ Duration: 2024-11-15 03:42–04:01 UTC (19 min)
19
+ Impact: 4.2% error rate; ~850 failed payment attempts; SLO: 18 min budget consumed
20
+ Root cause (preliminary): OOMKilled pods after v2.4.1 deploy introduced high-memory code path
21
+
22
+ Timeline (from scribe doc):
23
+ 03:42 - Alert fired HighErrorRate 4.2%
24
+ 03:44 - On-call acknowledged
25
+ 03:49 - Identified: payment-service pods OOMKilling (exit 137)
26
+ 03:51 - Mitigation: helm rollback payment-service to revision 3
27
+ 03:53 - Error rate dropping
28
+ 04:01 - Resolved; monitoring
29
+
30
+ Tasks:
31
+ 1. Full 5-whys RCA from the preliminary root cause
32
+ 2. Contributing factors analysis
33
+ 3. 3–5 action items (specific, owned, dated within 2 weeks)
34
+ 4. What went well section (at least 3 items)
35
+ 5. SLO impact calculation
36
+ ```
37
+
38
+ **RU:**
39
+ ```
40
+ /postmortem
41
+
42
+ Инцидент: INC-2024-112 / Сервис: payment-service / Severity: P1
43
+ Длительность: 2024-11-15 03:42–04:01 UTC (19 мин)
44
+ Влияние: error rate 4.2%; ~850 неудачных попыток оплаты; SLO: потрачено 18 мин бюджета
45
+ Корневая причина (предварительно): OOMKilled поды после деплоя v2.4.1 с высокопамятным кодом
46
+
47
+ Timeline (из scribe doc):
48
+ 03:42 - Алерт сработал HighErrorRate 4.2%
49
+ 03:44 - On-call подтвердил
50
+ 03:49 - Определено: поды payment-service OOMKilling (exit 137)
51
+ 03:51 - Митигация: helm rollback payment-service до ревизии 3
52
+ 03:53 - Error rate падает
53
+ 04:01 - Разрешено; мониторинг
54
+
55
+ Задачи:
56
+ 1. Полный анализ 5-whys от предварительной корневой причины
57
+ 2. Анализ способствующих факторов
58
+ 3. 3–5 action items (конкретные, с владельцами, сроки в течение 2 недель)
59
+ 4. Раздел "что прошло хорошо" (минимум 3 пункта)
60
+ 5. Расчёт влияния на SLO
61
+ ```
62
+
63
+ ---
64
+
65
+ ## Example 2 — SLO review: define SLOs for new service
66
+
67
+ **EN:**
68
+ ```
69
+ /postmortem
70
+
71
+ Task: define SLOs (not a postmortem — SLO design session)
72
+ Service: notification-service
73
+ User expectation: "Notifications arrive within 30 seconds; don't lose notifications"
74
+ Current metrics available: delivery_attempts_total, delivery_success_total, delivery_latency_seconds
75
+ Team size: 2 backend engineers + 1 devops
76
+ Target tier: Tier 2 (internal tool; not directly revenue-impacting)
77
+ Design:
78
+ 1. Select 2 SLIs (availability + latency) with formulas
79
+ 2. Propose SLO targets (start conservative)
80
+ 3. Calculate error budget for 28-day window
81
+ 4. Write burn rate alert thresholds (fast + slow)
82
+ 5. Sloth YAML definition
83
+ ```
84
+
85
+ **RU:**
86
+ ```
87
+ /postmortem
88
+
89
+ Задача: определить SLO (не postmortem — сессия проектирования SLO)
90
+ Сервис: notification-service
91
+ Ожидание пользователей: "Уведомления доставляются в течение 30 секунд; уведомления не теряются"
92
+ Доступные метрики: delivery_attempts_total, delivery_success_total, delivery_latency_seconds
93
+ Размер команды: 2 backend инженера + 1 devops
94
+ Целевой tier: Tier 2 (внутренний инструмент; не влияет напрямую на выручку)
95
+ Проектирование:
96
+ 1. Выбрать 2 SLI (availability + latency) с формулами
97
+ 2. Предложить цели SLO (начать консервативно)
98
+ 3. Рассчитать error budget для 28-дневного окна
99
+ 4. Написать пороги burn rate алертов (быстрый + медленный)
100
+ 5. YAML определение для Sloth
101
+ ```
@@ -0,0 +1,125 @@
1
+ ---
2
+ workflow: slo-review
3
+ ---
4
+
5
+ # Prompt: `/slo-review`
6
+
7
+ Use when: reviewing SLO health, error budget burn, and upcoming capacity risks before committing to reliability or scaling work.
8
+
9
+ ---
10
+
11
+ ## Example 1 — Q3 SLO review for 6 services
12
+
13
+ **EN:**
14
+ ```
15
+ /slo-review
16
+
17
+ Review period: Q3 2024 (July–September)
18
+ Services under review: checkout, payment, order, auth, user, notification
19
+ Data available in Prometheus (Sloth recording rules)
20
+ For each service, evaluate:
21
+ 1. SLI achievement: actual ratio vs SLO target for the quarter
22
+ 2. Error budget burn: how much was consumed, main events causing consumption
23
+ 3. Incidents: count, severity, duration, correlation with budget consumption
24
+ 4. Target calibration: is the target too tight (budget always exhausted) or too loose (never burns)?
25
+ 5. Action items from previous review: completed? effective?
26
+ Recommendations needed:
27
+ - Services to tighten (budget never used → target probably too conservative)
28
+ - Services to loosen (budget always exhausted → target not achievable with current architecture)
29
+ - Reliability investments for Q4 (prioritised by error budget consumed)
30
+ Output format: executive summary + per-service table + Q4 recommendations
31
+ ```
32
+
33
+ **RU:**
34
+ ```
35
+ /slo-review
36
+
37
+ Период проверки: Q3 2024 (июль–сентябрь)
38
+ Сервисы на проверке: checkout, payment, order, auth, user, notification
39
+ Данные доступны в Prometheus (Sloth recording rules)
40
+ Для каждого сервиса оценить:
41
+ 1. Достижение SLI: фактическое соотношение vs цель SLO за квартал
42
+ 2. Сжигание error budget: сколько потрачено, основные события вызвавшие потребление
43
+ 3. Инциденты: количество, severity, продолжительность, корреляция с потреблением бюджета
44
+ 4. Калибровка цели: слишком жёсткая (бюджет всегда исчерпан) или слишком мягкая (никогда не горит)?
45
+ 5. Action items из предыдущего review: выполнены? эффективны?
46
+ Необходимые рекомендации:
47
+ - Сервисы для ужесточения (бюджет никогда не расходуется → цель вероятно слишком консервативная)
48
+ - Сервисы для смягчения (бюджет всегда исчерпан → цель недостижима с текущей архитектурой)
49
+ - Инвестиции в надёжность на Q4 (приоритизированы по потреблённому error budget)
50
+ Формат вывода: executive summary + таблица по сервисам + рекомендации на Q4
51
+ ```
52
+
53
+ ---
54
+
55
+ ## Example 2 — Emergency SLO calibration after infra migration
56
+
57
+ **EN:**
58
+ ```
59
+ /slo-review
60
+
61
+ Context: migrated from single-AZ to multi-AZ K8s (3 control plane + 6 workers)
62
+ Pre-migration: payment-service SLO 99.5%, frequently in Freeze state
63
+ Hypothesis: new HA setup should enable tightening to 99.9%
64
+ Task:
65
+ 1. Review pre-migration error budget consumption (last 3 months)
66
+ 2. Classify error budget events: infra-caused vs app-caused vs dependency-caused
67
+ 3. Estimate: if all infra-caused events are eliminated, what availability % would have been achieved?
68
+ 4. Propose new SLO target with rationale
69
+ 5. Set review checkpoint: evaluate new target after 30 days
70
+ ```
71
+
72
+ **RU:**
73
+ ```
74
+ /slo-review
75
+
76
+ Контекст: миграция с single-AZ на multi-AZ K8s (3 control plane + 6 workers)
77
+ До миграции: payment-service SLO 99.5%, часто в состоянии Freeze
78
+ Гипотеза: новая HA конфигурация должна позволить ужесточить до 99.9%
79
+ Задача:
80
+ 1. Проверить потребление error budget до миграции (последние 3 месяца)
81
+ 2. Классифицировать события error budget: вызванные инфрой / приложением / зависимостями
82
+ 3. Оценить: если бы все события вызванные инфрой были исключены, какой % доступности был бы достигнут?
83
+ 4. Предложить новую цель SLO с обоснованием
84
+ 5. Установить точку проверки: оценить новую цель через 30 дней
85
+ ```
86
+
87
+ ---
88
+
89
+ ## Example 3 — Black Friday capacity runbook
90
+
91
+ **EN:**
92
+ ```
93
+ /slo-review
94
+
95
+ Event: Black Friday (peak 5× normal traffic, 4-hour window)
96
+ Services affected: checkout, payment, order (top 3 by load)
97
+ Normal peak: 800 RPS; expected BF peak: 4000 RPS
98
+ Pre-event checklist needed:
99
+ - Scale workers from 6 → 10 (pre-provision 48h before event)
100
+ - Set HPA min replicas: checkout→10, payment→8, order→8 (prevent cold start during spike)
101
+ - Pre-warm: connection pools, DNS TTLs flushed, CDN cache warmed
102
+ - Load test: k6 script targeting 4500 RPS (~12% above expected peak); run 2 days before
103
+ - DB: tables vacuumed + analysed and caches pre-warmed; connection pool max set to 80% of max_connections
104
+ - War room: open 1h before event; on-call + dev leads + DBA on standby
105
+ - Auto-scale-down: trigger 2h after event peak (cost control)
106
+ Output: runbook document + pre-event checklist + post-event scale-down procedure
107
+ ```
108
+
109
+ **RU:**
110
+ ```
111
+ /slo-review
112
+
113
+ Событие: Чёрная пятница (пик 5× нормального трафика, 4-часовое окно)
114
+ Затронутые сервисы: checkout, payment, order (топ-3 по нагрузке)
115
+ Нормальный пик: 800 RPS; ожидаемый пик ЧП: 4000 RPS
116
+ Необходимый чеклист перед событием:
117
+ - Масштабировать workers с 6 → 10 (заранее за 48ч до события)
118
+ - Установить HPA min replicas: checkout→10, payment→8, order→8 (предотвратить cold start при скачке)
119
+ - Pre-warm: connection pools, сброс DNS TTL, прогрев CDN кэша
120
+ - Нагрузочное тестирование: k6 скрипт на 4500 RPS (~12% сверх ожидаемого пика); запустить за 2 дня
121
+ - БД: прогрев vacuum + analyse; max connection pool = 80% от max_connections
122
+ - Военная комната: открыть за 1ч до события; on-call + dev leads + DBA в режиме ожидания
123
+ - Авто-уменьшение масштаба: через 2ч после пика события (контроль затрат)
124
+ Результат: runbook документ + чеклист до события + процедура уменьшения масштаба после события
125
+ ```
@@ -0,0 +1,25 @@
1
+ # Rule: Error Budget Policy
2
+
3
+ **Priority**: P1 — Error budget governs feature development velocity vs reliability investment.
4
+
5
+ ## Error Budget States
6
+
7
+ | State | Budget remaining | Action |
8
+ |:---|:---|:---|
9
+ | 🟢 Healthy | > 50% | Normal development velocity |
10
+ | 🟡 Warning | 25–50% | Reliability work enters next sprint |
11
+ | 🔴 Freeze | > 0% and < 25% | Feature freeze; only reliability fixes ship |
12
+ | ⛔ Exhausted | ≤ 0% | Mandatory postmortem; all features blocked until replenished |
13
+
14
+ ## Freeze Rules
15
+
16
+ - Feature freeze requires: team-lead + product-owner sign-off.
17
+ - Reliability work during freeze: reduce MTTR, add chaos tests, improve monitoring.
18
+ - Exception for hotfixes (security, critical bugs) — requires VP Engineering approval.
19
+
20
+ ## Error Budget Tracking
21
+
22
+ - Error budget burn rate alerts:
23
+ - Fast burn (> 14.4× in 1h): page on-call → investigate immediately
24
+ - Slow burn (> 3× over 1d): Slack alert → review in next stand-up
25
+ - Monthly error budget report published to Confluence/Notion.