specweave 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/INSTALL.md +848 -0
- package/LICENSE +21 -0
- package/README.md +675 -0
- package/SPECWEAVE.md +665 -0
- package/bin/install-agents.sh +57 -0
- package/bin/install-all.sh +49 -0
- package/bin/install-commands.sh +56 -0
- package/bin/install-skills.sh +57 -0
- package/bin/specweave.js +81 -0
- package/dist/adapters/adapter-base.d.ts +50 -0
- package/dist/adapters/adapter-base.d.ts.map +1 -0
- package/dist/adapters/adapter-base.js +146 -0
- package/dist/adapters/adapter-base.js.map +1 -0
- package/dist/adapters/adapter-interface.d.ts +108 -0
- package/dist/adapters/adapter-interface.d.ts.map +1 -0
- package/dist/adapters/adapter-interface.js +9 -0
- package/dist/adapters/adapter-interface.js.map +1 -0
- package/dist/adapters/claude/adapter.d.ts +54 -0
- package/dist/adapters/claude/adapter.d.ts.map +1 -0
- package/dist/adapters/claude/adapter.js +184 -0
- package/dist/adapters/claude/adapter.js.map +1 -0
- package/dist/adapters/copilot/adapter.d.ts +42 -0
- package/dist/adapters/copilot/adapter.d.ts.map +1 -0
- package/dist/adapters/copilot/adapter.js +239 -0
- package/dist/adapters/copilot/adapter.js.map +1 -0
- package/dist/adapters/cursor/adapter.d.ts +42 -0
- package/dist/adapters/cursor/adapter.d.ts.map +1 -0
- package/dist/adapters/cursor/adapter.js +297 -0
- package/dist/adapters/cursor/adapter.js.map +1 -0
- package/dist/adapters/generic/adapter.d.ts +40 -0
- package/dist/adapters/generic/adapter.d.ts.map +1 -0
- package/dist/adapters/generic/adapter.js +155 -0
- package/dist/adapters/generic/adapter.js.map +1 -0
- package/dist/cli/commands/init.d.ts +6 -0
- package/dist/cli/commands/init.d.ts.map +1 -0
- package/dist/cli/commands/init.js +247 -0
- package/dist/cli/commands/init.js.map +1 -0
- package/dist/cli/commands/install.d.ts +7 -0
- package/dist/cli/commands/install.d.ts.map +1 -0
- package/dist/cli/commands/install.js +160 -0
- package/dist/cli/commands/install.js.map +1 -0
- package/dist/cli/commands/list.d.ts +6 -0
- package/dist/cli/commands/list.d.ts.map +1 -0
- package/dist/cli/commands/list.js +154 -0
- package/dist/cli/commands/list.js.map +1 -0
- package/package.json +90 -0
- package/src/adapters/README.md +312 -0
- package/src/adapters/adapter-base.ts +146 -0
- package/src/adapters/adapter-interface.ts +120 -0
- package/src/adapters/claude/README.md +241 -0
- package/src/adapters/claude/adapter.ts +157 -0
- package/src/adapters/copilot/.github/copilot/instructions.md +376 -0
- package/src/adapters/copilot/README.md +200 -0
- package/src/adapters/copilot/adapter.ts +210 -0
- package/src/adapters/cursor/.cursor/context/docs-context.md +62 -0
- package/src/adapters/cursor/.cursor/context/increments-context.md +71 -0
- package/src/adapters/cursor/.cursor/context/strategy-context.md +73 -0
- package/src/adapters/cursor/.cursor/context/tests-context.md +89 -0
- package/src/adapters/cursor/.cursorrules +325 -0
- package/src/adapters/cursor/README.md +243 -0
- package/src/adapters/cursor/adapter.ts +268 -0
- package/src/adapters/generic/README.md +277 -0
- package/src/adapters/generic/SPECWEAVE-MANUAL.md +676 -0
- package/src/adapters/generic/adapter.ts +159 -0
- package/src/adapters/registry.yaml +126 -0
- package/src/agents/architect/AGENT.md +416 -0
- package/src/agents/devops/AGENT.md +1738 -0
- package/src/agents/docs-writer/AGENT.md +239 -0
- package/src/agents/performance/AGENT.md +228 -0
- package/src/agents/pm/AGENT.md +751 -0
- package/src/agents/qa-lead/AGENT.md +150 -0
- package/src/agents/security/AGENT.md +179 -0
- package/src/agents/sre/AGENT.md +582 -0
- package/src/agents/sre/modules/backend-diagnostics.md +481 -0
- package/src/agents/sre/modules/database-diagnostics.md +509 -0
- package/src/agents/sre/modules/infrastructure.md +561 -0
- package/src/agents/sre/modules/monitoring.md +439 -0
- package/src/agents/sre/modules/security-incidents.md +421 -0
- package/src/agents/sre/modules/ui-diagnostics.md +302 -0
- package/src/agents/sre/playbooks/01-high-cpu-usage.md +204 -0
- package/src/agents/sre/playbooks/02-database-deadlock.md +241 -0
- package/src/agents/sre/playbooks/03-memory-leak.md +252 -0
- package/src/agents/sre/playbooks/04-slow-api-response.md +269 -0
- package/src/agents/sre/playbooks/05-ddos-attack.md +293 -0
- package/src/agents/sre/playbooks/06-disk-full.md +314 -0
- package/src/agents/sre/playbooks/07-service-down.md +333 -0
- package/src/agents/sre/playbooks/08-data-corruption.md +337 -0
- package/src/agents/sre/playbooks/09-cascade-failure.md +430 -0
- package/src/agents/sre/playbooks/10-rate-limit-exceeded.md +464 -0
- package/src/agents/sre/scripts/health-check.sh +230 -0
- package/src/agents/sre/scripts/log-analyzer.py +213 -0
- package/src/agents/sre/scripts/metrics-collector.sh +294 -0
- package/src/agents/sre/scripts/trace-analyzer.js +257 -0
- package/src/agents/sre/templates/incident-report.md +249 -0
- package/src/agents/sre/templates/mitigation-plan.md +375 -0
- package/src/agents/sre/templates/post-mortem.md +418 -0
- package/src/agents/sre/templates/runbook-template.md +412 -0
- package/src/agents/tech-lead/AGENT.md +263 -0
- package/src/commands/add-tasks.md +176 -0
- package/src/commands/close-increment.md +347 -0
- package/src/commands/create-increment.md +223 -0
- package/src/commands/create-project.md +528 -0
- package/src/commands/generate-docs.md +623 -0
- package/src/commands/list-increments.md +180 -0
- package/src/commands/review-docs.md +331 -0
- package/src/commands/start-increment.md +139 -0
- package/src/commands/sync-github.md +115 -0
- package/src/commands/validate-increment.md +800 -0
- package/src/hooks/README.md +252 -0
- package/src/hooks/docs-changed.sh +59 -0
- package/src/hooks/human-input-required.sh +55 -0
- package/src/hooks/post-task-completion.sh +57 -0
- package/src/hooks/pre-implementation.sh +47 -0
- package/src/skills/ado-sync/README.md +449 -0
- package/src/skills/ado-sync/SKILL.md +245 -0
- package/src/skills/ado-sync/test-cases/test-1.yaml +9 -0
- package/src/skills/ado-sync/test-cases/test-2.yaml +8 -0
- package/src/skills/ado-sync/test-cases/test-3.yaml +9 -0
- package/src/skills/bmad-method-expert/SKILL.md +628 -0
- package/src/skills/bmad-method-expert/scripts/analyze-project.js +318 -0
- package/src/skills/bmad-method-expert/scripts/check-setup.js +208 -0
- package/src/skills/bmad-method-expert/scripts/generate-template.js +1149 -0
- package/src/skills/bmad-method-expert/scripts/validate-documents.js +340 -0
- package/src/skills/bmad-method-expert/test-cases/test-1-placeholder.yaml +12 -0
- package/src/skills/bmad-method-expert/test-cases/test-2-placeholder.yaml +12 -0
- package/src/skills/bmad-method-expert/test-cases/test-3-placeholder.yaml +12 -0
- package/src/skills/brownfield-analyzer/SKILL.md +523 -0
- package/src/skills/brownfield-analyzer/test-cases/test-1-basic-analysis.yaml +48 -0
- package/src/skills/brownfield-analyzer/test-cases/test-2-placeholder.yaml +12 -0
- package/src/skills/brownfield-analyzer/test-cases/test-3-placeholder.yaml +12 -0
- package/src/skills/brownfield-onboarder/SKILL.md +625 -0
- package/src/skills/brownfield-onboarder/test-cases/test-1-placeholder.yaml +12 -0
- package/src/skills/brownfield-onboarder/test-cases/test-2-placeholder.yaml +12 -0
- package/src/skills/brownfield-onboarder/test-cases/test-3-placeholder.yaml +12 -0
- package/src/skills/calendar-system/test-cases/test-1-placeholder.yaml +12 -0
- package/src/skills/calendar-system/test-cases/test-2-placeholder.yaml +12 -0
- package/src/skills/calendar-system/test-cases/test-3-placeholder.yaml +12 -0
- package/src/skills/context-loader/SKILL.md +734 -0
- package/src/skills/context-loader/test-cases/test-1-basic-loading.yaml +39 -0
- package/src/skills/context-loader/test-cases/test-2-token-budget-exceeded.yaml +44 -0
- package/src/skills/context-loader/test-cases/test-3-section-anchors.yaml +45 -0
- package/src/skills/context-optimizer/SKILL.md +618 -0
- package/src/skills/context-optimizer/test-cases/test-1-bug-fix-narrow.yaml +97 -0
- package/src/skills/context-optimizer/test-cases/test-2-feature-focused.yaml +109 -0
- package/src/skills/context-optimizer/test-cases/test-3-architecture-broad.yaml +98 -0
- package/src/skills/cost-optimizer/SKILL.md +190 -0
- package/src/skills/cost-optimizer/test-cases/test-1-basic-comparison.yaml +75 -0
- package/src/skills/cost-optimizer/test-cases/test-2-budget-constraint.yaml +52 -0
- package/src/skills/cost-optimizer/test-cases/test-3-scale-requirement.yaml +63 -0
- package/src/skills/cost-optimizer/test-results/README.md +46 -0
- package/src/skills/design-system-architect/SKILL.md +107 -0
- package/src/skills/design-system-architect/test-cases/test-1-token-structure.yaml +23 -0
- package/src/skills/design-system-architect/test-cases/test-2-component-hierarchy.yaml +24 -0
- package/src/skills/design-system-architect/test-cases/test-3-accessibility-checklist.yaml +23 -0
- package/src/skills/diagrams-architect/SKILL.md +763 -0
- package/src/skills/diagrams-generator/SKILL.md +25 -0
- package/src/skills/diagrams-generator/test-cases/test-1.yaml +9 -0
- package/src/skills/diagrams-generator/test-cases/test-2.yaml +9 -0
- package/src/skills/diagrams-generator/test-cases/test-3.yaml +8 -0
- package/src/skills/docs-updater/README.md +48 -0
- package/src/skills/docs-updater/test-cases/test-1-placeholder.yaml +12 -0
- package/src/skills/docs-updater/test-cases/test-2-placeholder.yaml +12 -0
- package/src/skills/docs-updater/test-cases/test-3-placeholder.yaml +12 -0
- package/src/skills/dotnet-backend/SKILL.md +250 -0
- package/src/skills/e2e-playwright/README.md +506 -0
- package/src/skills/e2e-playwright/SKILL.md +457 -0
- package/src/skills/e2e-playwright/execute.js +373 -0
- package/src/skills/e2e-playwright/lib/utils.js +514 -0
- package/src/skills/e2e-playwright/package.json +33 -0
- package/src/skills/e2e-playwright/test-cases/TC-001-basic-navigation.yaml +54 -0
- package/src/skills/e2e-playwright/test-cases/TC-002-form-interaction.yaml +64 -0
- package/src/skills/e2e-playwright/test-cases/TC-003-specweave-integration.yaml +74 -0
- package/src/skills/e2e-playwright/test-cases/TC-004-accessibility-check.yaml +98 -0
- package/src/skills/figma-designer/SKILL.md +149 -0
- package/src/skills/figma-implementer/SKILL.md +148 -0
- package/src/skills/figma-mcp-connector/SKILL.md +136 -0
- package/src/skills/figma-mcp-connector/test-cases/test-1-read-file-desktop.yaml +22 -0
- package/src/skills/figma-mcp-connector/test-cases/test-2-read-file-framelink.yaml +21 -0
- package/src/skills/figma-mcp-connector/test-cases/test-3-error-handling.yaml +18 -0
- package/src/skills/figma-to-code/SKILL.md +128 -0
- package/src/skills/figma-to-code/test-cases/test-1-token-generation.yaml +29 -0
- package/src/skills/figma-to-code/test-cases/test-2-component-generation.yaml +27 -0
- package/src/skills/figma-to-code/test-cases/test-3-typescript-generation.yaml +28 -0
- package/src/skills/frontend/SKILL.md +177 -0
- package/src/skills/github-sync/SKILL.md +252 -0
- package/src/skills/github-sync/test-cases/test-1-placeholder.yaml +12 -0
- package/src/skills/github-sync/test-cases/test-2-placeholder.yaml +12 -0
- package/src/skills/github-sync/test-cases/test-3-placeholder.yaml +12 -0
- package/src/skills/hetzner-provisioner/README.md +308 -0
- package/src/skills/hetzner-provisioner/SKILL.md +251 -0
- package/src/skills/hetzner-provisioner/test-cases/test-1-basic-provision.yaml +71 -0
- package/src/skills/hetzner-provisioner/test-cases/test-2-postgres-provision.yaml +85 -0
- package/src/skills/hetzner-provisioner/test-cases/test-3-ssl-config.yaml +126 -0
- package/src/skills/hetzner-provisioner/test-results/README.md +259 -0
- package/src/skills/increment-planner/SKILL.md +889 -0
- package/src/skills/increment-planner/scripts/feature-utils.js +250 -0
- package/src/skills/increment-planner/test-cases/test-1-basic-feature.yaml +27 -0
- package/src/skills/increment-planner/test-cases/test-2-complex-feature.yaml +30 -0
- package/src/skills/increment-planner/test-cases/test-3-auto-numbering.yaml +24 -0
- package/src/skills/increment-quality-judge/SKILL.md +566 -0
- package/src/skills/increment-quality-judge/test-cases/test-1-good-spec.yaml +95 -0
- package/src/skills/increment-quality-judge/test-cases/test-2-poor-spec.yaml +108 -0
- package/src/skills/increment-quality-judge/test-cases/test-3-export-suggestions.yaml +87 -0
- package/src/skills/jira-sync/README.md +328 -0
- package/src/skills/jira-sync/SKILL.md +209 -0
- package/src/skills/jira-sync/test-cases/test-1.yaml +9 -0
- package/src/skills/jira-sync/test-cases/test-2.yaml +9 -0
- package/src/skills/jira-sync/test-cases/test-3.yaml +10 -0
- package/src/skills/nextjs/SKILL.md +176 -0
- package/src/skills/nodejs-backend/SKILL.md +181 -0
- package/src/skills/notification-system/test-cases/test-1-placeholder.yaml +12 -0
- package/src/skills/notification-system/test-cases/test-2-placeholder.yaml +12 -0
- package/src/skills/notification-system/test-cases/test-3-placeholder.yaml +12 -0
- package/src/skills/python-backend/SKILL.md +226 -0
- package/src/skills/role-orchestrator/README.md +197 -0
- package/src/skills/role-orchestrator/SKILL.md +1184 -0
- package/src/skills/role-orchestrator/test-cases/test-1-simple-product.yaml +98 -0
- package/src/skills/role-orchestrator/test-cases/test-2-quality-gate-failure.yaml +73 -0
- package/src/skills/role-orchestrator/test-cases/test-3-security-workflow.yaml +121 -0
- package/src/skills/role-orchestrator/test-cases/test-4-parallel-execution.yaml +145 -0
- package/src/skills/role-orchestrator/test-cases/test-5-feedback-loops.yaml +149 -0
- package/src/skills/skill-creator/LICENSE.txt +202 -0
- package/src/skills/skill-creator/SKILL.md +209 -0
- package/src/skills/skill-creator/scripts/init_skill.py +303 -0
- package/src/skills/skill-creator/scripts/package_skill.py +110 -0
- package/src/skills/skill-creator/scripts/quick_validate.py +65 -0
- package/src/skills/skill-creator/test-cases/test-1-placeholder.yaml +12 -0
- package/src/skills/skill-creator/test-cases/test-2-placeholder.yaml +12 -0
- package/src/skills/skill-creator/test-cases/test-3-placeholder.yaml +12 -0
- package/src/skills/skill-router/SKILL.md +497 -0
- package/src/skills/skill-router/test-cases/test-1-basic-routing.yaml +33 -0
- package/src/skills/skill-router/test-cases/test-2-ambiguous-request.yaml +42 -0
- package/src/skills/skill-router/test-cases/test-3-nested-orchestration.yaml +50 -0
- package/src/skills/spec-driven-brainstorming/README.md +264 -0
- package/src/skills/spec-driven-brainstorming/SKILL.md +439 -0
- package/src/skills/spec-driven-brainstorming/test-cases/TC-001-simple-idea-to-design.yaml +148 -0
- package/src/skills/spec-driven-brainstorming/test-cases/TC-002-complex-ultrathink-design.yaml +190 -0
- package/src/skills/spec-driven-brainstorming/test-cases/TC-003-unclear-requirements-socratic.yaml +233 -0
- package/src/skills/spec-driven-debugging/README.md +479 -0
- package/src/skills/spec-driven-debugging/SKILL.md +652 -0
- package/src/skills/spec-driven-debugging/test-cases/TC-001-simple-auth-bug.yaml +212 -0
- package/src/skills/spec-driven-debugging/test-cases/TC-002-race-condition-ultrathink.yaml +461 -0
- package/src/skills/spec-driven-debugging/test-cases/TC-003-brownfield-missing-spec.yaml +366 -0
- package/src/skills/spec-kit-expert/SKILL.md +1012 -0
- package/src/skills/spec-kit-expert/test-cases/test-1-placeholder.yaml +12 -0
- package/src/skills/spec-kit-expert/test-cases/test-2-placeholder.yaml +12 -0
- package/src/skills/spec-kit-expert/test-cases/test-3-placeholder.yaml +12 -0
- package/src/skills/specweave-ado-mapper/SKILL.md +501 -0
- package/src/skills/specweave-detector/SKILL.md +420 -0
- package/src/skills/specweave-detector/test-cases/test-1-basic-detection.yaml +37 -0
- package/src/skills/specweave-detector/test-cases/test-2-missing-config.yaml +37 -0
- package/src/skills/specweave-detector/test-cases/test-3-non-specweave-project.yaml +34 -0
- package/src/skills/specweave-jira-mapper/SKILL.md +500 -0
- package/src/skills/stripe-integrator/test-cases/test-1-placeholder.yaml +12 -0
- package/src/skills/stripe-integrator/test-cases/test-2-placeholder.yaml +12 -0
- package/src/skills/stripe-integrator/test-cases/test-3-placeholder.yaml +12 -0
- package/src/skills/task-builder/README.md +90 -0
- package/src/skills/task-builder/test-cases/test-1-placeholder.yaml +12 -0
- package/src/skills/task-builder/test-cases/test-2-placeholder.yaml +12 -0
- package/src/skills/task-builder/test-cases/test-3-placeholder.yaml +12 -0
- package/src/templates/.env.example +144 -0
- package/src/templates/.gitignore.template +81 -0
- package/src/templates/CLAUDE.md.template +383 -0
- package/src/templates/README.md.template +240 -0
- package/src/templates/config.yaml +333 -0
- package/src/templates/docs/README.md +124 -0
- package/src/templates/docs/adr-template.md +118 -0
- package/src/templates/docs/hld-template.md +220 -0
- package/src/templates/docs/lld-template.md +580 -0
- package/src/templates/docs/prd-template.md +132 -0
- package/src/templates/docs/rfc-template.md +229 -0
- package/src/templates/docs/runbook-template.md +298 -0
- package/src/templates/environments/minimal/.env.production +16 -0
- package/src/templates/environments/minimal/README.md +54 -0
- package/src/templates/environments/minimal/deploy-production.yml +52 -0
- package/src/templates/environments/progressive/.env.qa +28 -0
- package/src/templates/environments/progressive/README.md +129 -0
- package/src/templates/environments/progressive/deploy-production.yml +93 -0
- package/src/templates/environments/progressive/deploy-qa.yml +62 -0
- package/src/templates/environments/progressive/deploy-staging.yml +67 -0
- package/src/templates/environments/standard/.env.development +20 -0
- package/src/templates/environments/standard/.env.production +30 -0
- package/src/templates/environments/standard/.env.staging +23 -0
- package/src/templates/environments/standard/README.md +97 -0
- package/src/templates/environments/standard/deploy-production.yml +68 -0
- package/src/templates/environments/standard/deploy-staging.yml +61 -0
- package/src/templates/environments/standard/docker-compose.yml +43 -0
- package/src/templates/increment-metadata-template.yaml +138 -0
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
# Mitigation Plan: [Incident Title]
|
|
2
|
+
|
|
3
|
+
**Date**: YYYY-MM-DD HH:MM UTC
|
|
4
|
+
**Incident**: [Brief description]
|
|
5
|
+
**Root Cause**: [Root cause if known, or "Under investigation"]
|
|
6
|
+
**Severity**: SEV1 / SEV2 / SEV3
|
|
7
|
+
**Created By**: [Name]
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Executive Summary
|
|
12
|
+
|
|
13
|
+
**Problem**: [What's broken in one sentence]
|
|
14
|
+
|
|
15
|
+
**Impact**: [Who's affected and how]
|
|
16
|
+
|
|
17
|
+
**Solution**: [High-level approach]
|
|
18
|
+
|
|
19
|
+
**ETA**: [Estimated time to resolution]
|
|
20
|
+
|
|
21
|
+
**Example**:
|
|
22
|
+
```
|
|
23
|
+
Problem: Database connection pool exhausted due to connection leak
|
|
24
|
+
Impact: All users unable to access application (100% downtime)
|
|
25
|
+
Solution: Restart application + fix connection leak in code
|
|
26
|
+
ETA: 30 minutes (service restored in 5 min, permanent fix in 30 min)
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Three-Horizon Mitigation
|
|
32
|
+
|
|
33
|
+
### Immediate (Now - 5 minutes)
|
|
34
|
+
|
|
35
|
+
**Goal**: Stop the bleeding, restore service immediately
|
|
36
|
+
|
|
37
|
+
**Actions**:
|
|
38
|
+
- [ ] [Action 1]
|
|
39
|
+
- **What**: [Detailed description]
|
|
40
|
+
- **How**: [Commands/steps]
|
|
41
|
+
- **Impact**: [Expected improvement]
|
|
42
|
+
- **Risk**: [Low/Medium/High + explanation]
|
|
43
|
+
- **Rollback**: [How to undo if it fails]
|
|
44
|
+
- **ETA**: [Time to execute]
|
|
45
|
+
- **Owner**: [Who will do this]
|
|
46
|
+
|
|
47
|
+
**Example**:
|
|
48
|
+
```
|
|
49
|
+
- [ ] Restart payment service to release connections
|
|
50
|
+
- What: Restart payment service to release database connections
|
|
51
|
+
- How: `systemctl restart payment-service`
|
|
52
|
+
- Impact: All 100 connections released, service restored
|
|
53
|
+
- Risk: Low (stateless service, graceful restart)
|
|
54
|
+
- Rollback: N/A (restart is safe)
|
|
55
|
+
- ETA: 2 minutes
|
|
56
|
+
- Owner: Jane (SRE)
|
|
57
|
+
|
|
58
|
+
- [ ] Monitor connection pool for 5 minutes
|
|
59
|
+
- What: Verify connections stay below 80%
|
|
60
|
+
- How: `watch -n 5 'psql -c "SELECT count(*) FROM pg_stat_activity"'`
|
|
61
|
+
- Impact: Early detection if issue recurs
|
|
62
|
+
- Risk: None (monitoring only)
|
|
63
|
+
- Rollback: N/A
|
|
64
|
+
- ETA: 5 minutes
|
|
65
|
+
- Owner: Jane (SRE)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
**Success Criteria**:
|
|
69
|
+
- [ ] Service health check passing
|
|
70
|
+
- [ ] Users able to access application
|
|
71
|
+
- [ ] Connection pool <80% of max
|
|
72
|
+
- [ ] No active alerts
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
### Short-term (5 minutes - 1 hour)
|
|
77
|
+
|
|
78
|
+
**Goal**: Tactical fix to prevent immediate recurrence
|
|
79
|
+
|
|
80
|
+
**Actions**:
|
|
81
|
+
- [ ] [Action 1]
|
|
82
|
+
- **What**: [Detailed description]
|
|
83
|
+
- **How**: [Commands/steps]
|
|
84
|
+
- **Impact**: [Expected improvement]
|
|
85
|
+
- **Risk**: [Low/Medium/High + explanation]
|
|
86
|
+
- **Rollback**: [How to undo if it fails]
|
|
87
|
+
- **ETA**: [Time to execute]
|
|
88
|
+
- **Owner**: [Who will do this]
|
|
89
|
+
|
|
90
|
+
**Example**:
|
|
91
|
+
```
|
|
92
|
+
- [ ] Fix connection leak in payment service code
|
|
93
|
+
- What: Add `finally` block to close connection in error path
|
|
94
|
+
- How: Deploy hotfix branch `fix/connection-leak`
|
|
95
|
+
- Impact: Connections properly closed, no leak
|
|
96
|
+
- Risk: Medium (code change requires testing)
|
|
97
|
+
- Rollback: `git revert <commit>` + redeploy
|
|
98
|
+
- ETA: 30 minutes (test + deploy)
|
|
99
|
+
- Owner: Mike (Developer)
|
|
100
|
+
|
|
101
|
+
- [ ] Increase connection pool size
|
|
102
|
+
- What: Increase max_connections from 100 to 200
|
|
103
|
+
- How: ALTER SYSTEM SET max_connections = 200; then restart PostgreSQL (max_connections only takes effect at server start; pg_reload_conf() does not apply it)
|
|
104
|
+
- Impact: More headroom for traffic spikes
|
|
105
|
+
- Risk: Low (more connections = more memory, but server has capacity)
|
|
106
|
+
- Rollback: ALTER SYSTEM SET max_connections = 100; then restart PostgreSQL (a config reload alone does not apply this setting)
|
|
107
|
+
- ETA: 5 minutes
|
|
108
|
+
- Owner: Tom (DBA)
|
|
109
|
+
|
|
110
|
+
- [ ] Add connection pool monitoring alert
|
|
111
|
+
- What: Alert when connections >80% of max
|
|
112
|
+
- How: Create CloudWatch/Grafana alert
|
|
113
|
+
- Impact: Early warning before exhaustion
|
|
114
|
+
- Risk: None (monitoring only)
|
|
115
|
+
- Rollback: Disable alert
|
|
116
|
+
- ETA: 15 minutes
|
|
117
|
+
- Owner: Jane (SRE)
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
**Success Criteria**:
|
|
121
|
+
- [ ] Code fix deployed and verified
|
|
122
|
+
- [ ] Connection pool increased
|
|
123
|
+
- [ ] Monitoring alert configured
|
|
124
|
+
- [ ] No recurrence in 1 hour
|
|
125
|
+
- [ ] Load test passed (if applicable)
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
### Long-term (1 hour - days/weeks)
|
|
130
|
+
|
|
131
|
+
**Goal**: Permanent fix and prevention
|
|
132
|
+
|
|
133
|
+
**Actions**:
|
|
134
|
+
- [ ] [Action 1]
|
|
135
|
+
- **What**: [Detailed description]
|
|
136
|
+
- **Priority**: P1 / P2 / P3
|
|
137
|
+
- **Due Date**: [YYYY-MM-DD]
|
|
138
|
+
- **Owner**: [Who will do this]
|
|
139
|
+
|
|
140
|
+
**Example**:
|
|
141
|
+
```
|
|
142
|
+
- [ ] Add automated test for connection cleanup
|
|
143
|
+
- What: Integration test that verifies connections are closed in error paths
|
|
144
|
+
- Priority: P1
|
|
145
|
+
- Due Date: 2025-10-27
|
|
146
|
+
- Owner: Lisa (QA)
|
|
147
|
+
|
|
148
|
+
- [ ] Add connection timeout configuration
|
|
149
|
+
- What: Set idle_in_transaction_session_timeout = 30s in database config (terminates leaked sessions left idle inside a transaction)
|
|
150
|
+
- Priority: P2
|
|
151
|
+
- Due Date: 2025-10-28
|
|
152
|
+
- Owner: Tom (DBA)
|
|
153
|
+
|
|
154
|
+
- [ ] Review all database queries for connection leaks
|
|
155
|
+
- What: Audit all DB queries to ensure proper cleanup
|
|
156
|
+
- Priority: P3
|
|
157
|
+
- Due Date: 2025-11-02
|
|
158
|
+
- Owner: Mike (Developer)
|
|
159
|
+
|
|
160
|
+
- [ ] Load test for high-traffic events
|
|
161
|
+
- What: Load test with 10x normal traffic to find bottlenecks
|
|
162
|
+
- Priority: P3
|
|
163
|
+
- Due Date: 2025-11-10
|
|
164
|
+
- Owner: John (DevOps)
|
|
165
|
+
|
|
166
|
+
- [ ] Update runbook with new findings
|
|
167
|
+
- What: Document connection leak troubleshooting steps
|
|
168
|
+
- Priority: P3
|
|
169
|
+
- Due Date: 2025-10-28
|
|
170
|
+
- Owner: Jane (SRE)
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
**Success Criteria**:
|
|
174
|
+
- [ ] All P1 actions completed
|
|
175
|
+
- [ ] Regression test added (prevents future occurrences)
|
|
176
|
+
- [ ] Monitoring improved (detect earlier)
|
|
177
|
+
- [ ] Runbook updated
|
|
178
|
+
- [ ] Post-mortem published
|
|
179
|
+
|
|
180
|
+
---
|
|
181
|
+
|
|
182
|
+
## Risk Assessment
|
|
183
|
+
|
|
184
|
+
### Risks of Mitigation Actions
|
|
185
|
+
|
|
186
|
+
| Action | Risk Level | Risk Description | Mitigation |
|
|
187
|
+
|--------|------------|------------------|------------|
|
|
188
|
+
| [Action 1] | Low/Med/High | [What could go wrong] | [How to reduce risk] |
|
|
189
|
+
|
|
190
|
+
**Example**:
|
|
191
|
+
```
|
|
192
|
+
| Restart service | Low | Brief downtime (5s) | Use graceful restart, off-peak time |
|
|
193
|
+
| Deploy code fix | Medium | Bug in fix could worsen issue | Test in staging first, have rollback ready |
|
|
194
|
+
| Increase connection pool | Low | More memory usage | Server has capacity, monitor memory |
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
### Risks of NOT Mitigating
|
|
198
|
+
|
|
199
|
+
| Risk | Impact | Probability |
|
|
200
|
+
|------|--------|-------------|
|
|
201
|
+
| [Risk 1] | [Impact if we do nothing] | High/Med/Low |
|
|
202
|
+
|
|
203
|
+
**Example**:
|
|
204
|
+
```
|
|
205
|
+
| Service remains down | All users affected, revenue loss | High (will recur) |
|
|
206
|
+
| Connection leak worsens | Database crashes | High |
|
|
207
|
+
| SLA breach | Customer refunds, reputation damage | Medium |
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
---
|
|
211
|
+
|
|
212
|
+
## Communication Plan
|
|
213
|
+
|
|
214
|
+
### Internal Communication
|
|
215
|
+
|
|
216
|
+
**Incident Channel**: #incident-YYYYMMDD-title
|
|
217
|
+
|
|
218
|
+
**Update Frequency**: Every [X] minutes
|
|
219
|
+
|
|
220
|
+
**Stakeholders to Notify**:
|
|
221
|
+
- [ ] Engineering team (#engineering)
|
|
222
|
+
- [ ] Customer support (#support)
|
|
223
|
+
- [ ] Management (#management)
|
|
224
|
+
- [ ] [Other teams]
|
|
225
|
+
|
|
226
|
+
**Update Template**:
|
|
227
|
+
```markdown
|
|
228
|
+
[HH:MM] Update:
|
|
229
|
+
- Status: [Investigating / Mitigating / Resolved]
|
|
230
|
+
- Root Cause: [Known / Under investigation]
|
|
231
|
+
- Current Action: [What we're doing now]
|
|
232
|
+
- Next Steps: [What's next]
|
|
233
|
+
- ETA: [Estimated resolution time]
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
---
|
|
237
|
+
|
|
238
|
+
### External Communication
|
|
239
|
+
|
|
240
|
+
**Status Page**: [URL]
|
|
241
|
+
|
|
242
|
+
**Update Frequency**: Every [X] minutes or when status changes
|
|
243
|
+
|
|
244
|
+
**Status Page Template**:
|
|
245
|
+
```markdown
|
|
246
|
+
[HH:MM] Investigating: We are currently investigating [issue description]. Our team is actively working on a resolution.
|
|
247
|
+
|
|
248
|
+
[HH:MM] Identified: We have identified the issue as [root cause]. We are implementing a fix. ETA: [time].
|
|
249
|
+
|
|
250
|
+
[HH:MM] Monitoring: The fix has been deployed. We are monitoring to ensure stability.
|
|
251
|
+
|
|
252
|
+
[HH:MM] Resolved: The issue has been fully resolved. All services are operating normally. We apologize for the inconvenience.
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
**Customer Email** (if needed):
|
|
256
|
+
- [ ] Draft email
|
|
257
|
+
- [ ] Get approval from management
|
|
258
|
+
- [ ] Send to affected customers
|
|
259
|
+
|
|
260
|
+
---
|
|
261
|
+
|
|
262
|
+
## Validation
|
|
263
|
+
|
|
264
|
+
### Before Declaring Resolved
|
|
265
|
+
|
|
266
|
+
Verify all of the following:
|
|
267
|
+
|
|
268
|
+
- [ ] Root cause identified
|
|
269
|
+
- [ ] Immediate fix deployed and verified
|
|
270
|
+
- [ ] Service health check passing for >30 minutes
|
|
271
|
+
- [ ] Users able to access application
|
|
272
|
+
- [ ] Metrics returned to normal (response time, error rate, etc.)
|
|
273
|
+
- [ ] No active alerts
|
|
274
|
+
- [ ] Load test passed (if applicable)
|
|
275
|
+
- [ ] Customer support confirms no ongoing issues
|
|
276
|
+
|
|
277
|
+
### Monitoring After Resolution
|
|
278
|
+
|
|
279
|
+
Monitor for [X] hours after declaring resolved:
|
|
280
|
+
|
|
281
|
+
- [ ] [Metric 1] within normal range
|
|
282
|
+
- [ ] [Metric 2] within normal range
|
|
283
|
+
- [ ] [Metric 3] within normal range
|
|
284
|
+
- [ ] No error spikes
|
|
285
|
+
- [ ] No user complaints
|
|
286
|
+
|
|
287
|
+
**Example**:
|
|
288
|
+
```
|
|
289
|
+
- [ ] Connection pool <50% of max
|
|
290
|
+
- [ ] API response time <200ms (p95)
|
|
291
|
+
- [ ] Error rate <0.1%
|
|
292
|
+
- [ ] Database CPU <70%
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
---
|
|
296
|
+
|
|
297
|
+
## Rollback Plan
|
|
298
|
+
|
|
299
|
+
If mitigation actions fail or make things worse:
|
|
300
|
+
|
|
301
|
+
### Immediate Rollback
|
|
302
|
+
|
|
303
|
+
```bash
|
|
304
|
+
# Rollback code deployment
|
|
305
|
+
git revert <commit>
|
|
306
|
+
npm run deploy
|
|
307
|
+
|
|
308
|
+
# Rollback database config
|
|
309
|
+
psql -c "ALTER SYSTEM SET max_connections = 100;"
|
|
310
|
+
sudo systemctl restart postgresql  # max_connections only takes effect after a server restart
|
|
311
|
+
|
|
312
|
+
# Verify rollback
|
|
313
|
+
curl http://localhost/health
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
### When to Rollback
|
|
317
|
+
|
|
318
|
+
Rollback if:
|
|
319
|
+
- [ ] Issue worsens after mitigation
|
|
320
|
+
- [ ] New errors appear
|
|
321
|
+
- [ ] Service remains down >[X] minutes after mitigation
|
|
322
|
+
- [ ] Metrics worsen (response time, error rate)
|
|
323
|
+
|
|
324
|
+
---
|
|
325
|
+
|
|
326
|
+
## Next Steps
|
|
327
|
+
|
|
328
|
+
After incident is resolved:
|
|
329
|
+
|
|
330
|
+
1. [ ] Create post-mortem (within 24 hours)
|
|
331
|
+
- Owner: [Name]
|
|
332
|
+
- Due: [Date]
|
|
333
|
+
|
|
334
|
+
2. [ ] Schedule post-mortem review meeting
|
|
335
|
+
- Date: [Date]
|
|
336
|
+
- Attendees: [List]
|
|
337
|
+
|
|
338
|
+
3. [ ] Track action items to completion
|
|
339
|
+
- Use: [JIRA/GitHub/etc.]
|
|
340
|
+
- Review: Weekly in team meeting
|
|
341
|
+
|
|
342
|
+
4. [ ] Update runbooks based on learnings
|
|
343
|
+
- Owner: [Name]
|
|
344
|
+
- Due: [Date]
|
|
345
|
+
|
|
346
|
+
5. [ ] Share learnings with organization
|
|
347
|
+
- Format: All-hands presentation / Email / Wiki
|
|
348
|
+
- Owner: [Name]
|
|
349
|
+
- Due: [Date]
|
|
350
|
+
|
|
351
|
+
---
|
|
352
|
+
|
|
353
|
+
## Appendix
|
|
354
|
+
|
|
355
|
+
### Commands Reference
|
|
356
|
+
|
|
357
|
+
```bash
|
|
358
|
+
# Useful commands for this incident
|
|
359
|
+
<command1>
|
|
360
|
+
<command2>
|
|
361
|
+
<command3>
|
|
362
|
+
```
|
|
363
|
+
|
|
364
|
+
### Links
|
|
365
|
+
|
|
366
|
+
- **Monitoring Dashboard**: [URL]
|
|
367
|
+
- **Runbook**: [URL]
|
|
368
|
+
- **Related Incidents**: [URL]
|
|
369
|
+
- **Incident Channel**: [Slack/Teams URL]
|
|
370
|
+
|
|
371
|
+
---
|
|
372
|
+
|
|
373
|
+
**Plan Created**: YYYY-MM-DD HH:MM UTC
|
|
374
|
+
**Plan Updated**: YYYY-MM-DD HH:MM UTC
|
|
375
|
+
**Status**: Active / Executed / Superseded
|