@garethdaine/agentops 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. package/.claude-plugin/plugin.json +10 -0
  2. package/LICENSE +21 -0
  3. package/README.md +410 -0
  4. package/agents/architecture-researcher.md +115 -0
  5. package/agents/code-critic.md +190 -0
  6. package/agents/delegation-router.md +40 -0
  7. package/agents/feature-researcher.md +117 -0
  8. package/agents/interrogator.md +11 -0
  9. package/agents/pitfalls-researcher.md +112 -0
  10. package/agents/plan-validator.md +173 -0
  11. package/agents/proposer.md +61 -0
  12. package/agents/security-reviewer.md +189 -0
  13. package/agents/skill-builder.md +43 -0
  14. package/agents/spec-compliance-reviewer.md +154 -0
  15. package/agents/stack-researcher.md +89 -0
  16. package/commands/build.md +766 -0
  17. package/commands/code-analysis.md +39 -0
  18. package/commands/code-field.md +22 -0
  19. package/commands/compliance-check.md +34 -0
  20. package/commands/configure.md +178 -0
  21. package/commands/cost-report.md +17 -0
  22. package/commands/enterprise/adr.md +78 -0
  23. package/commands/enterprise/brainstorm.md +461 -0
  24. package/commands/enterprise/design.md +203 -0
  25. package/commands/enterprise/dev-setup.md +136 -0
  26. package/commands/enterprise/docker-dev.md +229 -0
  27. package/commands/enterprise/e2e.md +233 -0
  28. package/commands/enterprise/feature.md +218 -0
  29. package/commands/enterprise/gap-analysis.md +204 -0
  30. package/commands/enterprise/handover.md +195 -0
  31. package/commands/enterprise/herd.md +152 -0
  32. package/commands/enterprise/knowledge.md +173 -0
  33. package/commands/enterprise/onboard.md +86 -0
  34. package/commands/enterprise/qa-check.md +80 -0
  35. package/commands/enterprise/reason.md +196 -0
  36. package/commands/enterprise/review.md +177 -0
  37. package/commands/enterprise/scaffold.md +153 -0
  38. package/commands/enterprise/status-report.md +101 -0
  39. package/commands/enterprise/tech-catalog.md +170 -0
  40. package/commands/enterprise/test-gen.md +138 -0
  41. package/commands/evolve.md +39 -0
  42. package/commands/flags.md +44 -0
  43. package/commands/interrogate.md +263 -0
  44. package/commands/lesson.md +15 -0
  45. package/commands/lessons.md +10 -0
  46. package/commands/plan.md +44 -0
  47. package/commands/prune.md +27 -0
  48. package/commands/star.md +17 -0
  49. package/commands/supply-chain-scan.md +44 -0
  50. package/commands/unicode-scan.md +63 -0
  51. package/commands/verify.md +41 -0
  52. package/commands/workflow.md +436 -0
  53. package/hooks/ai-guardrails.sh +114 -0
  54. package/hooks/audit-log.sh +26 -0
  55. package/hooks/auto-delegate.sh +45 -0
  56. package/hooks/auto-evolve.sh +22 -0
  57. package/hooks/auto-lesson.sh +26 -0
  58. package/hooks/auto-plan.sh +59 -0
  59. package/hooks/auto-test.sh +46 -0
  60. package/hooks/auto-verify.sh +30 -0
  61. package/hooks/budget-check.sh +24 -0
  62. package/hooks/code-field-preamble.sh +30 -0
  63. package/hooks/compliance-gate.sh +50 -0
  64. package/hooks/content-trust.sh +22 -0
  65. package/hooks/credential-redact.sh +23 -0
  66. package/hooks/delegation-trust.sh +15 -0
  67. package/hooks/detect-test-run.sh +19 -0
  68. package/hooks/enforcement-lib.sh +60 -0
  69. package/hooks/evolve-gate.sh +32 -0
  70. package/hooks/evolve-lib.sh +32 -0
  71. package/hooks/exfiltration-check.sh +67 -0
  72. package/hooks/failure-collector.sh +27 -0
  73. package/hooks/feature-flags.sh +67 -0
  74. package/hooks/file-provenance.sh +31 -0
  75. package/hooks/flag-utils.sh +36 -0
  76. package/hooks/hooks.json +145 -0
  77. package/hooks/injection-scan.sh +58 -0
  78. package/hooks/integrity-verify.sh +91 -0
  79. package/hooks/lessons-check.sh +17 -0
  80. package/hooks/lockfile-audit.sh +109 -0
  81. package/hooks/patterns-lib.sh +22 -0
  82. package/hooks/plan-gate.sh +18 -0
  83. package/hooks/redact-lib.sh +15 -0
  84. package/hooks/runtime-mode.sh +56 -0
  85. package/hooks/session-cleanup.sh +74 -0
  86. package/hooks/skill-validator.sh +28 -0
  87. package/hooks/standards-enforce.sh +106 -0
  88. package/hooks/star-gate.sh +93 -0
  89. package/hooks/star-preamble.sh +10 -0
  90. package/hooks/telemetry.sh +33 -0
  91. package/hooks/todo-prune.sh +84 -0
  92. package/hooks/unicode-firewall.sh +122 -0
  93. package/hooks/unicode-lib.sh +66 -0
  94. package/hooks/unicode-scan-session.sh +96 -0
  95. package/hooks/validate-command.sh +103 -0
  96. package/hooks/validate-env.sh +51 -0
  97. package/hooks/validate-path.sh +81 -0
  98. package/package.json +40 -0
  99. package/settings.json +6 -0
  100. package/templates/ai-config/tool-standards.md +56 -0
  101. package/templates/architecture/api-first.md +192 -0
  102. package/templates/architecture/auth-patterns.md +302 -0
  103. package/templates/architecture/caching-strategy.md +359 -0
  104. package/templates/architecture/database-patterns.md +347 -0
  105. package/templates/architecture/event-driven.md +252 -0
  106. package/templates/architecture/integration-patterns.md +185 -0
  107. package/templates/architecture/multi-tenancy.md +104 -0
  108. package/templates/architecture/service-boundaries.md +200 -0
  109. package/templates/build/brief-template.md +86 -0
  110. package/templates/build/summary-template.md +100 -0
  111. package/templates/build/task-plan-template.md +133 -0
  112. package/templates/communication/effort-estimate.md +54 -0
  113. package/templates/communication/incident-response.md +59 -0
  114. package/templates/communication/post-mortem.md +109 -0
  115. package/templates/communication/risk-register.md +43 -0
  116. package/templates/communication/sprint-demo-checklist.md +64 -0
  117. package/templates/communication/stakeholder-presentation-outline.md +84 -0
  118. package/templates/communication/technical-proposal.md +77 -0
  119. package/templates/delivery/deployment/deployment-checklist.md +49 -0
  120. package/templates/delivery/design/solution-design-checklist.md +37 -0
  121. package/templates/delivery/discovery/stakeholder-questions.md +33 -0
  122. package/templates/delivery/handover/knowledge-transfer-checklist.md +75 -0
  123. package/templates/delivery/handover/operational-runbook.md +117 -0
  124. package/templates/delivery/handover/support-escalation-matrix.md +56 -0
  125. package/templates/delivery/implementation/blocker-escalation-template.md +55 -0
  126. package/templates/delivery/implementation/sprint-planning-template.md +49 -0
  127. package/templates/delivery/implementation/task-decomposition-guide.md +59 -0
  128. package/templates/delivery/qa/test-plan-template.md +76 -0
  129. package/templates/delivery/qa/test-results-template.md +55 -0
  130. package/templates/delivery/qa/uat-signoff-template.md +44 -0
  131. package/templates/governance/codeowners.md +60 -0
  132. package/templates/integration/adapter-pattern.md +160 -0
  133. package/templates/scaffolds/env-validation.md +85 -0
  134. package/templates/scaffolds/error-handling.md +171 -0
  135. package/templates/scaffolds/graceful-shutdown.md +139 -0
  136. package/templates/scaffolds/health-check.md +109 -0
  137. package/templates/scaffolds/structured-logging.md +134 -0
  138. package/templates/standards/engineering-standards.md +413 -0
  139. package/templates/standards/standards-checklist.md +125 -0
  140. package/templates/tech-catalog.json +663 -0
  141. package/templates/utilities/project-detection.md +75 -0
  142. package/templates/utilities/requirements-collection.md +68 -0
  143. package/templates/utilities/template-rendering.md +81 -0
  144. package/templates/workflows/architecture-decision.md +90 -0
  145. package/templates/workflows/bug-investigation.md +83 -0
  146. package/templates/workflows/feature-implementation.md +80 -0
  147. package/templates/workflows/refactoring.md +83 -0
  148. package/templates/workflows/spike-exploration.md +82 -0
@@ -0,0 +1,109 @@
1
+ # Communication Template: Post-Mortem / RCA
2
+
3
+ *Distinct from the incident response template — this is the after-action review conducted 2-5 days after an incident is resolved.*
4
+
5
+ ```markdown
6
+ # Post-Mortem: [Incident Title]
7
+
8
+ **Incident date:** [date]
9
+ **Post-mortem date:** [date]
10
+ **Severity:** P1 / P2 / P3
11
+ **Duration:** [hours/minutes from detection to resolution]
12
+ **Author:** [name]
13
+ **Attendees:** [names of post-mortem participants]
14
+
15
+ ---
16
+
17
+ ## Summary
18
+
19
+ [2-3 sentences: what happened, who was affected, how it was resolved]
20
+
21
+ ---
22
+
23
+ ## Timeline
24
+
25
+ | Time (UTC) | Event |
26
+ |-----------|-------|
27
+ | [HH:MM] | [First sign of issue — alert, user report, monitoring] |
28
+ | [HH:MM] | [Issue acknowledged by on-call] |
29
+ | [HH:MM] | [Investigation started] |
30
+ | [HH:MM] | [Root cause identified] |
31
+ | [HH:MM] | [Fix deployed] |
32
+ | [HH:MM] | [Monitoring confirms resolution] |
33
+ | [HH:MM] | [All-clear communicated to stakeholders] |
34
+
35
+ ---
36
+
37
+ ## Root Cause Analysis
38
+
39
+ ### What happened?
40
+ [Factual description of the failure chain]
41
+
42
+ ### 5 Whys
43
+ 1. **Why** did [symptom] occur? → Because [cause 1]
44
+ 2. **Why** did [cause 1] happen? → Because [cause 2]
45
+ 3. **Why** did [cause 2] happen? → Because [cause 3]
46
+ 4. **Why** did [cause 3] happen? → Because [cause 4]
47
+ 5. **Why** did [cause 4] happen? → Because [root cause]
48
+
49
+ ### Contributing Factors
50
+ - [Factor that made the impact worse or detection slower]
51
+ - [Factor — e.g., missing monitoring, documentation gap, test gap]
52
+
53
+ ---
54
+
55
+ ## Impact
56
+
57
+ | Metric | Value |
58
+ |--------|-------|
59
+ | Users affected | [count or percentage] |
60
+ | Duration | [time] |
61
+ | Revenue impact | [if applicable] |
62
+ | Data affected | [any data loss/corruption — be specific] |
63
+ | SLA breach | Yes/No — [details] |
64
+
65
+ ---
66
+
67
+ ## What Went Well
68
+
69
+ 1. [Thing that worked — detection, response, communication]
70
+ 2. [Thing that worked]
71
+ 3. [Thing that worked]
72
+
73
+ ## What Went Poorly
74
+
75
+ 1. [Thing that didn't work — slow detection, unclear runbook, missing test]
76
+ 2. [Thing that didn't work]
77
+ 3. [Thing that didn't work]
78
+
79
+ ## Where We Got Lucky
80
+
81
+ 1. [Thing that could have been worse but wasn't — acknowledge luck explicitly]
82
+
83
+ ---
84
+
85
+ ## Action Items
86
+
87
+ | # | Action | Type | Owner | Due Date | Status |
88
+ |---|--------|------|-------|----------|--------|
89
+ | 1 | [preventive action] | Prevention | [name] | [date] | Open |
90
+ | 2 | [detective action — better monitoring] | Detection | [name] | [date] | Open |
91
+ | 3 | [process improvement] | Process | [name] | [date] | Open |
92
+
93
+ **Types:**
94
+ - **Prevention** — stop this from happening again
95
+ - **Detection** — catch it faster next time
96
+ - **Mitigation** — reduce impact if it does happen
97
+ - **Process** — improve response procedures
98
+
99
+ ---
100
+
101
+ ## Follow-up Review
102
+
103
+ **Scheduled:** [date, 2-4 weeks after post-mortem]
104
+ **Purpose:** Verify all action items are completed and effective
105
+
106
+ ---
107
+
108
+ *This post-mortem is blameless. We focus on systems and processes, not individuals. The goal is to learn and improve, not to assign fault.*
109
+ ```
@@ -0,0 +1,43 @@
1
+ # Communication Template: Risk Register
2
+
3
+ ```markdown
4
+ # Risk Register — [Project Name]
5
+
6
+ **Last updated:** [date]
7
+ **Owner:** [name]
8
+
9
+ ## Active Risks
10
+
11
+ | ID | Risk | Category | Likelihood | Impact | Score | Owner | Mitigation | Status |
12
+ |----|------|----------|-----------|--------|-------|-------|------------|--------|
13
+ | R-001 | [description] | Technical/Business/Resource/External | H/M/L | H/M/L | [Likelihood x Impact] | [name] | [mitigation plan] | Open/Mitigated/Closed |
14
+
15
+ ## Risk Scoring
16
+
17
+ - **High (H):** 3 points
18
+ - **Medium (M):** 2 points
19
+ - **Low (L):** 1 point
20
+ - **Score:** Likelihood x Impact (1-9 scale)
21
+ - **Critical:** Score >= 6
22
+ - **Significant:** Score 4-5
23
+ - **Manageable:** Score <= 3
24
+
25
+ ## Risk Categories
26
+
27
+ - **Technical:** Architecture, performance, security, integration
28
+ - **Business:** Scope change, stakeholder alignment, market shifts
29
+ - **Resource:** Team availability, skill gaps, dependencies
30
+ - **External:** Vendor dependencies, regulatory, third-party APIs
31
+
32
+ ## Escalation Thresholds
33
+
34
+ - **Score >= 6:** Escalate to project sponsor immediately
35
+ - **Score 4-5:** Review in weekly status meeting
36
+ - **Score <= 3:** Monitor, review monthly
37
+
38
+ ## Closed Risks
39
+
40
+ | ID | Risk | Resolution | Closed Date |
41
+ |----|------|-----------|-------------|
42
+ | [id] | [description] | [how it was resolved] | [date] |
43
+ ```
@@ -0,0 +1,64 @@
1
+ # Communication Template: Sprint Demo Checklist
2
+
3
+ ## Pre-Demo Preparation (day before)
4
+
5
+ ### Environment
6
+ - [ ] Demo environment stable and accessible
7
+ - [ ] Test data seeded and representative
8
+ - [ ] All features deployed and verified
9
+ - [ ] No pending database migrations
10
+ - [ ] Environment matches production closely enough for credibility
11
+
12
+ ### Content
13
+ - [ ] Feature list for demo confirmed with product owner
14
+ - [ ] Demo script written with exact steps
15
+ - [ ] Backup plan for each feature (what if it fails?)
16
+ - [ ] Screenshots captured in case of live demo failure
17
+ - [ ] No debug logging or test data visible
18
+
19
+ ### Logistics
20
+ - [ ] Meeting invite sent with agenda
21
+ - [ ] Screen sharing tested
22
+ - [ ] Recording set up (if applicable)
23
+ - [ ] Time allocated: 5 min per feature + 10 min for fixed sections (opening, technical highlights, blockers & risks, next sprint preview) + 10 min Q&A
24
+
25
+ ## Demo Script Structure
26
+
27
+ ```markdown
28
+ ### 1. Opening (2 minutes)
29
+ - Sprint goal reminder
30
+ - What was committed vs delivered
31
+
32
+ ### 2. Feature Walkthrough (5 min per feature)
33
+ For each feature:
34
+ - **Context:** Why this matters (business value, not technical detail)
35
+ - **Demo:** Show the working feature (happy path)
36
+ - **Edge case:** Show one error case handled gracefully
37
+ - **Questions:** Pause for stakeholder input
38
+
39
+ ### 3. Technical Highlights (3 minutes)
40
+ - Architecture decisions made this sprint
41
+ - Technical debt addressed
42
+ - Performance improvements (with metrics if available)
43
+
44
+ ### 4. Blockers & Risks (2 minutes)
45
+ - Any unresolved blockers
46
+ - Risks to upcoming work
47
+ - Decisions needed from stakeholders
48
+
49
+ ### 5. Next Sprint Preview (3 minutes)
50
+ - Planned work for next sprint
51
+ - Dependencies or input needed from client
52
+
53
+ ### 6. Q&A (10 minutes)
54
+ - Open floor for questions
55
+ - Capture action items
56
+ ```
57
+
58
+ ## Post-Demo Follow-up
59
+
60
+ - [ ] Send demo recording (if recorded)
61
+ - [ ] Send summary email with key decisions and action items
62
+ - [ ] Update task tracker with any scope changes discussed
63
+ - [ ] Log new requirements or feedback captured during Q&A
64
+ - [ ] Schedule follow-up meetings for decisions needed
@@ -0,0 +1,84 @@
1
+ # Communication Template: Stakeholder Presentation Outline
2
+
3
+ ```markdown
4
+ # [Project Name] — Progress Update
5
+
6
+ **Date:** [date]
7
+ **Presented by:** [name]
8
+ **Audience:** [stakeholder names/roles]
9
+
10
+ ---
11
+
12
+ ## Executive Summary (1 slide / 3 sentences max)
13
+
14
+ [What was delivered, what's the current status, what decisions are needed]
15
+
16
+ ---
17
+
18
+ ## Progress Update
19
+
20
+ ### Delivered This Period
21
+ | Deliverable | Business Impact | Status |
22
+ |-------------|----------------|--------|
23
+ | [feature/milestone] | [how it helps the business] | Complete |
24
+
25
+ ### In Progress
26
+ | Item | Progress | ETA | Risk Level |
27
+ |------|----------|-----|-----------|
28
+ | [item] | [%] or On Track/At Risk | [date] | Green/Amber/Red |
29
+
30
+ ### Planned Next
31
+ | Item | Priority | Dependencies |
32
+ |------|----------|-------------|
33
+ | [item] | Must/Should/Could | [what's needed] |
34
+
35
+ ---
36
+
37
+ ## Key Decisions Needed
38
+
39
+ | # | Decision | Options | Recommendation | Impact of Delay |
40
+ |---|----------|---------|---------------|----------------|
41
+ | 1 | [what needs deciding] | A: [option] / B: [option] | [recommendation with rationale] | [what happens if we don't decide now] |
42
+
43
+ *Frame decisions as options with clear recommendations. Make it easy for stakeholders to say yes.*
44
+
45
+ ---
46
+
47
+ ## Risks & Mitigations
48
+
49
+ | Risk | Status | Mitigation | Owner |
50
+ |------|--------|------------|-------|
51
+ | [risk] | New/Active/Mitigated | [action] | [name] |
52
+
53
+ ---
54
+
55
+ ## Budget & Timeline
56
+
57
+ | Metric | Planned | Actual | Variance |
58
+ |--------|---------|--------|----------|
59
+ | Timeline | [end date] | [projected] | [+/- weeks] |
60
+ | Budget (effort) | [days] | [actual] | [+/- days] |
61
+
62
+ ---
63
+
64
+ ## Next Steps
65
+
66
+ | Action | Owner | Due Date |
67
+ |--------|-------|----------|
68
+ | [action from this meeting] | [name] | [date] |
69
+
70
+ ---
71
+
72
+ ## Appendix (if needed)
73
+ - Technical architecture diagram
74
+ - Detailed metrics
75
+ - Demo environment access
76
+ ```
77
+
78
+ ## Presentation Tips
79
+
80
+ - **Lead with outcomes, not activities** — "Users can now X" not "We built Y"
81
+ - **3 sentences per slide max** — stakeholders read ahead instead of listening
82
+ - **Red/Amber/Green status** — universally understood, no explanation needed
83
+ - **One decision per slide** — don't bundle decisions, they get lost
84
+ - **End with clear next steps** — every meeting should produce actions
@@ -0,0 +1,77 @@
1
+ # Communication Template: Technical Proposal
2
+
3
+ ```markdown
4
+ # Technical Proposal: [Project/Feature Name]
5
+
6
+ **Prepared for:** [Client Name]
7
+ **Prepared by:** [Company/Team]
8
+ **Date:** [date]
9
+ **Version:** [1.0]
10
+
11
+ ---
12
+
13
+ ## Executive Summary
14
+
15
+ [2-3 paragraphs summarising the proposed solution, key benefits, and expected outcomes. Write for a non-technical senior stakeholder.]
16
+
17
+ ## Problem Statement
18
+
19
+ [What problem are we solving? What is the business impact of not solving it?]
20
+
21
+ ## Proposed Solution
22
+
23
+ ### Overview
24
+ [High-level solution description]
25
+
26
+ ### Architecture
27
+ [Architecture diagram or description — keep it accessible to non-technical readers]
28
+
29
+ ### Key Features
30
+ 1. [Feature] — [business benefit]
31
+ 2. [Feature] — [business benefit]
32
+ 3. [Feature] — [business benefit]
33
+
34
+ ## Approach
35
+
36
+ ### Phase 1: [Name] ([duration])
37
+ - [Deliverable]
38
+ - [Deliverable]
39
+
40
+ ### Phase 2: [Name] ([duration])
41
+ - [Deliverable]
42
+ - [Deliverable]
43
+
44
+ ## Timeline
45
+
46
+ | Phase | Duration | Start | End |
47
+ |-------|----------|-------|-----|
48
+ | Discovery | [weeks] | [date] | [date] |
49
+ | Design | [weeks] | [date] | [date] |
50
+ | Implementation | [weeks] | [date] | [date] |
51
+ | QA & Testing | [weeks] | [date] | [date] |
52
+ | Deployment | [weeks] | [date] | [date] |
53
+
54
+ ## Effort Estimate
55
+
56
+ | Role | Effort | Rate | Cost |
57
+ |------|--------|------|------|
58
+ | [role] | [days/weeks] | [rate] | [cost] |
59
+
60
+ **Total estimated effort:** [total]
61
+
62
+ ## Assumptions
63
+
64
+ 1. [Assumption]
65
+ 2. [Assumption]
66
+
67
+ ## Risks
68
+
69
+ | Risk | Likelihood | Impact | Mitigation |
70
+ |------|-----------|--------|------------|
71
+ | [risk] | H/M/L | H/M/L | [mitigation] |
72
+
73
+ ## Next Steps
74
+
75
+ 1. [Action] — [owner] — [date]
76
+ 2. [Action] — [owner] — [date]
77
+ ```
@@ -0,0 +1,49 @@
1
+ # Delivery Template: Deployment Checklist
2
+
3
+ ## Pre-Deployment
4
+
5
+ - [ ] All tests passing in CI
6
+ - [ ] Code review approved
7
+ - [ ] QA check completed (`/agentops:qa-check`)
8
+ - [ ] Environment variables configured in target environment
9
+ - [ ] Database migrations tested in staging
10
+ - [ ] Feature flags configured for gradual rollout
11
+ - [ ] Rollback plan documented and tested
12
+ - [ ] Monitoring/alerting configured
13
+ - [ ] Stakeholders notified of deployment window
14
+
15
+ ## Deployment
16
+
17
+ - [ ] Create deployment tag/release
18
+ - [ ] Run database migrations
19
+ - [ ] Deploy application
20
+ - [ ] Verify health check endpoints respond
21
+ - [ ] Run smoke tests against deployed environment
22
+ - [ ] Verify key user flows work end-to-end
23
+ - [ ] Check error rates in monitoring
24
+ - [ ] Check response times in monitoring
25
+
26
+ ## Post-Deployment
27
+
28
+ - [ ] Monitor error rates for 30 minutes
29
+ - [ ] Verify no performance degradation
30
+ - [ ] Update status page / notify stakeholders
31
+ - [ ] Close related tickets/issues
32
+ - [ ] Update documentation if needed
33
+ - [ ] Schedule post-deployment review if significant changes
34
+
35
+ ## Rollback Procedure
36
+
37
+ 1. Identify the issue triggering rollback
38
+ 2. Revert to previous deployment tag
39
+ 3. Run rollback database migrations (if applicable)
40
+ 4. Verify health checks pass
41
+ 5. Notify stakeholders of rollback
42
+ 6. Document what went wrong for the post-mortem
43
+
44
+ ## Zero-Downtime Deployment Patterns
45
+
46
+ - **Blue/Green:** Maintain two identical environments, switch traffic
47
+ - **Canary:** Route small percentage of traffic to new version, gradually increase
48
+ - **Rolling:** Update instances one at a time behind load balancer
49
+ - **Feature flags:** Deploy code but gate features behind flags
@@ -0,0 +1,37 @@
1
+ # Design Template: Solution Design Checklist
2
+
3
+ ## Architecture
4
+ - [ ] High-level architecture defined
5
+ - [ ] Component boundaries clear
6
+ - [ ] Technology choices justified
7
+ - [ ] Integration points mapped
8
+ - [ ] Data flow documented
9
+
10
+ ## Security
11
+ - [ ] Authentication approach defined
12
+ - [ ] Authorization model designed
13
+ - [ ] Data protection strategy documented
14
+ - [ ] Compliance requirements addressed
15
+
16
+ ## Data
17
+ - [ ] Data model designed
18
+ - [ ] Migration strategy defined (if applicable)
19
+ - [ ] Backup/recovery plan
20
+ - [ ] Data retention policy
21
+
22
+ ## Infrastructure
23
+ - [ ] Hosting environment selected
24
+ - [ ] Scaling strategy defined
25
+ - [ ] Monitoring approach defined
26
+ - [ ] CI/CD pipeline designed
27
+
28
+ ## Risk
29
+ - [ ] Technical risks identified and mitigated
30
+ - [ ] Business risks flagged to stakeholders
31
+ - [ ] Fallback/rollback plans defined
32
+
33
+ ## Validation
34
+ - [ ] Design reviewed by senior engineer
35
+ - [ ] Design approved by stakeholder
36
+ - [ ] Effort estimate produced
37
+ - [ ] Timeline agreed
@@ -0,0 +1,33 @@
1
+ # Discovery Template: Stakeholder Questions
2
+
3
+ ## Business Context
4
+ 1. What problem are we solving?
5
+ 2. Who experiences this problem? How often?
6
+ 3. What is the business impact of not solving it?
7
+ 4. What does success look like? How will we measure it?
8
+ 5. What is the budget and timeline?
9
+
10
+ ## Users & Stakeholders
11
+ 1. Who are the primary users? Secondary users?
12
+ 2. What are their technical capabilities?
13
+ 3. How many concurrent users are expected?
14
+ 4. Who is the project sponsor?
15
+ 5. Who approves deliverables?
16
+
17
+ ## Existing Systems
18
+ 1. What systems exist today that are relevant?
19
+ 2. What integrations are required?
20
+ 3. What data needs to migrate?
21
+ 4. What are the current pain points?
22
+
23
+ ## Constraints
24
+ 1. Regulatory/compliance requirements?
25
+ 2. Technology constraints (approved vendors, existing infrastructure)?
26
+ 3. Team/skill constraints?
27
+ 4. Hard deadlines on the timeline?
28
+
29
+ ## Scope
30
+ 1. What is IN scope?
31
+ 2. What is OUT of scope?
32
+ 3. What is deferred to future phases?
33
+ 4. What are the acceptance criteria?
@@ -0,0 +1,75 @@
1
+ # Delivery Template: Knowledge Transfer Checklist
2
+
3
+ ```markdown
4
+ # Knowledge Transfer Checklist — [Project Name]
5
+
6
+ **From:** [delivering team]
7
+ **To:** [receiving team/client]
8
+ **Date:** [date]
9
+
10
+ ## Sessions
11
+
12
+ ### Session 1: Architecture Overview (1-2 hours)
13
+ - [ ] High-level architecture walkthrough
14
+ - [ ] Key architectural decisions explained (link to ADRs)
15
+ - [ ] Module/service boundaries and responsibilities
16
+ - [ ] Data flow diagrams reviewed
17
+ - [ ] Integration points with external systems
18
+ - [ ] Security architecture (auth, RBAC, tenant isolation)
19
+ - [ ] Q&A
20
+
21
+ ### Session 2: Codebase Walkthrough (2-3 hours)
22
+ - [ ] Repository structure and navigation
23
+ - [ ] Key files and their purposes
24
+ - [ ] Coding conventions and patterns
25
+ - [ ] Build process and tooling
26
+ - [ ] Testing approach and running tests
27
+ - [ ] Common development tasks (add endpoint, add model, etc.)
28
+ - [ ] Q&A
29
+
30
+ ### Session 3: Operations & Deployment (1-2 hours)
31
+ - [ ] Deployment pipeline walkthrough
32
+ - [ ] Environment configuration
33
+ - [ ] Monitoring dashboards and alerting
34
+ - [ ] Log access and common queries
35
+ - [ ] Runbook walkthrough (startup, shutdown, troubleshooting)
36
+ - [ ] Backup and recovery procedures
37
+ - [ ] Q&A
38
+
39
+ ### Session 4: Incident Response (30-60 minutes)
40
+ - [ ] Incident severity definitions
41
+ - [ ] Escalation procedures
42
+ - [ ] Common failure modes and their fixes
43
+ - [ ] Rollback procedures
44
+ - [ ] Communication templates for incidents
45
+ - [ ] Q&A
46
+
47
+ ## Documentation Delivered
48
+ - [ ] Architecture overview document
49
+ - [ ] Operational runbook
50
+ - [ ] API documentation
51
+ - [ ] Environment setup guide
52
+ - [ ] ADR archive
53
+ - [ ] Test plan and current coverage report
54
+
55
+ ## Access Transferred
56
+ - [ ] Source code repository (read + write)
57
+ - [ ] CI/CD pipeline (admin access)
58
+ - [ ] Cloud infrastructure console
59
+ - [ ] Monitoring and alerting dashboards
60
+ - [ ] Log aggregation platform
61
+ - [ ] Secret management system
62
+ - [ ] Domain registrar / DNS management
63
+ - [ ] Third-party service accounts (auth, email, storage)
64
+
65
+ ## Knowledge Gaps Identified
66
+ | Gap | Impact | Remediation |
67
+ |-----|--------|-------------|
68
+ | [gap] | [impact] | [plan to address] |
69
+
70
+ ## Sign-off
71
+ | Role | Name | Confirmed | Date |
72
+ |------|------|-----------|------|
73
+ | Delivering lead | [name] | [ ] | [date] |
74
+ | Receiving lead | [name] | [ ] | [date] |
75
+ ```
@@ -0,0 +1,117 @@
1
+ # Delivery Template: Operational Runbook
2
+
3
+ ```markdown
4
+ # Operational Runbook — [Project Name]
5
+
6
+ **Last Updated:** [date]
7
+ **Owner:** [team/person]
8
+ **Version:** [1.0]
9
+
10
+ ## 1. System Overview
11
+
12
+ **Architecture:** [brief description — e.g., Next.js frontend, Express API, PostgreSQL, Redis, deployed on AWS ECS]
13
+
14
+ | Component | Technology | URL/Endpoint | Health Check |
15
+ |-----------|-----------|-------------|-------------|
16
+ | Frontend | [tech] | [URL] | [health URL] |
17
+ | API | [tech] | [URL] | [health URL] |
18
+ | Database | [tech] | [connection string ref] | [query] |
19
+ | Cache | [tech] | [connection string ref] | PING |
20
+
21
+ ## 2. Startup / Shutdown
22
+
23
+ ### Start Services
24
+ ~~~bash
25
+ # Start database and cache
26
+ docker compose up -d postgres redis
27
+
28
+ # Run migrations
29
+ npx prisma migrate deploy
30
+
31
+ # Start application
32
+ pnpm start
33
+ ~~~
34
+
35
+ ### Stop Services
36
+ ~~~bash
37
+ # Graceful shutdown (drains connections)
38
+ kill -SIGTERM <pid>
39
+
40
+ # Or via Docker
41
+ docker compose down
42
+ ~~~
43
+
44
+ ### Restart Procedure
45
+ 1. Notify stakeholders of planned restart
46
+ 2. Stop accepting new connections (remove from load balancer)
47
+ 3. Wait for in-flight requests to complete (30s grace period)
48
+ 4. Stop services
49
+ 5. Start services
50
+ 6. Verify health checks pass
51
+ 7. Add back to load balancer
52
+
53
+ ## 3. Monitoring & Alerting
54
+
55
+ | Metric | Warning Threshold | Critical Threshold | Dashboard |
56
+ |--------|------------------|-------------------|-----------|
57
+ | API response time (p95) | >500ms | >2000ms | [link] |
58
+ | Error rate | >1% | >5% | [link] |
59
+ | CPU utilisation | >70% | >90% | [link] |
60
+ | Memory usage | >75% | >90% | [link] |
61
+ | Database connections | >80% pool | >95% pool | [link] |
62
+ | Disk usage | >80% | >90% | [link] |
63
+
64
+ ## 4. Common Troubleshooting
65
+
66
+ ### API returning 500 errors
67
+ 1. Check application logs: `docker logs <container> --tail 100`
68
+ 2. Check database connectivity: `SELECT 1` via psql
69
+ 3. Check Redis connectivity: `redis-cli PING`
70
+ 4. Check recent deployments: `git log --oneline -5`
71
+ 5. If caused by a recent deployment: roll back
72
+
73
+ ### Database connection pool exhausted
74
+ 1. Check current connections: `SELECT count(*) FROM pg_stat_activity`
75
+ 2. Kill idle connections if needed
76
+ 3. Review connection pool settings
77
+ 4. Check for connection leaks (unclosed transactions)
78
+
79
+ ### High memory usage
80
+ 1. Check for memory leaks: monitor heap growth over time
81
+ 2. Review recent code changes for unbounded caches or collections
82
+ 3. Restart service if immediate relief needed
83
+ 4. Profile with Node.js inspector if recurring
84
+
85
+ ## 5. Backup & Recovery
86
+
87
+ | Data | Frequency | Retention | Location | Recovery Time |
88
+ |------|-----------|-----------|----------|--------------|
89
+ | Database | Daily + WAL | 30 days | [S3 bucket] | <1 hour |
90
+ | File uploads | Real-time (S3) | Indefinite | [S3 bucket] | <30 min |
91
+ | Application config | Git | Indefinite | Repository | <5 min |
92
+
93
+ ### Recovery Procedure
94
+ 1. Identify point-in-time for recovery
95
+ 2. Restore database from backup
96
+ 3. Verify data integrity
97
+ 4. Replay WAL logs if needed for point-in-time recovery
98
+ 5. Verify application functions correctly
99
+ 6. Notify stakeholders
100
+
101
+ ## 6. Scaling
102
+
103
+ | Trigger | Action | Limit |
104
+ |---------|--------|-------|
105
+ | CPU >70% sustained 5min | Add API instance | Max 10 instances |
106
+ | Memory >80% | Add API instance | Max 10 instances |
107
+ | Database connections >80% | Increase pool size | Max 100 |
108
+ | Response time p95 >1s | Add API instance + review queries | — |
109
+
110
+ ## 7. Contacts
111
+
112
+ | Role | Name | Contact | Escalation |
113
+ |------|------|---------|------------|
114
+ | On-call engineer | [name] | [phone/slack] | Level 1 |
115
+ | Tech lead | [name] | [phone/slack] | Level 2 |
116
+ | Infrastructure | [name] | [phone/slack] | Level 3 |
117
+ ```