@jetrabbits/agentic 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +8 -0
- package/CHANGELOG.md +3 -0
- package/Makefile +21 -5
- package/README.md +17 -4
- package/agentic +78 -7
- package/areas/devops/ci-cd/workflows/onboard-repo.md +29 -0
- package/areas/devops/ci-cd/workflows/pipeline-debug.md +26 -0
- package/areas/devops/ci-cd/workflows/release-pipeline.md +53 -0
- package/areas/devops/database-ops/workflows/backup-verify.md +27 -0
- package/areas/devops/database-ops/workflows/db-incident.md +30 -0
- package/areas/devops/devsecops/workflows/policy-onboard.md +34 -0
- package/areas/devops/devsecops/workflows/security-scan-pipeline.md +33 -0
- package/areas/devops/infrastructure/workflows/destroy-environment.md +31 -0
- package/areas/devops/infrastructure/workflows/drift-remediation.md +29 -0
- package/areas/devops/infrastructure/workflows/module-development.md +32 -0
- package/areas/devops/infrastructure/workflows/provision-environment.md +29 -0
- package/areas/devops/kubernetes/workflows/cluster-bootstrap.md +36 -0
- package/areas/devops/kubernetes/workflows/debug-workload.md +29 -0
- package/areas/devops/kubernetes/workflows/onboard-service.md +35 -0
- package/areas/devops/kubernetes/workflows/upgrade-cluster.md +30 -0
- package/areas/devops/networking/workflows/onboard-ingress.md +27 -0
- package/areas/devops/networking/workflows/service-mesh-onboard.md +27 -0
- package/areas/devops/observability/workflows/alert-investigation.md +29 -0
- package/areas/devops/observability/workflows/observability-stack-setup.md +33 -0
- package/areas/devops/observability/workflows/onboard-service-monitoring.md +31 -0
- package/areas/devops/sre/workflows/incident-response.md +48 -0
- package/areas/devops/sre/workflows/postmortem.md +32 -0
- package/areas/devops/sre/workflows/slo-review.md +35 -1
- package/areas/software/backend/workflows/add-migration.md +33 -0
- package/areas/software/backend/workflows/create-endpoint.md +40 -0
- package/areas/software/backend/workflows/debug-issue.md +31 -0
- package/areas/software/backend/workflows/develop-epic.md +37 -0
- package/areas/software/backend/workflows/develop-feature.md +44 -0
- package/areas/software/backend/workflows/refactor-module.md +35 -0
- package/areas/software/backend/workflows/test-feature.md +30 -0
- package/areas/software/data-engineering/workflows/backfill-data.md +25 -0
- package/areas/software/data-engineering/workflows/data-quality-incident.md +31 -0
- package/areas/software/data-engineering/workflows/lineage-trace.md +25 -0
- package/areas/software/data-engineering/workflows/new-model.md +30 -0
- package/areas/software/data-engineering/workflows/schema-migration.md +29 -0
- package/areas/software/frontend/workflows/a11y-fix.md +30 -0
- package/areas/software/frontend/workflows/bundle-analyze.md +28 -0
- package/areas/software/frontend/workflows/release-prep.md +33 -0
- package/areas/software/frontend/workflows/scaffold-component.md +32 -0
- package/areas/software/frontend/workflows/visual-regression.md +32 -0
- package/areas/software/full-stack/workflows/backend-project-full-cycle.md +47 -2
- package/areas/software/full-stack/workflows/debug-issue.md +29 -0
- package/areas/software/full-stack/workflows/develop-feature.md +38 -0
- package/areas/software/full-stack/workflows/feature-implementation-flow.md +38 -0
- package/areas/software/full-stack/workflows/testing-ci-pipeline.md +30 -0
- package/areas/software/general/workflows/code-review-workflow.md +31 -0
- package/areas/software/general/workflows/development-cycle-workflow.md +38 -0
- package/areas/software/general/workflows/project-setup-workflow.md +38 -0
- package/areas/software/mlops/workflows/champion-challenger.md +29 -0
- package/areas/software/mlops/workflows/deploy-endpoint.md +30 -0
- package/areas/software/mlops/workflows/evaluate-model.md +28 -0
- package/areas/software/mlops/workflows/model-incident.md +29 -0
- package/areas/software/mlops/workflows/train-experiment.md +25 -0
- package/areas/software/mobile/workflows/crash-triage.md +28 -0
- package/areas/software/mobile/workflows/device-testing.md +27 -0
- package/areas/software/mobile/workflows/ota-update.md +25 -0
- package/areas/software/mobile/workflows/release-build.md +30 -0
- package/areas/software/mobile/workflows/store-submission.md +29 -0
- package/areas/software/platform/workflows/cost-audit.md +28 -0
- package/areas/software/platform/workflows/deploy-production.md +30 -0
- package/areas/software/platform/workflows/drift-check.md +29 -0
- package/areas/software/platform/workflows/incident-response.md +33 -0
- package/areas/software/platform/workflows/provision-env.md +36 -0
- package/areas/software/qa/workflows/flakiness-investigation.md +30 -0
- package/areas/software/qa/workflows/performance-audit.md +29 -0
- package/areas/software/qa/workflows/regression-suite.md +28 -0
- package/areas/software/qa/workflows/smoke-test.md +31 -0
- package/areas/software/qa/workflows/test-coverage-report.md +28 -0
- package/areas/software/security/workflows/compliance-report.md +27 -0
- package/areas/software/security/workflows/pen-test-sim.md +28 -0
- package/areas/software/security/workflows/secret-rotation.md +33 -2
- package/areas/software/security/workflows/security-scan.md +29 -0
- package/areas/software/security/workflows/threat-model-review.md +30 -0
- package/docs/agentic-usage.md +1 -1
- package/docs/catalog.schema.json +5 -1
- package/docs/opencode_setup.md +10 -0
- package/docs/site/README.md +15 -1
- package/docs/site/app.js +68 -0
- package/docs/site/catalog.json +74 -1
- package/docs/site/index.html +5 -1
- package/docs/site/styles.css +52 -4
- package/extensions/opencode/opencode.json +0 -1
- package/extensions/opencode/profiles/githubcopilot/opencode.json +1 -2
- package/extensions/opencode/profiles/openai/opencode.json +20 -20
- package/package.json +1 -1
- package/scripts/build_docs_catalog.py +13 -1
- package/scripts/sync_workflow_diagrams.py +199 -0
- package/extensions/opencode/plugins/sound-notification.ts +0 -13
|
@@ -92,5 +92,34 @@ quality-gates:
|
|
|
92
92
|
- Write `provision_report.md`: environment, resources created, cost estimate, next steps
|
|
93
93
|
- **Done when:** report committed; outputs stored
|
|
94
94
|
|
|
95
|
+
## Agent Interaction Diagram
|
|
96
|
+
|
|
97
|
+
<!-- agent-diagram:start -->
|
|
98
|
+
```mermaid
|
|
99
|
+
flowchart TD
|
|
100
|
+
start(["Start /provision-environment"])
|
|
101
|
+
role_1["devops-engineer"]
|
|
102
|
+
role_2["team-lead"]
|
|
103
|
+
step_1["1. Plan & Review"]
|
|
104
|
+
step_2["2. Apply Infrastructure"]
|
|
105
|
+
step_3["3. Configure Nodes (Ansible)"]
|
|
106
|
+
step_4["4. Smoke Tests"]
|
|
107
|
+
step_5["5. Document & Store Outputs"]
|
|
108
|
+
exit(["Terraform apply clean + Ansible 0 failures + smoke tests pass = environment..."])
|
|
109
|
+
start --> step_1
|
|
110
|
+
step_1 --> step_2
|
|
111
|
+
step_2 --> step_3
|
|
112
|
+
step_3 --> step_4
|
|
113
|
+
step_4 --> step_5
|
|
114
|
+
step_5 --> exit
|
|
115
|
+
role_1 -. owns .-> step_1
|
|
116
|
+
role_2 -. owns .-> step_1
|
|
117
|
+
role_1 -. owns .-> step_2
|
|
118
|
+
role_1 -. owns .-> step_3
|
|
119
|
+
role_1 -. owns .-> step_4
|
|
120
|
+
role_1 -. owns .-> step_5
|
|
121
|
+
```
|
|
122
|
+
<!-- agent-diagram:end -->
|
|
123
|
+
|
|
95
124
|
## Exit
|
|
96
125
|
Terraform apply clean + Ansible 0 failures + smoke tests pass = environment provisioned.
|
|
@@ -190,5 +190,41 @@ quality-gates:
|
|
|
190
190
|
- Set up `kube-prometheus-stack` for cluster monitoring
|
|
191
191
|
- **Output:** `bootstrap_report.md` — cluster version, node IPs, installed components, kubeconfig location
|
|
192
192
|
|
|
193
|
+
## Agent Interaction Diagram
|
|
194
|
+
|
|
195
|
+
<!-- agent-diagram:start -->
|
|
196
|
+
```mermaid
|
|
197
|
+
flowchart TD
|
|
198
|
+
start(["Start /cluster-bootstrap"])
|
|
199
|
+
role_1["devops-engineer"]
|
|
200
|
+
step_1["1. Node Pre-Flight"]
|
|
201
|
+
step_2["2. Bootstrap First Control Plane"]
|
|
202
|
+
step_3["3. Install CNI (Cilium)"]
|
|
203
|
+
step_4["4. Join Remaining Control Plane Nodes"]
|
|
204
|
+
step_5["5. Join Worker Nodes"]
|
|
205
|
+
step_6["6. etcd Encryption at Rest"]
|
|
206
|
+
step_7["7. Core Platform Components"]
|
|
207
|
+
step_8["8. Apply Security Baselines"]
|
|
208
|
+
exit(["All nodes Ready + core components Running + etcd encrypted + monitoring liv..."])
|
|
209
|
+
start --> step_1
|
|
210
|
+
step_1 --> step_2
|
|
211
|
+
step_2 --> step_3
|
|
212
|
+
step_3 --> step_4
|
|
213
|
+
step_4 --> step_5
|
|
214
|
+
step_5 --> step_6
|
|
215
|
+
step_6 --> step_7
|
|
216
|
+
step_7 --> step_8
|
|
217
|
+
step_8 --> exit
|
|
218
|
+
role_1 -. owns .-> step_1
|
|
219
|
+
role_1 -. owns .-> step_2
|
|
220
|
+
role_1 -. owns .-> step_3
|
|
221
|
+
role_1 -. owns .-> step_4
|
|
222
|
+
role_1 -. owns .-> step_5
|
|
223
|
+
role_1 -. owns .-> step_6
|
|
224
|
+
role_1 -. owns .-> step_7
|
|
225
|
+
role_1 -. owns .-> step_8
|
|
226
|
+
```
|
|
227
|
+
<!-- agent-diagram:end -->
|
|
228
|
+
|
|
193
229
|
## Exit
|
|
194
230
|
All nodes Ready + core components Running + etcd encrypted + monitoring live = cluster bootstrapped.
|
|
@@ -104,5 +104,34 @@ quality-gates:
|
|
|
104
104
|
- **Output:** `docs/incidents/<date>-<workload>-root-cause.md`
|
|
105
105
|
- **Done when:** document committed; alert/runbook created if pattern is recurring
|
|
106
106
|
|
|
107
|
+
## Agent Interaction Diagram
|
|
108
|
+
|
|
109
|
+
<!-- agent-diagram:start -->
|
|
110
|
+
```mermaid
|
|
111
|
+
flowchart TD
|
|
112
|
+
start(["Start /debug-workload"])
|
|
113
|
+
role_1["devops-engineer"]
|
|
114
|
+
role_2["developer"]
|
|
115
|
+
step_1["1. Classify Symptom"]
|
|
116
|
+
step_2["2. Deep Diagnosis"]
|
|
117
|
+
step_3["3. Apply Fix"]
|
|
118
|
+
step_4["4. Verify & Monitor"]
|
|
119
|
+
step_5["5. Document"]
|
|
120
|
+
exit(["Pod Running + metrics stable + root cause documented = workload debug compl..."])
|
|
121
|
+
start --> step_1
|
|
122
|
+
step_1 --> step_2
|
|
123
|
+
step_2 --> step_3
|
|
124
|
+
step_3 --> step_4
|
|
125
|
+
step_4 --> step_5
|
|
126
|
+
step_5 --> exit
|
|
127
|
+
role_1 -. owns .-> step_1
|
|
128
|
+
role_1 -. owns .-> step_2
|
|
129
|
+
role_2 -. owns .-> step_3
|
|
130
|
+
role_1 -. owns .-> step_3
|
|
131
|
+
role_1 -. owns .-> step_4
|
|
132
|
+
role_1 -. owns .-> step_5
|
|
133
|
+
```
|
|
134
|
+
<!-- agent-diagram:end -->
|
|
135
|
+
|
|
107
136
|
## Exit
|
|
108
137
|
Pod Running + metrics stable + root cause documented = workload debug complete.
|
|
@@ -120,5 +120,40 @@ quality-gates:
|
|
|
120
120
|
- **Output:** metrics visible in Grafana; alerts configured
|
|
121
121
|
- **Done when:** Grafana dashboard shows service metrics
|
|
122
122
|
|
|
123
|
+
## Agent Interaction Diagram
|
|
124
|
+
|
|
125
|
+
<!-- agent-diagram:start -->
|
|
126
|
+
```mermaid
|
|
127
|
+
flowchart TD
|
|
128
|
+
start(["Start /onboard-service"])
|
|
129
|
+
role_1["devops-engineer"]
|
|
130
|
+
role_2["developer"]
|
|
131
|
+
step_1["1. Namespace Setup"]
|
|
132
|
+
step_2["2. RBAC Setup"]
|
|
133
|
+
step_3["3. Network Policies"]
|
|
134
|
+
step_4["4. Helm Chart"]
|
|
135
|
+
step_5["5. ArgoCD Application"]
|
|
136
|
+
step_6["6. Validate & Smoke Test"]
|
|
137
|
+
step_7["7. Monitoring"]
|
|
138
|
+
exit(["Pod Running + health check passing + ArgoCD Healthy + metrics visible = ser..."])
|
|
139
|
+
start --> step_1
|
|
140
|
+
step_1 --> step_2
|
|
141
|
+
step_2 --> step_3
|
|
142
|
+
step_3 --> step_4
|
|
143
|
+
step_4 --> step_5
|
|
144
|
+
step_5 --> step_6
|
|
145
|
+
step_6 --> step_7
|
|
146
|
+
step_7 --> exit
|
|
147
|
+
role_1 -. owns .-> step_1
|
|
148
|
+
role_1 -. owns .-> step_2
|
|
149
|
+
role_1 -. owns .-> step_3
|
|
150
|
+
role_2 -. owns .-> step_4
|
|
151
|
+
role_1 -. owns .-> step_4
|
|
152
|
+
role_1 -. owns .-> step_5
|
|
153
|
+
role_2 -. owns .-> step_6
|
|
154
|
+
role_1 -. owns .-> step_7
|
|
155
|
+
```
|
|
156
|
+
<!-- agent-diagram:end -->
|
|
157
|
+
|
|
123
158
|
## Exit
|
|
124
159
|
Pod Running + health check passing + ArgoCD Healthy + metrics visible = service onboarded.
|
|
@@ -146,6 +146,36 @@ kubectl get pdb -A
|
|
|
146
146
|
- **Output:** `upgrade_report.md` — versions before/after, issues found, time taken
|
|
147
147
|
- **Done when:** all Tier 1 services healthy; no unexpected pod restarts
|
|
148
148
|
|
|
149
|
+
## Agent Interaction Diagram
|
|
150
|
+
|
|
151
|
+
<!-- agent-diagram:start -->
|
|
152
|
+
```mermaid
|
|
153
|
+
flowchart TD
|
|
154
|
+
start(["Start /upgrade-cluster"])
|
|
155
|
+
role_1["devops-engineer"]
|
|
156
|
+
role_2["team-lead"]
|
|
157
|
+
step_1["1. etcd Backup"]
|
|
158
|
+
step_2["2. Upgrade Control Plane (kubeadm)"]
|
|
159
|
+
step_3["3. Validate Control Plane"]
|
|
160
|
+
step_4["4. Upgrade Worker Nodes (rolling)"]
|
|
161
|
+
step_5["5. Post-Upgrade Validation"]
|
|
162
|
+
exit(["All nodes on target version + Tier 1 services healthy + upgrade report comm..."])
|
|
163
|
+
start --> step_1
|
|
164
|
+
step_1 --> step_2
|
|
165
|
+
step_2 --> step_3
|
|
166
|
+
step_3 --> step_4
|
|
167
|
+
step_4 --> step_5
|
|
168
|
+
step_5 --> exit
|
|
169
|
+
role_1 -. owns .-> step_1
|
|
170
|
+
role_1 -. owns .-> step_2
|
|
171
|
+
role_1 -. owns .-> step_3
|
|
172
|
+
role_1 -. owns .-> step_4
|
|
173
|
+
role_1 -. owns .-> step_5
|
|
174
|
+
role_2 -. owns .-> step_5
|
|
175
|
+
step_5 -. iterate if blocked .-> step_1
|
|
176
|
+
```
|
|
177
|
+
<!-- agent-diagram:end -->
|
|
178
|
+
|
|
149
179
|
## Rollback Plan
|
|
150
180
|
|
|
151
181
|
```bash
|
|
@@ -60,5 +60,32 @@ done | sort | uniq -c
|
|
|
60
60
|
- Point hostname to MetalLB external IP: `kubectl get svc -n ingress-nginx`
|
|
61
61
|
- Add A record in DNS provider or internal CoreDNS
|
|
62
62
|
|
|
63
|
+
## Agent Interaction Diagram
|
|
64
|
+
|
|
65
|
+
<!-- agent-diagram:start -->
|
|
66
|
+
```mermaid
|
|
67
|
+
flowchart TD
|
|
68
|
+
start(["Start /onboard-ingress"])
|
|
69
|
+
role_1["devops-engineer"]
|
|
70
|
+
step_1["1. Write Ingress Manifest"]
|
|
71
|
+
step_2["2. Apply & Wait for Certificate"]
|
|
72
|
+
step_3["3. Verify HTTPS"]
|
|
73
|
+
step_4["4. Verify Rate Limiting"]
|
|
74
|
+
step_5["5. DNS (if needed)"]
|
|
75
|
+
exit(["HTTPS accessible + cert issued + security headers present + rate limit veri..."])
|
|
76
|
+
start --> step_1
|
|
77
|
+
step_1 --> step_2
|
|
78
|
+
step_2 --> step_3
|
|
79
|
+
step_3 --> step_4
|
|
80
|
+
step_4 --> step_5
|
|
81
|
+
step_5 --> exit
|
|
82
|
+
role_1 -. owns .-> step_1
|
|
83
|
+
role_1 -. owns .-> step_2
|
|
84
|
+
role_1 -. owns .-> step_3
|
|
85
|
+
role_1 -. owns .-> step_4
|
|
86
|
+
role_1 -. owns .-> step_5
|
|
87
|
+
```
|
|
88
|
+
<!-- agent-diagram:end -->
|
|
89
|
+
|
|
63
90
|
## Exit
|
|
64
91
|
HTTPS accessible + cert issued + security headers present + rate limit verified = ingress onboarded.
|
|
@@ -118,5 +118,32 @@ kubectl -n istio-system port-forward svc/kiali 20001:20001 &
|
|
|
118
118
|
```
|
|
119
119
|
- **Done when:** service visible in mesh dashboard; no unmeshed traffic warnings
|
|
120
120
|
|
|
121
|
+
## Agent Interaction Diagram
|
|
122
|
+
|
|
123
|
+
<!-- agent-diagram:start -->
|
|
124
|
+
```mermaid
|
|
125
|
+
flowchart TD
|
|
126
|
+
start(["Start /service-mesh-onboard"])
|
|
127
|
+
role_1["devops-engineer"]
|
|
128
|
+
step_1["1. Pre-Check Mesh Health"]
|
|
129
|
+
step_2["2. Enable Injection"]
|
|
130
|
+
step_3["3. Verify mTLS"]
|
|
131
|
+
step_4["4. Apply Traffic Policies"]
|
|
132
|
+
step_5["5. Validate in Mesh Dashboard"]
|
|
133
|
+
exit(["Sidecar injected + mTLS verified + policies applied + dashboard shows servi..."])
|
|
134
|
+
start --> step_1
|
|
135
|
+
step_1 --> step_2
|
|
136
|
+
step_2 --> step_3
|
|
137
|
+
step_3 --> step_4
|
|
138
|
+
step_4 --> step_5
|
|
139
|
+
step_5 --> exit
|
|
140
|
+
role_1 -. owns .-> step_1
|
|
141
|
+
role_1 -. owns .-> step_2
|
|
142
|
+
role_1 -. owns .-> step_3
|
|
143
|
+
role_1 -. owns .-> step_4
|
|
144
|
+
role_1 -. owns .-> step_5
|
|
145
|
+
```
|
|
146
|
+
<!-- agent-diagram:end -->
|
|
147
|
+
|
|
121
148
|
## Exit
|
|
122
149
|
Sidecar injected + mTLS verified + policies applied + dashboard shows service = onboarded.
|
|
@@ -94,5 +94,34 @@ Saturation?
|
|
|
94
94
|
- Is the alert threshold correct? (too sensitive = toil; too loose = misses real issues)
|
|
95
95
|
- Create ticket if: runbook needs update, threshold needs tuning, or root cause needs a code fix
|
|
96
96
|
|
|
97
|
+
## Agent Interaction Diagram
|
|
98
|
+
|
|
99
|
+
<!-- agent-diagram:start -->
|
|
100
|
+
```mermaid
|
|
101
|
+
flowchart TD
|
|
102
|
+
start(["Start /alert-investigation"])
|
|
103
|
+
role_1["devops-engineer"]
|
|
104
|
+
role_2["developer"]
|
|
105
|
+
step_1["1. Acknowledge & Classify"]
|
|
106
|
+
step_2["2. Correlate Signals"]
|
|
107
|
+
step_3["3. Identify Root Cause"]
|
|
108
|
+
step_4["4. Mitigate"]
|
|
109
|
+
step_5["5. Post-Investigation Notes"]
|
|
110
|
+
exit(["Alert resolved or escalated + root cause noted + runbook quality assessed =..."])
|
|
111
|
+
start --> step_1
|
|
112
|
+
step_1 --> step_2
|
|
113
|
+
step_2 --> step_3
|
|
114
|
+
step_3 --> step_4
|
|
115
|
+
step_4 --> step_5
|
|
116
|
+
step_5 --> exit
|
|
117
|
+
role_1 -. owns .-> step_1
|
|
118
|
+
role_1 -. owns .-> step_2
|
|
119
|
+
role_1 -. owns .-> step_3
|
|
120
|
+
role_2 -. owns .-> step_3
|
|
121
|
+
role_1 -. owns .-> step_4
|
|
122
|
+
role_1 -. owns .-> step_5
|
|
123
|
+
```
|
|
124
|
+
<!-- agent-diagram:end -->
|
|
125
|
+
|
|
97
126
|
## Exit
|
|
98
127
|
Alert resolved or escalated + root cause noted + runbook quality assessed = investigation complete.
|
|
@@ -152,5 +152,38 @@ for dashboard in infra/observability/dashboards/*.json; do
|
|
|
152
152
|
done
|
|
153
153
|
```
|
|
154
154
|
|
|
155
|
+
## Agent Interaction Diagram
|
|
156
|
+
|
|
157
|
+
<!-- agent-diagram:start -->
|
|
158
|
+
```mermaid
|
|
159
|
+
flowchart TD
|
|
160
|
+
start(["Start /observability-stack-setup"])
|
|
161
|
+
role_1["devops-engineer"]
|
|
162
|
+
step_1["1. Namespace & Prerequisites"]
|
|
163
|
+
step_2["2. kube-prometheus-stack (Prometheus + Grafana + Alertmanager)"]
|
|
164
|
+
step_3["3. Loki + Promtail (Logs)"]
|
|
165
|
+
step_4["4. Tempo (Traces)"]
|
|
166
|
+
step_5["5. OpenTelemetry Collector (DaemonSet)"]
|
|
167
|
+
step_6["6. Validate Stack"]
|
|
168
|
+
step_7["7. Import Dashboards"]
|
|
169
|
+
exit(["All 4 components healthy + test alert fired + dashboards showing data = sta..."])
|
|
170
|
+
start --> step_1
|
|
171
|
+
step_1 --> step_2
|
|
172
|
+
step_2 --> step_3
|
|
173
|
+
step_3 --> step_4
|
|
174
|
+
step_4 --> step_5
|
|
175
|
+
step_5 --> step_6
|
|
176
|
+
step_6 --> step_7
|
|
177
|
+
step_7 --> exit
|
|
178
|
+
role_1 -. owns .-> step_1
|
|
179
|
+
role_1 -. owns .-> step_2
|
|
180
|
+
role_1 -. owns .-> step_3
|
|
181
|
+
role_1 -. owns .-> step_4
|
|
182
|
+
role_1 -. owns .-> step_5
|
|
183
|
+
role_1 -. owns .-> step_6
|
|
184
|
+
role_1 -. owns .-> step_7
|
|
185
|
+
```
|
|
186
|
+
<!-- agent-diagram:end -->
|
|
187
|
+
|
|
155
188
|
## Exit
|
|
156
189
|
All 4 components healthy + test alert fired + dashboards showing data = stack deployed.
|
|
@@ -79,5 +79,36 @@ spec:
|
|
|
79
79
|
- Link trace panel (Tempo datasource) to request duration panel
|
|
80
80
|
- **Done when:** dashboard saved in `infra/dashboards/`; Grafana shows live data
|
|
81
81
|
|
|
82
|
+
## Agent Interaction Diagram
|
|
83
|
+
|
|
84
|
+
<!-- agent-diagram:start -->
|
|
85
|
+
```mermaid
|
|
86
|
+
flowchart TD
|
|
87
|
+
start(["Start /onboard-service-monitoring"])
|
|
88
|
+
role_1["developer"]
|
|
89
|
+
role_2["devops-engineer"]
|
|
90
|
+
step_1["1. Metrics Instrumentation"]
|
|
91
|
+
step_2["2. ServiceMonitor"]
|
|
92
|
+
step_3["3. Tracing Instrumentation"]
|
|
93
|
+
step_4["4. Log Labels"]
|
|
94
|
+
step_5["5. Alert Rules"]
|
|
95
|
+
step_6["6. Grafana Dashboard"]
|
|
96
|
+
exit(["Golden signals in Prometheus + logs in Loki + traces in Tempo + alerts depl..."])
|
|
97
|
+
start --> step_1
|
|
98
|
+
step_1 --> step_2
|
|
99
|
+
step_2 --> step_3
|
|
100
|
+
step_3 --> step_4
|
|
101
|
+
step_4 --> step_5
|
|
102
|
+
step_5 --> step_6
|
|
103
|
+
step_6 --> exit
|
|
104
|
+
role_1 -. owns .-> step_1
|
|
105
|
+
role_2 -. owns .-> step_2
|
|
106
|
+
role_1 -. owns .-> step_3
|
|
107
|
+
role_2 -. owns .-> step_4
|
|
108
|
+
role_2 -. owns .-> step_5
|
|
109
|
+
role_2 -. owns .-> step_6
|
|
110
|
+
```
|
|
111
|
+
<!-- agent-diagram:end -->
|
|
112
|
+
|
|
82
113
|
## Exit
|
|
83
114
|
Golden signals in Prometheus + logs in Loki + traces in Tempo + alerts deployed + dashboard live = service monitored.
|
|
@@ -62,5 +62,53 @@ quality-gates:
|
|
|
62
62
|
- Define action items with owners and due dates
|
|
63
63
|
- Publish to team wiki; announce in #postmortems
|
|
64
64
|
|
|
65
|
+
## Agent Interaction Diagram
|
|
66
|
+
|
|
67
|
+
<!-- agent-diagram:start -->
|
|
68
|
+
```mermaid
|
|
69
|
+
flowchart TD
|
|
70
|
+
start(["Start /incident-response"])
|
|
71
|
+
role_1["devops-engineer"]
|
|
72
|
+
role_2["developer"]
|
|
73
|
+
role_3["pm"]
|
|
74
|
+
role_4["devops-engineer (IC)"]
|
|
75
|
+
role_5["developer (technical lead)"]
|
|
76
|
+
role_6["pm (comms)"]
|
|
77
|
+
step_1["T+0–5: Acknowledge & Scope"]
|
|
78
|
+
step_2["T+5–15: Mitigate"]
|
|
79
|
+
step_3["T+10: Communicate"]
|
|
80
|
+
step_4["T+15–30: Stabilize"]
|
|
81
|
+
step_5["T+30: Resolve or Escalate"]
|
|
82
|
+
step_6["T+60: Preliminary Postmortem"]
|
|
83
|
+
step_7["T+24h: Full Postmortem"]
|
|
84
|
+
exit(["Service healthy + stakeholders informed + postmortem published = incident c..."])
|
|
85
|
+
start --> step_1
|
|
86
|
+
step_1 --> step_2
|
|
87
|
+
step_2 --> step_3
|
|
88
|
+
step_3 --> step_4
|
|
89
|
+
step_4 --> step_5
|
|
90
|
+
step_5 --> step_6
|
|
91
|
+
step_6 --> step_7
|
|
92
|
+
step_7 --> exit
|
|
93
|
+
role_1 -. owns .-> step_1
|
|
94
|
+
role_2 -. owns .-> step_2
|
|
95
|
+
role_1 -. owns .-> step_2
|
|
96
|
+
role_3 -. owns .-> step_3
|
|
97
|
+
role_1 -. owns .-> step_4
|
|
98
|
+
role_2 -. owns .-> step_5
|
|
99
|
+
role_4 -. owns .-> step_5
|
|
100
|
+
role_5 -. owns .-> step_5
|
|
101
|
+
role_6 -. owns .-> step_5
|
|
102
|
+
role_2 -. owns .-> step_6
|
|
103
|
+
role_4 -. owns .-> step_6
|
|
104
|
+
role_5 -. owns .-> step_6
|
|
105
|
+
role_6 -. owns .-> step_6
|
|
106
|
+
role_2 -. owns .-> step_7
|
|
107
|
+
role_4 -. owns .-> step_7
|
|
108
|
+
role_5 -. owns .-> step_7
|
|
109
|
+
role_6 -. owns .-> step_7
|
|
110
|
+
```
|
|
111
|
+
<!-- agent-diagram:end -->
|
|
112
|
+
|
|
65
113
|
## Exit
|
|
66
114
|
Service healthy + stakeholders informed + postmortem published = incident closed.
|
|
@@ -86,5 +86,37 @@ done
|
|
|
86
86
|
- Any blocked items? Need resource allocation?
|
|
87
87
|
- If root cause not addressed: escalate to engineering lead
|
|
88
88
|
|
|
89
|
+
## Agent Interaction Diagram
|
|
90
|
+
|
|
91
|
+
<!-- agent-diagram:start -->
|
|
92
|
+
```mermaid
|
|
93
|
+
flowchart TD
|
|
94
|
+
start(["Start /postmortem"])
|
|
95
|
+
role_1["devops-engineer"]
|
|
96
|
+
role_2["team-lead"]
|
|
97
|
+
step_1["1. Collect Data (within 2h of resolution)"]
|
|
98
|
+
step_2["2. Draft Postmortem"]
|
|
99
|
+
step_3["3. 5-Whys Facilitation Meeting (within 48h)"]
|
|
100
|
+
step_4["4. Finalize Document"]
|
|
101
|
+
step_5["5. Publish & Track"]
|
|
102
|
+
step_6["6. Follow-Up (2 weeks later)"]
|
|
103
|
+
exit(["Postmortem published + all action items in tracker + team notified = postmo..."])
|
|
104
|
+
start --> step_1
|
|
105
|
+
step_1 --> step_2
|
|
106
|
+
step_2 --> step_3
|
|
107
|
+
step_3 --> step_4
|
|
108
|
+
step_4 --> step_5
|
|
109
|
+
step_5 --> step_6
|
|
110
|
+
step_6 --> exit
|
|
111
|
+
role_1 -. owns .-> step_1
|
|
112
|
+
role_1 -. owns .-> step_2
|
|
113
|
+
role_1 -. owns .-> step_3
|
|
114
|
+
role_1 -. owns .-> step_4
|
|
115
|
+
role_1 -. owns .-> step_5
|
|
116
|
+
role_2 -. owns .-> step_5
|
|
117
|
+
role_2 -. owns .-> step_6
|
|
118
|
+
```
|
|
119
|
+
<!-- agent-diagram:end -->
|
|
120
|
+
|
|
89
121
|
## Exit
|
|
90
122
|
Postmortem published + all action items in tracker + team notified = postmortem complete.
|
|
@@ -21,7 +21,6 @@ related-rules:
|
|
|
21
21
|
- error-budget-policy.md
|
|
22
22
|
uses-skills:
|
|
23
23
|
- slo-sli-design
|
|
24
|
-
- slo-implementation
|
|
25
24
|
- capacity-planning
|
|
26
25
|
quality-gates:
|
|
27
26
|
- SLO targets grounded in actual reliability data (not aspirational)
|
|
@@ -91,5 +90,40 @@ kubectl apply -f rules/slo-${SERVICE}-generated.yaml -n monitoring
|
|
|
91
90
|
- notify: tighten SLO to 99.3%; generates meaningful error budget
|
|
92
91
|
```
|
|
93
92
|
|
|
93
|
+
## Agent Interaction Diagram
|
|
94
|
+
|
|
95
|
+
<!-- agent-diagram:start -->
|
|
96
|
+
```mermaid
|
|
97
|
+
flowchart TD
|
|
98
|
+
start(["Start /slo-review"])
|
|
99
|
+
role_1["devops-engineer"]
|
|
100
|
+
role_2["team-lead"]
|
|
101
|
+
role_3["product-owner"]
|
|
102
|
+
step_1["1. Pull Reliability Data"]
|
|
103
|
+
step_2["2. Classify Services"]
|
|
104
|
+
step_3["3. SLO Adjustment Workshop"]
|
|
105
|
+
step_4["4. Update SLO Definitions"]
|
|
106
|
+
step_5["5. Error Budget Policy Review"]
|
|
107
|
+
step_6["6. Publish SLO Review Report"]
|
|
108
|
+
exit(["Report published + SLO changes applied + action items in tracker = review c..."])
|
|
109
|
+
start --> step_1
|
|
110
|
+
step_1 --> step_2
|
|
111
|
+
step_2 --> step_3
|
|
112
|
+
step_3 --> step_4
|
|
113
|
+
step_4 --> step_5
|
|
114
|
+
step_5 --> step_6
|
|
115
|
+
step_6 --> exit
|
|
116
|
+
role_1 -. owns .-> step_1
|
|
117
|
+
role_1 -. owns .-> step_2
|
|
118
|
+
role_1 -. owns .-> step_3
|
|
119
|
+
role_2 -. owns .-> step_3
|
|
120
|
+
role_3 -. owns .-> step_3
|
|
121
|
+
role_1 -. owns .-> step_4
|
|
122
|
+
role_2 -. owns .-> step_5
|
|
123
|
+
role_3 -. owns .-> step_5
|
|
124
|
+
role_1 -. owns .-> step_6
|
|
125
|
+
```
|
|
126
|
+
<!-- agent-diagram:end -->
|
|
127
|
+
|
|
94
128
|
## Exit
|
|
95
129
|
Report published + SLO changes applied + action items in tracker = review complete.
|
|
@@ -72,6 +72,39 @@ quality-gates:
|
|
|
72
72
|
- **Output:** `migration_readiness.md` with deployment steps, rollback command, monitoring checklist
|
|
73
73
|
- **Done when:** ops/release team has everything needed to deploy safely
|
|
74
74
|
|
|
75
|
+
## Agent Interaction Diagram
|
|
76
|
+
|
|
77
|
+
<!-- agent-diagram:start -->
|
|
78
|
+
```mermaid
|
|
79
|
+
flowchart TD
|
|
80
|
+
start(["Start /add-migration"])
|
|
81
|
+
role_1["team-lead"]
|
|
82
|
+
role_2["developer"]
|
|
83
|
+
role_3["qa"]
|
|
84
|
+
role_4["pm"]
|
|
85
|
+
step_1["1. Risk & Compatibility Analysis"]
|
|
86
|
+
step_2["2. Migration Implementation"]
|
|
87
|
+
step_3["3. Test DB Validation & Data Checks"]
|
|
88
|
+
step_4["4. Review & Remediation Loop"]
|
|
89
|
+
step_5["5. Readiness Report"]
|
|
90
|
+
exit(["Validated migration + readiness report + @team-lead approval = ready to dep..."])
|
|
91
|
+
start --> step_1
|
|
92
|
+
step_1 --> step_2
|
|
93
|
+
step_2 --> step_3
|
|
94
|
+
step_3 --> step_4
|
|
95
|
+
step_4 --> step_5
|
|
96
|
+
step_5 --> exit
|
|
97
|
+
role_1 -. owns .-> step_1
|
|
98
|
+
role_2 -. owns .-> step_2
|
|
99
|
+
role_3 -. owns .-> step_3
|
|
100
|
+
role_1 -. owns .-> step_4
|
|
101
|
+
role_2 -. owns .-> step_4
|
|
102
|
+
role_4 -. owns .-> step_5
|
|
103
|
+
role_1 -. owns .-> step_5
|
|
104
|
+
step_5 -. iterate if blocked .-> step_1
|
|
105
|
+
```
|
|
106
|
+
<!-- agent-diagram:end -->
|
|
107
|
+
|
|
75
108
|
## Iteration Loop
|
|
76
109
|
If validation reveals data issues or compatibility risks → return to Step 1 for strategy revision.
|
|
77
110
|
|
|
@@ -85,5 +85,45 @@ quality-gates:
|
|
|
85
85
|
- **Output:** endpoint accepted; delivery note in `docs/<feature>/delivery_summary.md`
|
|
86
86
|
- **Done when:** `@product-owner` signs off
|
|
87
87
|
|
|
88
|
+
## Agent Interaction Diagram
|
|
89
|
+
|
|
90
|
+
<!-- agent-diagram:start -->
|
|
91
|
+
```mermaid
|
|
92
|
+
flowchart TD
|
|
93
|
+
start(["Start /create-endpoint"])
|
|
94
|
+
role_1["product-owner"]
|
|
95
|
+
role_2["pm"]
|
|
96
|
+
role_3["team-lead"]
|
|
97
|
+
role_4["developer"]
|
|
98
|
+
role_5["qa"]
|
|
99
|
+
step_1["1. Scope & Contract"]
|
|
100
|
+
step_2["2. Architecture Review"]
|
|
101
|
+
step_3["3. Implementation"]
|
|
102
|
+
step_4["4. Test Design & Execution"]
|
|
103
|
+
step_5["5. Code Review & Sign-off"]
|
|
104
|
+
step_6["6. Fix / Retest Loop"]
|
|
105
|
+
step_7["7. Acceptance"]
|
|
106
|
+
exit(["Accepted endpoint + passing tests + @team-lead sign-off = ready to merge."])
|
|
107
|
+
start --> step_1
|
|
108
|
+
step_1 --> step_2
|
|
109
|
+
step_2 --> step_3
|
|
110
|
+
step_3 --> step_4
|
|
111
|
+
step_4 --> step_5
|
|
112
|
+
step_5 --> step_6
|
|
113
|
+
step_6 --> step_7
|
|
114
|
+
step_7 --> exit
|
|
115
|
+
role_1 -. owns .-> step_1
|
|
116
|
+
role_2 -. owns .-> step_1
|
|
117
|
+
role_3 -. owns .-> step_2
|
|
118
|
+
role_4 -. owns .-> step_3
|
|
119
|
+
role_5 -. owns .-> step_4
|
|
120
|
+
role_3 -. owns .-> step_5
|
|
121
|
+
role_4 -. owns .-> step_6
|
|
122
|
+
role_5 -. owns .-> step_6
|
|
123
|
+
role_1 -. owns .-> step_7
|
|
124
|
+
role_2 -. owns .-> step_7
|
|
125
|
+
```
|
|
126
|
+
<!-- agent-diagram:end -->
|
|
127
|
+
|
|
88
128
|
## Exit
|
|
89
129
|
Accepted endpoint + passing tests + `@team-lead` sign-off = ready to merge.
|
|
@@ -73,5 +73,36 @@ quality-gates:
|
|
|
73
73
|
- **Output:** `root_cause_summary.md` — what failed, why, how fixed, how to prevent
|
|
74
74
|
- **Done when:** `@team-lead` approves fix; root cause documented; ticket closed
|
|
75
75
|
|
|
76
|
+
## Agent Interaction Diagram
|
|
77
|
+
|
|
78
|
+
<!-- agent-diagram:start -->
|
|
79
|
+
```mermaid
|
|
80
|
+
flowchart TD
|
|
81
|
+
start(["Start /debug-issue"])
|
|
82
|
+
role_1["pm"]
|
|
83
|
+
role_2["team-lead"]
|
|
84
|
+
role_3["developer"]
|
|
85
|
+
role_4["qa"]
|
|
86
|
+
step_1["1. Triage & Impact Classification"]
|
|
87
|
+
step_2["2. Reproduce & Isolate"]
|
|
88
|
+
step_3["3. Fix Implementation"]
|
|
89
|
+
step_4["4. Verification & Regression Checks"]
|
|
90
|
+
step_5["5. Technical Review & Closure"]
|
|
91
|
+
exit(["Merged fix + verified resolution + root cause documented = incident closed."])
|
|
92
|
+
start --> step_1
|
|
93
|
+
step_1 --> step_2
|
|
94
|
+
step_2 --> step_3
|
|
95
|
+
step_3 --> step_4
|
|
96
|
+
step_4 --> step_5
|
|
97
|
+
step_5 --> exit
|
|
98
|
+
role_1 -. owns .-> step_1
|
|
99
|
+
role_2 -. owns .-> step_1
|
|
100
|
+
role_3 -. owns .-> step_2
|
|
101
|
+
role_3 -. owns .-> step_3
|
|
102
|
+
role_4 -. owns .-> step_4
|
|
103
|
+
role_2 -. owns .-> step_5
|
|
104
|
+
```
|
|
105
|
+
<!-- agent-diagram:end -->
|
|
106
|
+
|
|
76
107
|
## Exit
|
|
77
108
|
Merged fix + verified resolution + root cause documented = incident closed.
|
|
@@ -71,6 +71,43 @@ quality-gates:
|
|
|
71
71
|
- **Output:** `docs/<epic>/delivery_summary.md` — accepted items, deferred items, follow-up backlog
|
|
72
72
|
- **Done when:** epic accepted; follow-up items logged
|
|
73
73
|
|
|
74
|
+
## Agent Interaction Diagram
|
|
75
|
+
|
|
76
|
+
<!-- agent-diagram:start -->
|
|
77
|
+
```mermaid
|
|
78
|
+
flowchart TD
|
|
79
|
+
start(["Start /develop-epic"])
|
|
80
|
+
role_1["product-owner"]
|
|
81
|
+
role_2["pm"]
|
|
82
|
+
role_3["team-lead"]
|
|
83
|
+
role_4["developer"]
|
|
84
|
+
role_5["qa"]
|
|
85
|
+
step_1["1. Epic Decomposition & Milestone Planning"]
|
|
86
|
+
step_2["2. Architecture Runway Definition"]
|
|
87
|
+
step_3["3. Increment Implementation"]
|
|
88
|
+
step_4["4. Increment Verification"]
|
|
89
|
+
step_5["5. Milestone Review & Replanning"]
|
|
90
|
+
step_6["6. Final Acceptance"]
|
|
91
|
+
exit(["All increments accepted by @product-owner + clean regression suite = epic d..."])
|
|
92
|
+
start --> step_1
|
|
93
|
+
step_1 --> step_2
|
|
94
|
+
step_2 --> step_3
|
|
95
|
+
step_3 --> step_4
|
|
96
|
+
step_4 --> step_5
|
|
97
|
+
step_5 --> step_6
|
|
98
|
+
step_6 --> exit
|
|
99
|
+
role_1 -. owns .-> step_1
|
|
100
|
+
role_2 -. owns .-> step_1
|
|
101
|
+
role_3 -. owns .-> step_2
|
|
102
|
+
role_4 -. owns .-> step_3
|
|
103
|
+
role_5 -. owns .-> step_4
|
|
104
|
+
role_2 -. owns .-> step_5
|
|
105
|
+
role_3 -. owns .-> step_5
|
|
106
|
+
role_1 -. owns .-> step_6
|
|
107
|
+
step_6 -. iterate if blocked .-> step_1
|
|
108
|
+
```
|
|
109
|
+
<!-- agent-diagram:end -->
|
|
110
|
+
|
|
74
111
|
## Iteration Loop
|
|
75
112
|
Steps 3–5 repeat for each increment. Replanning in Step 5 governs scope adjustments.
|
|
76
113
|
|