@jetrabbits/agentic 0.3.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +8 -0
- package/CHANGELOG.md +18 -0
- package/Makefile +26 -5
- package/README.md +25 -6
- package/agentic +801 -66
- package/areas/devops/ci-cd/workflows/onboard-repo.md +29 -0
- package/areas/devops/ci-cd/workflows/pipeline-debug.md +26 -0
- package/areas/devops/ci-cd/workflows/release-pipeline.md +53 -0
- package/areas/devops/database-ops/workflows/backup-verify.md +27 -0
- package/areas/devops/database-ops/workflows/db-incident.md +30 -0
- package/areas/devops/devsecops/workflows/policy-onboard.md +34 -0
- package/areas/devops/devsecops/workflows/security-scan-pipeline.md +33 -0
- package/areas/devops/infrastructure/workflows/destroy-environment.md +31 -0
- package/areas/devops/infrastructure/workflows/drift-remediation.md +29 -0
- package/areas/devops/infrastructure/workflows/module-development.md +32 -0
- package/areas/devops/infrastructure/workflows/provision-environment.md +29 -0
- package/areas/devops/kubernetes/workflows/cluster-bootstrap.md +36 -0
- package/areas/devops/kubernetes/workflows/debug-workload.md +29 -0
- package/areas/devops/kubernetes/workflows/onboard-service.md +35 -0
- package/areas/devops/kubernetes/workflows/upgrade-cluster.md +30 -0
- package/areas/devops/networking/workflows/onboard-ingress.md +27 -0
- package/areas/devops/networking/workflows/service-mesh-onboard.md +27 -0
- package/areas/devops/observability/workflows/alert-investigation.md +29 -0
- package/areas/devops/observability/workflows/observability-stack-setup.md +33 -0
- package/areas/devops/observability/workflows/onboard-service-monitoring.md +31 -0
- package/areas/devops/sre/workflows/incident-response.md +48 -0
- package/areas/devops/sre/workflows/postmortem.md +32 -0
- package/areas/devops/sre/workflows/slo-review.md +35 -1
- package/areas/software/backend/workflows/add-migration.md +33 -0
- package/areas/software/backend/workflows/create-endpoint.md +40 -0
- package/areas/software/backend/workflows/debug-issue.md +31 -0
- package/areas/software/backend/workflows/develop-epic.md +37 -0
- package/areas/software/backend/workflows/develop-feature.md +44 -0
- package/areas/software/backend/workflows/refactor-module.md +35 -0
- package/areas/software/backend/workflows/test-feature.md +30 -0
- package/areas/software/data-engineering/workflows/backfill-data.md +25 -0
- package/areas/software/data-engineering/workflows/data-quality-incident.md +31 -0
- package/areas/software/data-engineering/workflows/lineage-trace.md +25 -0
- package/areas/software/data-engineering/workflows/new-model.md +30 -0
- package/areas/software/data-engineering/workflows/schema-migration.md +29 -0
- package/areas/software/frontend/workflows/a11y-fix.md +30 -0
- package/areas/software/frontend/workflows/bundle-analyze.md +28 -0
- package/areas/software/frontend/workflows/release-prep.md +33 -0
- package/areas/software/frontend/workflows/scaffold-component.md +32 -0
- package/areas/software/frontend/workflows/visual-regression.md +32 -0
- package/areas/software/full-stack/workflows/backend-project-full-cycle.md +47 -2
- package/areas/software/full-stack/workflows/debug-issue.md +29 -0
- package/areas/software/full-stack/workflows/develop-feature.md +38 -0
- package/areas/software/full-stack/workflows/feature-implementation-flow.md +38 -0
- package/areas/software/full-stack/workflows/testing-ci-pipeline.md +30 -0
- package/areas/software/general/workflows/code-review-workflow.md +31 -0
- package/areas/software/general/workflows/development-cycle-workflow.md +38 -0
- package/areas/software/general/workflows/project-setup-workflow.md +38 -0
- package/areas/software/mlops/workflows/champion-challenger.md +29 -0
- package/areas/software/mlops/workflows/deploy-endpoint.md +30 -0
- package/areas/software/mlops/workflows/evaluate-model.md +28 -0
- package/areas/software/mlops/workflows/model-incident.md +29 -0
- package/areas/software/mlops/workflows/train-experiment.md +25 -0
- package/areas/software/mobile/workflows/crash-triage.md +28 -0
- package/areas/software/mobile/workflows/device-testing.md +27 -0
- package/areas/software/mobile/workflows/ota-update.md +25 -0
- package/areas/software/mobile/workflows/release-build.md +30 -0
- package/areas/software/mobile/workflows/store-submission.md +29 -0
- package/areas/software/platform/workflows/cost-audit.md +28 -0
- package/areas/software/platform/workflows/deploy-production.md +30 -0
- package/areas/software/platform/workflows/drift-check.md +29 -0
- package/areas/software/platform/workflows/incident-response.md +33 -0
- package/areas/software/platform/workflows/provision-env.md +36 -0
- package/areas/software/qa/workflows/flakiness-investigation.md +30 -0
- package/areas/software/qa/workflows/performance-audit.md +29 -0
- package/areas/software/qa/workflows/regression-suite.md +28 -0
- package/areas/software/qa/workflows/smoke-test.md +31 -0
- package/areas/software/qa/workflows/test-coverage-report.md +28 -0
- package/areas/software/security/workflows/compliance-report.md +27 -0
- package/areas/software/security/workflows/pen-test-sim.md +28 -0
- package/areas/software/security/workflows/secret-rotation.md +33 -2
- package/areas/software/security/workflows/security-scan.md +29 -0
- package/areas/software/security/workflows/threat-model-review.md +30 -0
- package/docs/agentic-usage.md +19 -2
- package/docs/catalog.schema.json +5 -1
- package/docs/mcp/README.md +28 -0
- package/docs/opencode_setup.md +21 -1
- package/docs/site/README.md +15 -1
- package/docs/site/app.js +68 -0
- package/docs/site/catalog.json +74 -1
- package/docs/site/index.html +5 -1
- package/docs/site/styles.css +52 -4
- package/extensions/opencode/opencode.json +0 -1
- package/extensions/opencode/profiles/githubcopilot/opencode.json +87 -0
- package/extensions/opencode/profiles/openai/opencode.json +100 -0
- package/package.json +1 -1
- package/scripts/build_docs_catalog.py +13 -1
- package/scripts/sync_workflow_diagrams.py +199 -0
- package/extensions/opencode/plugins/sound-notification.ts +0 -13
|
@@ -69,5 +69,34 @@ quality-gates:
|
|
|
69
69
|
- Write `docs/ci-cd.md`: stages, how to run locally, how to add a new secret
|
|
70
70
|
- **Done when:** documentation committed
|
|
71
71
|
|
|
72
|
+
## Agent Interaction Diagram
|
|
73
|
+
|
|
74
|
+
<!-- agent-diagram:start -->
|
|
75
|
+
```mermaid
|
|
76
|
+
flowchart TD
|
|
77
|
+
start(["Start /onboard-repo"])
|
|
78
|
+
role_1["devops-engineer"]
|
|
79
|
+
role_2["developer"]
|
|
80
|
+
step_1["1. Assess & Plan"]
|
|
81
|
+
step_2["2. Secrets & Environments Setup"]
|
|
82
|
+
step_3["3. Write Pipeline Config"]
|
|
83
|
+
step_4["4. First Run & Debug"]
|
|
84
|
+
step_5["5. Document"]
|
|
85
|
+
exit(["Green pipeline + staging deploy + documentation = repo onboarded."])
|
|
86
|
+
start --> step_1
|
|
87
|
+
step_1 --> step_2
|
|
88
|
+
step_2 --> step_3
|
|
89
|
+
step_3 --> step_4
|
|
90
|
+
step_4 --> step_5
|
|
91
|
+
step_5 --> exit
|
|
92
|
+
role_1 -. owns .-> step_1
|
|
93
|
+
role_1 -. owns .-> step_2
|
|
94
|
+
role_1 -. owns .-> step_3
|
|
95
|
+
role_1 -. owns .-> step_4
|
|
96
|
+
role_2 -. owns .-> step_4
|
|
97
|
+
role_1 -. owns .-> step_5
|
|
98
|
+
```
|
|
99
|
+
<!-- agent-diagram:end -->
|
|
100
|
+
|
|
72
101
|
## Exit
|
|
73
102
|
Green pipeline + staging deploy + documentation = repo onboarded.
|
|
@@ -62,5 +62,31 @@ quality-gates:
|
|
|
62
62
|
- Merge fix; confirm pipeline green on main
|
|
63
63
|
- If flaky test: add to quarantine list; file follow-up ticket with `flaky-test` label
|
|
64
64
|
|
|
65
|
+
## Agent Interaction Diagram
|
|
66
|
+
|
|
67
|
+
<!-- agent-diagram:start -->
|
|
68
|
+
```mermaid
|
|
69
|
+
flowchart TD
|
|
70
|
+
start(["Start /pipeline-debug"])
|
|
71
|
+
role_1["devops-engineer"]
|
|
72
|
+
role_2["developer"]
|
|
73
|
+
step_1["1. Classify Failure"]
|
|
74
|
+
step_2["2. Diagnose by Category"]
|
|
75
|
+
step_3["3. Fix & Verify"]
|
|
76
|
+
step_4["4. Merge & Monitor"]
|
|
77
|
+
exit(["Pipeline green + root cause documented in ticket = debug complete."])
|
|
78
|
+
start --> step_1
|
|
79
|
+
step_1 --> step_2
|
|
80
|
+
step_2 --> step_3
|
|
81
|
+
step_3 --> step_4
|
|
82
|
+
step_4 --> exit
|
|
83
|
+
role_1 -. owns .-> step_1
|
|
84
|
+
role_2 -. owns .-> step_2
|
|
85
|
+
role_1 -. owns .-> step_2
|
|
86
|
+
role_1 -. owns .-> step_3
|
|
87
|
+
role_1 -. owns .-> step_4
|
|
88
|
+
```
|
|
89
|
+
<!-- agent-diagram:end -->
|
|
90
|
+
|
|
65
91
|
## Exit
|
|
66
92
|
Pipeline green + root cause documented in ticket = debug complete.
|
|
@@ -110,6 +110,59 @@ helm upgrade --install order-service charts/order-service \
|
|
|
110
110
|
- Verify business KPIs (conversion, checkout success, error funnel).
|
|
111
111
|
- Publish deployment report with links to metrics, logs, and release artifact metadata.
|
|
112
112
|
|
|
113
|
+
## Agent Interaction Diagram
|
|
114
|
+
|
|
115
|
+
<!-- agent-diagram:start -->
|
|
116
|
+
```mermaid
|
|
117
|
+
flowchart TD
|
|
118
|
+
start(["Start /release-pipeline"])
|
|
119
|
+
role_1["team-lead"]
|
|
120
|
+
role_2["pm"]
|
|
121
|
+
role_3["developer"]
|
|
122
|
+
role_4["devops-engineer"]
|
|
123
|
+
role_5["qa"]
|
|
124
|
+
step_1["1. Release Readiness and Freeze Check"]
|
|
125
|
+
step_2["2. Database Compatibility Gate"]
|
|
126
|
+
step_3["3. Tag Release"]
|
|
127
|
+
step_4["4. CI Release Pipeline (automated) — CI system"]
|
|
128
|
+
step_5["5. Deploy Staging"]
|
|
129
|
+
step_6["6. Production Gate"]
|
|
130
|
+
step_7["7. Canary Deployment"]
|
|
131
|
+
step_8["8. Feature Flag Progression"]
|
|
132
|
+
step_9["9. Post-Deploy Validation"]
|
|
133
|
+
exit(["Release is complete when 100% traffic is healthy, post-deploy checks pass,..."])
|
|
134
|
+
start --> step_1
|
|
135
|
+
step_1 --> step_2
|
|
136
|
+
step_2 --> step_3
|
|
137
|
+
step_3 --> step_4
|
|
138
|
+
step_4 --> step_5
|
|
139
|
+
step_5 --> step_6
|
|
140
|
+
step_6 --> step_7
|
|
141
|
+
step_7 --> step_8
|
|
142
|
+
step_8 --> step_9
|
|
143
|
+
step_9 --> exit
|
|
144
|
+
role_1 -. owns .-> step_1
|
|
145
|
+
role_2 -. owns .-> step_1
|
|
146
|
+
role_3 -. owns .-> step_2
|
|
147
|
+
role_4 -. owns .-> step_2
|
|
148
|
+
role_3 -. owns .-> step_3
|
|
149
|
+
role_3 -. owns .-> step_4
|
|
150
|
+
role_4 -. owns .-> step_4
|
|
151
|
+
role_1 -. owns .-> step_4
|
|
152
|
+
role_2 -. owns .-> step_4
|
|
153
|
+
role_5 -. owns .-> step_4
|
|
154
|
+
role_4 -. owns .-> step_5
|
|
155
|
+
role_1 -. owns .-> step_6
|
|
156
|
+
role_5 -. owns .-> step_6
|
|
157
|
+
role_4 -. owns .-> step_7
|
|
158
|
+
role_3 -. owns .-> step_8
|
|
159
|
+
role_5 -. owns .-> step_8
|
|
160
|
+
role_5 -. owns .-> step_9
|
|
161
|
+
role_2 -. owns .-> step_9
|
|
162
|
+
step_9 -. iterate if blocked .-> step_1
|
|
163
|
+
```
|
|
164
|
+
<!-- agent-diagram:end -->
|
|
165
|
+
|
|
113
166
|
## Rollback
|
|
114
167
|
|
|
115
168
|
```bash
|
|
@@ -103,5 +103,32 @@ curl -X POST $SLACK_WEBHOOK \
|
|
|
103
103
|
```
|
|
104
104
|
- **If any step fails:** post failure to Slack + page on-call → P1 incident
|
|
105
105
|
|
|
106
|
+
## Agent Interaction Diagram
|
|
107
|
+
|
|
108
|
+
<!-- agent-diagram:start -->
|
|
109
|
+
```mermaid
|
|
110
|
+
flowchart TD
|
|
111
|
+
start(["Start /backup-verify"])
|
|
112
|
+
role_1["devops-engineer"]
|
|
113
|
+
step_1["1. Pre-Check: Backup Catalog"]
|
|
114
|
+
step_2["2. Provision Test Environment"]
|
|
115
|
+
step_3["3. Restore Latest Backup"]
|
|
116
|
+
step_4["4. Row Count Validation"]
|
|
117
|
+
step_5["5. Report + Cleanup"]
|
|
118
|
+
exit(["Restore successful + row counts validated + test env destroyed + report pos..."])
|
|
119
|
+
start --> step_1
|
|
120
|
+
step_1 --> step_2
|
|
121
|
+
step_2 --> step_3
|
|
122
|
+
step_3 --> step_4
|
|
123
|
+
step_4 --> step_5
|
|
124
|
+
step_5 --> exit
|
|
125
|
+
role_1 -. owns .-> step_1
|
|
126
|
+
role_1 -. owns .-> step_2
|
|
127
|
+
role_1 -. owns .-> step_3
|
|
128
|
+
role_1 -. owns .-> step_4
|
|
129
|
+
role_1 -. owns .-> step_5
|
|
130
|
+
```
|
|
131
|
+
<!-- agent-diagram:end -->
|
|
132
|
+
|
|
106
133
|
## Exit
|
|
107
134
|
Restore successful + row counts validated + test env destroyed + report posted = backup verified.
|
|
@@ -82,5 +82,35 @@ SELECT pg_terminate_backend(<pid>); -- forceful
|
|
|
82
82
|
- Root cause + fix in incident ticket
|
|
83
83
|
- If query regression: create optimization ticket for development team
|
|
84
84
|
|
|
85
|
+
## Agent Interaction Diagram
|
|
86
|
+
|
|
87
|
+
<!-- agent-diagram:start -->
|
|
88
|
+
```mermaid
|
|
89
|
+
flowchart TD
|
|
90
|
+
start(["Start /db-incident"])
|
|
91
|
+
role_1["devops-engineer"]
|
|
92
|
+
role_2["developer"]
|
|
93
|
+
step_1["1. Triage"]
|
|
94
|
+
step_2["2. Immediate Mitigation by Type"]
|
|
95
|
+
step_3["3. Root Cause"]
|
|
96
|
+
step_4["4. Fix & Verify"]
|
|
97
|
+
step_5["5. Document"]
|
|
98
|
+
exit(["Metrics normal + root cause documented = db incident resolved."])
|
|
99
|
+
start --> step_1
|
|
100
|
+
step_1 --> step_2
|
|
101
|
+
step_2 --> step_3
|
|
102
|
+
step_3 --> step_4
|
|
103
|
+
step_4 --> step_5
|
|
104
|
+
step_5 --> exit
|
|
105
|
+
role_1 -. owns .-> step_1
|
|
106
|
+
role_2 -. owns .-> step_2
|
|
107
|
+
role_1 -. owns .-> step_2
|
|
108
|
+
role_1 -. owns .-> step_3
|
|
109
|
+
role_2 -. owns .-> step_3
|
|
110
|
+
role_1 -. owns .-> step_4
|
|
111
|
+
role_1 -. owns .-> step_5
|
|
112
|
+
```
|
|
113
|
+
<!-- agent-diagram:end -->
|
|
114
|
+
|
|
85
115
|
## Exit
|
|
86
116
|
Metrics normal + root cause documented = db incident resolved.
|
|
@@ -100,5 +100,39 @@ kubectl get constraint ${POLICY} -o jsonpath='{.status.byPod}'
|
|
|
100
100
|
# metric: gatekeeper_violations_total{enforcement_action="deny"}
|
|
101
101
|
```
|
|
102
102
|
|
|
103
|
+
## Agent Interaction Diagram
|
|
104
|
+
|
|
105
|
+
<!-- agent-diagram:start -->
|
|
106
|
+
```mermaid
|
|
107
|
+
flowchart TD
|
|
108
|
+
start(["Start /policy-onboard"])
|
|
109
|
+
role_1["devops-engineer"]
|
|
110
|
+
role_2["developer"]
|
|
111
|
+
role_3["team-lead"]
|
|
112
|
+
step_1["1. Design Policy"]
|
|
113
|
+
step_2["2. Unit Test"]
|
|
114
|
+
step_3["3. Dryrun in Staging"]
|
|
115
|
+
step_4["4. Fix Existing Violations"]
|
|
116
|
+
step_5["5. Switch to Enforce"]
|
|
117
|
+
step_6["6. Monitor Policy Health"]
|
|
118
|
+
exit(["Policy tested + existing violations resolved + enforce mode active + monito..."])
|
|
119
|
+
start --> step_1
|
|
120
|
+
step_1 --> step_2
|
|
121
|
+
step_2 --> step_3
|
|
122
|
+
step_3 --> step_4
|
|
123
|
+
step_4 --> step_5
|
|
124
|
+
step_5 --> step_6
|
|
125
|
+
step_6 --> exit
|
|
126
|
+
role_1 -. owns .-> step_1
|
|
127
|
+
role_1 -. owns .-> step_2
|
|
128
|
+
role_1 -. owns .-> step_3
|
|
129
|
+
role_2 -. owns .-> step_4
|
|
130
|
+
role_1 -. owns .-> step_4
|
|
131
|
+
role_1 -. owns .-> step_5
|
|
132
|
+
role_3 -. owns .-> step_5
|
|
133
|
+
role_1 -. owns .-> step_6
|
|
134
|
+
```
|
|
135
|
+
<!-- agent-diagram:end -->
|
|
136
|
+
|
|
103
137
|
## Exit
|
|
104
138
|
Policy tested + existing violations resolved + enforce mode active + monitoring in place = policy onboarded.
|
|
@@ -151,5 +151,38 @@ echo "IaC: $(cat iac-scan.sarif | jq '.runs[0].results | length') findi
|
|
|
151
151
|
echo "SBOM: attached to registry"
|
|
152
152
|
```
|
|
153
153
|
|
|
154
|
+
## Agent Interaction Diagram
|
|
155
|
+
|
|
156
|
+
<!-- agent-diagram:start -->
|
|
157
|
+
```mermaid
|
|
158
|
+
flowchart TD
|
|
159
|
+
start(["Start /security-scan-pipeline"])
|
|
160
|
+
role_1["devops-engineer"]
|
|
161
|
+
step_1["1. Secrets Scan"]
|
|
162
|
+
step_2["2. SAST (Static Analysis)"]
|
|
163
|
+
step_3["3. Dependency CVE Scan"]
|
|
164
|
+
step_4["4. Container Image Scan"]
|
|
165
|
+
step_5["5. IaC Security Scan"]
|
|
166
|
+
step_6["6. Generate SBOM"]
|
|
167
|
+
step_7["7. Collate Report"]
|
|
168
|
+
exit(["Zero unresolved Critical/High + SBOM attached + scan report filed = securit..."])
|
|
169
|
+
start --> step_1
|
|
170
|
+
step_1 --> step_2
|
|
171
|
+
step_2 --> step_3
|
|
172
|
+
step_3 --> step_4
|
|
173
|
+
step_4 --> step_5
|
|
174
|
+
step_5 --> step_6
|
|
175
|
+
step_6 --> step_7
|
|
176
|
+
step_7 --> exit
|
|
177
|
+
role_1 -. owns .-> step_1
|
|
178
|
+
role_1 -. owns .-> step_2
|
|
179
|
+
role_1 -. owns .-> step_3
|
|
180
|
+
role_1 -. owns .-> step_4
|
|
181
|
+
role_1 -. owns .-> step_5
|
|
182
|
+
role_1 -. owns .-> step_6
|
|
183
|
+
role_1 -. owns .-> step_7
|
|
184
|
+
```
|
|
185
|
+
<!-- agent-diagram:end -->
|
|
186
|
+
|
|
154
187
|
## Exit
|
|
155
188
|
Zero unresolved Critical/High + SBOM attached + scan report filed = security scan complete.
|
|
@@ -92,5 +92,36 @@ aws dynamodb delete-item \
|
|
|
92
92
|
### 6. Document — `@devops-engineer`
|
|
93
93
|
- Record in decommission log: environment, date, approver, reason, data disposition
|
|
94
94
|
|
|
95
|
+
## Agent Interaction Diagram
|
|
96
|
+
|
|
97
|
+
<!-- agent-diagram:start -->
|
|
98
|
+
```mermaid
|
|
99
|
+
flowchart TD
|
|
100
|
+
start(["Start /destroy-environment"])
|
|
101
|
+
role_1["devops-engineer"]
|
|
102
|
+
role_2["team-lead"]
|
|
103
|
+
step_1["1. Confirm Scope"]
|
|
104
|
+
step_2["2. Approval"]
|
|
105
|
+
step_3["3. Pre-Destroy Backup"]
|
|
106
|
+
step_4["4. Ordered Teardown"]
|
|
107
|
+
step_5["5. Verify & Cleanup"]
|
|
108
|
+
step_6["6. Document"]
|
|
109
|
+
exit(["Terraform state empty + cloud console clean + documentation filed = environ..."])
|
|
110
|
+
start --> step_1
|
|
111
|
+
step_1 --> step_2
|
|
112
|
+
step_2 --> step_3
|
|
113
|
+
step_3 --> step_4
|
|
114
|
+
step_4 --> step_5
|
|
115
|
+
step_5 --> step_6
|
|
116
|
+
step_6 --> exit
|
|
117
|
+
role_1 -. owns .-> step_1
|
|
118
|
+
role_2 -. owns .-> step_2
|
|
119
|
+
role_1 -. owns .-> step_3
|
|
120
|
+
role_1 -. owns .-> step_4
|
|
121
|
+
role_1 -. owns .-> step_5
|
|
122
|
+
role_1 -. owns .-> step_6
|
|
123
|
+
```
|
|
124
|
+
<!-- agent-diagram:end -->
|
|
125
|
+
|
|
95
126
|
## Exit
|
|
96
127
|
Terraform state empty + cloud console clean + documentation filed = environment destroyed.
|
|
@@ -62,5 +62,34 @@ terraform apply remediation.plan
|
|
|
62
62
|
### 5. Report — `@devops-engineer`
|
|
63
63
|
- Update `drift-log.md` with date, resources affected, classification, action taken
|
|
64
64
|
|
|
65
|
+
## Agent Interaction Diagram
|
|
66
|
+
|
|
67
|
+
<!-- agent-diagram:start -->
|
|
68
|
+
```mermaid
|
|
69
|
+
flowchart TD
|
|
70
|
+
start(["Start /drift-remediation"])
|
|
71
|
+
role_1["devops-engineer"]
|
|
72
|
+
role_2["team-lead"]
|
|
73
|
+
step_1["1. Detect Drift"]
|
|
74
|
+
step_2["2. Classify Findings"]
|
|
75
|
+
step_3["3. Remediate (if REMEDIATE class)"]
|
|
76
|
+
step_4["4. Investigate (if INVESTIGATE class)"]
|
|
77
|
+
step_5["5. Report"]
|
|
78
|
+
exit(["All drift classified + REMEDIATE resolved + INVESTIGATE escalated = drift c..."])
|
|
79
|
+
start --> step_1
|
|
80
|
+
step_1 --> step_2
|
|
81
|
+
step_2 --> step_3
|
|
82
|
+
step_3 --> step_4
|
|
83
|
+
step_4 --> step_5
|
|
84
|
+
step_5 --> exit
|
|
85
|
+
role_1 -. owns .-> step_1
|
|
86
|
+
role_1 -. owns .-> step_2
|
|
87
|
+
role_2 -. owns .-> step_2
|
|
88
|
+
role_1 -. owns .-> step_3
|
|
89
|
+
role_1 -. owns .-> step_4
|
|
90
|
+
role_1 -. owns .-> step_5
|
|
91
|
+
```
|
|
92
|
+
<!-- agent-diagram:end -->
|
|
93
|
+
|
|
65
94
|
## Exit
|
|
66
95
|
All drift classified + REMEDIATE resolved + INVESTIGATE escalated = drift cycle complete.
|
|
@@ -97,5 +97,37 @@ git push origin modules/<module-name>/v1.0.0
|
|
|
97
97
|
# Reference in other modules: ?ref=v1.0.0 (never ?ref=main)
|
|
98
98
|
```
|
|
99
99
|
|
|
100
|
+
## Agent Interaction Diagram
|
|
101
|
+
|
|
102
|
+
<!-- agent-diagram:start -->
|
|
103
|
+
```mermaid
|
|
104
|
+
flowchart TD
|
|
105
|
+
start(["Start /module-development"])
|
|
106
|
+
role_1["devops-engineer"]
|
|
107
|
+
role_2["team-lead"]
|
|
108
|
+
step_1["1. Design Interface"]
|
|
109
|
+
step_2["2. Implement Module"]
|
|
110
|
+
step_3["3. Write Examples"]
|
|
111
|
+
step_4["4. Test"]
|
|
112
|
+
step_5["5. Code Review"]
|
|
113
|
+
step_6["6. Release"]
|
|
114
|
+
exit(["Module published + examples tested + documentation complete = module released."])
|
|
115
|
+
start --> step_1
|
|
116
|
+
step_1 --> step_2
|
|
117
|
+
step_2 --> step_3
|
|
118
|
+
step_3 --> step_4
|
|
119
|
+
step_4 --> step_5
|
|
120
|
+
step_5 --> step_6
|
|
121
|
+
step_6 --> exit
|
|
122
|
+
role_1 -. owns .-> step_1
|
|
123
|
+
role_2 -. owns .-> step_1
|
|
124
|
+
role_1 -. owns .-> step_2
|
|
125
|
+
role_1 -. owns .-> step_3
|
|
126
|
+
role_1 -. owns .-> step_4
|
|
127
|
+
role_2 -. owns .-> step_5
|
|
128
|
+
role_1 -. owns .-> step_6
|
|
129
|
+
```
|
|
130
|
+
<!-- agent-diagram:end -->
|
|
131
|
+
|
|
100
132
|
## Exit
|
|
101
133
|
Module published + examples tested + documentation complete = module released.
|
|
@@ -92,5 +92,34 @@ quality-gates:
|
|
|
92
92
|
- Write `provision_report.md`: environment, resources created, cost estimate, next steps
|
|
93
93
|
- **Done when:** report committed; outputs stored
|
|
94
94
|
|
|
95
|
+
## Agent Interaction Diagram
|
|
96
|
+
|
|
97
|
+
<!-- agent-diagram:start -->
|
|
98
|
+
```mermaid
|
|
99
|
+
flowchart TD
|
|
100
|
+
start(["Start /provision-environment"])
|
|
101
|
+
role_1["devops-engineer"]
|
|
102
|
+
role_2["team-lead"]
|
|
103
|
+
step_1["1. Plan & Review"]
|
|
104
|
+
step_2["2. Apply Infrastructure"]
|
|
105
|
+
step_3["3. Configure Nodes (Ansible)"]
|
|
106
|
+
step_4["4. Smoke Tests"]
|
|
107
|
+
step_5["5. Document & Store Outputs"]
|
|
108
|
+
exit(["Terraform apply clean + Ansible 0 failures + smoke tests pass = environment..."])
|
|
109
|
+
start --> step_1
|
|
110
|
+
step_1 --> step_2
|
|
111
|
+
step_2 --> step_3
|
|
112
|
+
step_3 --> step_4
|
|
113
|
+
step_4 --> step_5
|
|
114
|
+
step_5 --> exit
|
|
115
|
+
role_1 -. owns .-> step_1
|
|
116
|
+
role_2 -. owns .-> step_1
|
|
117
|
+
role_1 -. owns .-> step_2
|
|
118
|
+
role_1 -. owns .-> step_3
|
|
119
|
+
role_1 -. owns .-> step_4
|
|
120
|
+
role_1 -. owns .-> step_5
|
|
121
|
+
```
|
|
122
|
+
<!-- agent-diagram:end -->
|
|
123
|
+
|
|
95
124
|
## Exit
|
|
96
125
|
Terraform apply clean + Ansible 0 failures + smoke tests pass = environment provisioned.
|
|
@@ -190,5 +190,41 @@ quality-gates:
|
|
|
190
190
|
- Set up `kube-prometheus-stack` for cluster monitoring
|
|
191
191
|
- **Output:** `bootstrap_report.md` — cluster version, node IPs, installed components, kubeconfig location
|
|
192
192
|
|
|
193
|
+
## Agent Interaction Diagram
|
|
194
|
+
|
|
195
|
+
<!-- agent-diagram:start -->
|
|
196
|
+
```mermaid
|
|
197
|
+
flowchart TD
|
|
198
|
+
start(["Start /cluster-bootstrap"])
|
|
199
|
+
role_1["devops-engineer"]
|
|
200
|
+
step_1["1. Node Pre-Flight"]
|
|
201
|
+
step_2["2. Bootstrap First Control Plane"]
|
|
202
|
+
step_3["3. Install CNI (Cilium)"]
|
|
203
|
+
step_4["4. Join Remaining Control Plane Nodes"]
|
|
204
|
+
step_5["5. Join Worker Nodes"]
|
|
205
|
+
step_6["6. etcd Encryption at Rest"]
|
|
206
|
+
step_7["7. Core Platform Components"]
|
|
207
|
+
step_8["8. Apply Security Baselines"]
|
|
208
|
+
exit(["All nodes Ready + core components Running + etcd encrypted + monitoring liv..."])
|
|
209
|
+
start --> step_1
|
|
210
|
+
step_1 --> step_2
|
|
211
|
+
step_2 --> step_3
|
|
212
|
+
step_3 --> step_4
|
|
213
|
+
step_4 --> step_5
|
|
214
|
+
step_5 --> step_6
|
|
215
|
+
step_6 --> step_7
|
|
216
|
+
step_7 --> step_8
|
|
217
|
+
step_8 --> exit
|
|
218
|
+
role_1 -. owns .-> step_1
|
|
219
|
+
role_1 -. owns .-> step_2
|
|
220
|
+
role_1 -. owns .-> step_3
|
|
221
|
+
role_1 -. owns .-> step_4
|
|
222
|
+
role_1 -. owns .-> step_5
|
|
223
|
+
role_1 -. owns .-> step_6
|
|
224
|
+
role_1 -. owns .-> step_7
|
|
225
|
+
role_1 -. owns .-> step_8
|
|
226
|
+
```
|
|
227
|
+
<!-- agent-diagram:end -->
|
|
228
|
+
|
|
193
229
|
## Exit
|
|
194
230
|
All nodes Ready + core components Running + etcd encrypted + monitoring live = cluster bootstrapped.
|
|
@@ -104,5 +104,34 @@ quality-gates:
|
|
|
104
104
|
- **Output:** `docs/incidents/<date>-<workload>-root-cause.md`
|
|
105
105
|
- **Done when:** document committed; alert/runbook created if pattern is recurring
|
|
106
106
|
|
|
107
|
+
## Agent Interaction Diagram
|
|
108
|
+
|
|
109
|
+
<!-- agent-diagram:start -->
|
|
110
|
+
```mermaid
|
|
111
|
+
flowchart TD
|
|
112
|
+
start(["Start /debug-workload"])
|
|
113
|
+
role_1["devops-engineer"]
|
|
114
|
+
role_2["developer"]
|
|
115
|
+
step_1["1. Classify Symptom"]
|
|
116
|
+
step_2["2. Deep Diagnosis"]
|
|
117
|
+
step_3["3. Apply Fix"]
|
|
118
|
+
step_4["4. Verify & Monitor"]
|
|
119
|
+
step_5["5. Document"]
|
|
120
|
+
exit(["Pod Running + metrics stable + root cause documented = workload debug compl..."])
|
|
121
|
+
start --> step_1
|
|
122
|
+
step_1 --> step_2
|
|
123
|
+
step_2 --> step_3
|
|
124
|
+
step_3 --> step_4
|
|
125
|
+
step_4 --> step_5
|
|
126
|
+
step_5 --> exit
|
|
127
|
+
role_1 -. owns .-> step_1
|
|
128
|
+
role_1 -. owns .-> step_2
|
|
129
|
+
role_2 -. owns .-> step_3
|
|
130
|
+
role_1 -. owns .-> step_3
|
|
131
|
+
role_1 -. owns .-> step_4
|
|
132
|
+
role_1 -. owns .-> step_5
|
|
133
|
+
```
|
|
134
|
+
<!-- agent-diagram:end -->
|
|
135
|
+
|
|
107
136
|
## Exit
|
|
108
137
|
Pod Running + metrics stable + root cause documented = workload debug complete.
|
|
@@ -120,5 +120,40 @@ quality-gates:
|
|
|
120
120
|
- **Output:** metrics visible in Grafana; alerts configured
|
|
121
121
|
- **Done when:** Grafana dashboard shows service metrics
|
|
122
122
|
|
|
123
|
+
## Agent Interaction Diagram
|
|
124
|
+
|
|
125
|
+
<!-- agent-diagram:start -->
|
|
126
|
+
```mermaid
|
|
127
|
+
flowchart TD
|
|
128
|
+
start(["Start /onboard-service"])
|
|
129
|
+
role_1["devops-engineer"]
|
|
130
|
+
role_2["developer"]
|
|
131
|
+
step_1["1. Namespace Setup"]
|
|
132
|
+
step_2["2. RBAC Setup"]
|
|
133
|
+
step_3["3. Network Policies"]
|
|
134
|
+
step_4["4. Helm Chart"]
|
|
135
|
+
step_5["5. ArgoCD Application"]
|
|
136
|
+
step_6["6. Validate & Smoke Test"]
|
|
137
|
+
step_7["7. Monitoring"]
|
|
138
|
+
exit(["Pod Running + health check passing + ArgoCD Healthy + metrics visible = ser..."])
|
|
139
|
+
start --> step_1
|
|
140
|
+
step_1 --> step_2
|
|
141
|
+
step_2 --> step_3
|
|
142
|
+
step_3 --> step_4
|
|
143
|
+
step_4 --> step_5
|
|
144
|
+
step_5 --> step_6
|
|
145
|
+
step_6 --> step_7
|
|
146
|
+
step_7 --> exit
|
|
147
|
+
role_1 -. owns .-> step_1
|
|
148
|
+
role_1 -. owns .-> step_2
|
|
149
|
+
role_1 -. owns .-> step_3
|
|
150
|
+
role_2 -. owns .-> step_4
|
|
151
|
+
role_1 -. owns .-> step_4
|
|
152
|
+
role_1 -. owns .-> step_5
|
|
153
|
+
role_2 -. owns .-> step_6
|
|
154
|
+
role_1 -. owns .-> step_7
|
|
155
|
+
```
|
|
156
|
+
<!-- agent-diagram:end -->
|
|
157
|
+
|
|
123
158
|
## Exit
|
|
124
159
|
Pod Running + health check passing + ArgoCD Healthy + metrics visible = service onboarded.
|
|
@@ -146,6 +146,36 @@ kubectl get pdb -A
|
|
|
146
146
|
- **Output:** `upgrade_report.md` — versions before/after, issues found, time taken
|
|
147
147
|
- **Done when:** all Tier 1 services healthy; no unexpected pod restarts
|
|
148
148
|
|
|
149
|
+
## Agent Interaction Diagram
|
|
150
|
+
|
|
151
|
+
<!-- agent-diagram:start -->
|
|
152
|
+
```mermaid
|
|
153
|
+
flowchart TD
|
|
154
|
+
start(["Start /upgrade-cluster"])
|
|
155
|
+
role_1["devops-engineer"]
|
|
156
|
+
role_2["team-lead"]
|
|
157
|
+
step_1["1. etcd Backup"]
|
|
158
|
+
step_2["2. Upgrade Control Plane (kubeadm)"]
|
|
159
|
+
step_3["3. Validate Control Plane"]
|
|
160
|
+
step_4["4. Upgrade Worker Nodes (rolling)"]
|
|
161
|
+
step_5["5. Post-Upgrade Validation"]
|
|
162
|
+
exit(["All nodes on target version + Tier 1 services healthy + upgrade report comm..."])
|
|
163
|
+
start --> step_1
|
|
164
|
+
step_1 --> step_2
|
|
165
|
+
step_2 --> step_3
|
|
166
|
+
step_3 --> step_4
|
|
167
|
+
step_4 --> step_5
|
|
168
|
+
step_5 --> exit
|
|
169
|
+
role_1 -. owns .-> step_1
|
|
170
|
+
role_1 -. owns .-> step_2
|
|
171
|
+
role_1 -. owns .-> step_3
|
|
172
|
+
role_1 -. owns .-> step_4
|
|
173
|
+
role_1 -. owns .-> step_5
|
|
174
|
+
role_2 -. owns .-> step_5
|
|
175
|
+
step_5 -. iterate if blocked .-> step_1
|
|
176
|
+
```
|
|
177
|
+
<!-- agent-diagram:end -->
|
|
178
|
+
|
|
149
179
|
## Rollback Plan
|
|
150
180
|
|
|
151
181
|
```bash
|
|
@@ -60,5 +60,32 @@ done | sort | uniq -c
|
|
|
60
60
|
- Point hostname to MetalLB external IP: `kubectl get svc -n ingress-nginx`
|
|
61
61
|
- Add A record in DNS provider or internal CoreDNS
|
|
62
62
|
|
|
63
|
+
## Agent Interaction Diagram
|
|
64
|
+
|
|
65
|
+
<!-- agent-diagram:start -->
|
|
66
|
+
```mermaid
|
|
67
|
+
flowchart TD
|
|
68
|
+
start(["Start /onboard-ingress"])
|
|
69
|
+
role_1["devops-engineer"]
|
|
70
|
+
step_1["1. Write Ingress Manifest"]
|
|
71
|
+
step_2["2. Apply & Wait for Certificate"]
|
|
72
|
+
step_3["3. Verify HTTPS"]
|
|
73
|
+
step_4["4. Verify Rate Limiting"]
|
|
74
|
+
step_5["5. DNS (if needed)"]
|
|
75
|
+
exit(["HTTPS accessible + cert issued + security headers present + rate limit veri..."])
|
|
76
|
+
start --> step_1
|
|
77
|
+
step_1 --> step_2
|
|
78
|
+
step_2 --> step_3
|
|
79
|
+
step_3 --> step_4
|
|
80
|
+
step_4 --> step_5
|
|
81
|
+
step_5 --> exit
|
|
82
|
+
role_1 -. owns .-> step_1
|
|
83
|
+
role_1 -. owns .-> step_2
|
|
84
|
+
role_1 -. owns .-> step_3
|
|
85
|
+
role_1 -. owns .-> step_4
|
|
86
|
+
role_1 -. owns .-> step_5
|
|
87
|
+
```
|
|
88
|
+
<!-- agent-diagram:end -->
|
|
89
|
+
|
|
63
90
|
## Exit
|
|
64
91
|
HTTPS accessible + cert issued + security headers present + rate limit verified = ingress onboarded.
|
|
@@ -118,5 +118,32 @@ kubectl -n istio-system port-forward svc/kiali 20001:20001 &
|
|
|
118
118
|
```
|
|
119
119
|
- **Done when:** service visible in mesh dashboard; no unmeshed traffic warnings
|
|
120
120
|
|
|
121
|
+
## Agent Interaction Diagram
|
|
122
|
+
|
|
123
|
+
<!-- agent-diagram:start -->
|
|
124
|
+
```mermaid
|
|
125
|
+
flowchart TD
|
|
126
|
+
start(["Start /service-mesh-onboard"])
|
|
127
|
+
role_1["devops-engineer"]
|
|
128
|
+
step_1["1. Pre-Check Mesh Health"]
|
|
129
|
+
step_2["2. Enable Injection"]
|
|
130
|
+
step_3["3. Verify mTLS"]
|
|
131
|
+
step_4["4. Apply Traffic Policies"]
|
|
132
|
+
step_5["5. Validate in Mesh Dashboard"]
|
|
133
|
+
exit(["Sidecar injected + mTLS verified + policies applied + dashboard shows servi..."])
|
|
134
|
+
start --> step_1
|
|
135
|
+
step_1 --> step_2
|
|
136
|
+
step_2 --> step_3
|
|
137
|
+
step_3 --> step_4
|
|
138
|
+
step_4 --> step_5
|
|
139
|
+
step_5 --> exit
|
|
140
|
+
role_1 -. owns .-> step_1
|
|
141
|
+
role_1 -. owns .-> step_2
|
|
142
|
+
role_1 -. owns .-> step_3
|
|
143
|
+
role_1 -. owns .-> step_4
|
|
144
|
+
role_1 -. owns .-> step_5
|
|
145
|
+
```
|
|
146
|
+
<!-- agent-diagram:end -->
|
|
147
|
+
|
|
121
148
|
## Exit
|
|
122
149
|
Sidecar injected + mTLS verified + policies applied + dashboard shows service = onboarded.
|
|
@@ -94,5 +94,34 @@ Saturation?
|
|
|
94
94
|
- Is the alert threshold correct? (too sensitive = toil; too loose = misses real issues)
|
|
95
95
|
- Create ticket if: runbook needs update, threshold needs tuning, or root cause needs a code fix
|
|
96
96
|
|
|
97
|
+
## Agent Interaction Diagram
|
|
98
|
+
|
|
99
|
+
<!-- agent-diagram:start -->
|
|
100
|
+
```mermaid
|
|
101
|
+
flowchart TD
|
|
102
|
+
start(["Start /alert-investigation"])
|
|
103
|
+
role_1["devops-engineer"]
|
|
104
|
+
role_2["developer"]
|
|
105
|
+
step_1["1. Acknowledge & Classify"]
|
|
106
|
+
step_2["2. Correlate Signals"]
|
|
107
|
+
step_3["3. Identify Root Cause"]
|
|
108
|
+
step_4["4. Mitigate"]
|
|
109
|
+
step_5["5. Post-Investigation Notes"]
|
|
110
|
+
exit(["Alert resolved or escalated + root cause noted + runbook quality assessed =..."])
|
|
111
|
+
start --> step_1
|
|
112
|
+
step_1 --> step_2
|
|
113
|
+
step_2 --> step_3
|
|
114
|
+
step_3 --> step_4
|
|
115
|
+
step_4 --> step_5
|
|
116
|
+
step_5 --> exit
|
|
117
|
+
role_1 -. owns .-> step_1
|
|
118
|
+
role_1 -. owns .-> step_2
|
|
119
|
+
role_1 -. owns .-> step_3
|
|
120
|
+
role_2 -. owns .-> step_3
|
|
121
|
+
role_1 -. owns .-> step_4
|
|
122
|
+
role_1 -. owns .-> step_5
|
|
123
|
+
```
|
|
124
|
+
<!-- agent-diagram:end -->
|
|
125
|
+
|
|
97
126
|
## Exit
|
|
98
127
|
Alert resolved or escalated + root cause noted + runbook quality assessed = investigation complete.
|