@jetrabbits/agentic 0.3.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. package/AGENTS.md +8 -0
  2. package/CHANGELOG.md +18 -0
  3. package/Makefile +26 -5
  4. package/README.md +25 -6
  5. package/agentic +801 -66
  6. package/areas/devops/ci-cd/workflows/onboard-repo.md +29 -0
  7. package/areas/devops/ci-cd/workflows/pipeline-debug.md +26 -0
  8. package/areas/devops/ci-cd/workflows/release-pipeline.md +53 -0
  9. package/areas/devops/database-ops/workflows/backup-verify.md +27 -0
  10. package/areas/devops/database-ops/workflows/db-incident.md +30 -0
  11. package/areas/devops/devsecops/workflows/policy-onboard.md +34 -0
  12. package/areas/devops/devsecops/workflows/security-scan-pipeline.md +33 -0
  13. package/areas/devops/infrastructure/workflows/destroy-environment.md +31 -0
  14. package/areas/devops/infrastructure/workflows/drift-remediation.md +29 -0
  15. package/areas/devops/infrastructure/workflows/module-development.md +32 -0
  16. package/areas/devops/infrastructure/workflows/provision-environment.md +29 -0
  17. package/areas/devops/kubernetes/workflows/cluster-bootstrap.md +36 -0
  18. package/areas/devops/kubernetes/workflows/debug-workload.md +29 -0
  19. package/areas/devops/kubernetes/workflows/onboard-service.md +35 -0
  20. package/areas/devops/kubernetes/workflows/upgrade-cluster.md +30 -0
  21. package/areas/devops/networking/workflows/onboard-ingress.md +27 -0
  22. package/areas/devops/networking/workflows/service-mesh-onboard.md +27 -0
  23. package/areas/devops/observability/workflows/alert-investigation.md +29 -0
  24. package/areas/devops/observability/workflows/observability-stack-setup.md +33 -0
  25. package/areas/devops/observability/workflows/onboard-service-monitoring.md +31 -0
  26. package/areas/devops/sre/workflows/incident-response.md +48 -0
  27. package/areas/devops/sre/workflows/postmortem.md +32 -0
  28. package/areas/devops/sre/workflows/slo-review.md +35 -1
  29. package/areas/software/backend/workflows/add-migration.md +33 -0
  30. package/areas/software/backend/workflows/create-endpoint.md +40 -0
  31. package/areas/software/backend/workflows/debug-issue.md +31 -0
  32. package/areas/software/backend/workflows/develop-epic.md +37 -0
  33. package/areas/software/backend/workflows/develop-feature.md +44 -0
  34. package/areas/software/backend/workflows/refactor-module.md +35 -0
  35. package/areas/software/backend/workflows/test-feature.md +30 -0
  36. package/areas/software/data-engineering/workflows/backfill-data.md +25 -0
  37. package/areas/software/data-engineering/workflows/data-quality-incident.md +31 -0
  38. package/areas/software/data-engineering/workflows/lineage-trace.md +25 -0
  39. package/areas/software/data-engineering/workflows/new-model.md +30 -0
  40. package/areas/software/data-engineering/workflows/schema-migration.md +29 -0
  41. package/areas/software/frontend/workflows/a11y-fix.md +30 -0
  42. package/areas/software/frontend/workflows/bundle-analyze.md +28 -0
  43. package/areas/software/frontend/workflows/release-prep.md +33 -0
  44. package/areas/software/frontend/workflows/scaffold-component.md +32 -0
  45. package/areas/software/frontend/workflows/visual-regression.md +32 -0
  46. package/areas/software/full-stack/workflows/backend-project-full-cycle.md +47 -2
  47. package/areas/software/full-stack/workflows/debug-issue.md +29 -0
  48. package/areas/software/full-stack/workflows/develop-feature.md +38 -0
  49. package/areas/software/full-stack/workflows/feature-implementation-flow.md +38 -0
  50. package/areas/software/full-stack/workflows/testing-ci-pipeline.md +30 -0
  51. package/areas/software/general/workflows/code-review-workflow.md +31 -0
  52. package/areas/software/general/workflows/development-cycle-workflow.md +38 -0
  53. package/areas/software/general/workflows/project-setup-workflow.md +38 -0
  54. package/areas/software/mlops/workflows/champion-challenger.md +29 -0
  55. package/areas/software/mlops/workflows/deploy-endpoint.md +30 -0
  56. package/areas/software/mlops/workflows/evaluate-model.md +28 -0
  57. package/areas/software/mlops/workflows/model-incident.md +29 -0
  58. package/areas/software/mlops/workflows/train-experiment.md +25 -0
  59. package/areas/software/mobile/workflows/crash-triage.md +28 -0
  60. package/areas/software/mobile/workflows/device-testing.md +27 -0
  61. package/areas/software/mobile/workflows/ota-update.md +25 -0
  62. package/areas/software/mobile/workflows/release-build.md +30 -0
  63. package/areas/software/mobile/workflows/store-submission.md +29 -0
  64. package/areas/software/platform/workflows/cost-audit.md +28 -0
  65. package/areas/software/platform/workflows/deploy-production.md +30 -0
  66. package/areas/software/platform/workflows/drift-check.md +29 -0
  67. package/areas/software/platform/workflows/incident-response.md +33 -0
  68. package/areas/software/platform/workflows/provision-env.md +36 -0
  69. package/areas/software/qa/workflows/flakiness-investigation.md +30 -0
  70. package/areas/software/qa/workflows/performance-audit.md +29 -0
  71. package/areas/software/qa/workflows/regression-suite.md +28 -0
  72. package/areas/software/qa/workflows/smoke-test.md +31 -0
  73. package/areas/software/qa/workflows/test-coverage-report.md +28 -0
  74. package/areas/software/security/workflows/compliance-report.md +27 -0
  75. package/areas/software/security/workflows/pen-test-sim.md +28 -0
  76. package/areas/software/security/workflows/secret-rotation.md +33 -2
  77. package/areas/software/security/workflows/security-scan.md +29 -0
  78. package/areas/software/security/workflows/threat-model-review.md +30 -0
  79. package/docs/agentic-usage.md +19 -2
  80. package/docs/catalog.schema.json +5 -1
  81. package/docs/mcp/README.md +28 -0
  82. package/docs/opencode_setup.md +21 -1
  83. package/docs/site/README.md +15 -1
  84. package/docs/site/app.js +68 -0
  85. package/docs/site/catalog.json +74 -1
  86. package/docs/site/index.html +5 -1
  87. package/docs/site/styles.css +52 -4
  88. package/extensions/opencode/opencode.json +0 -1
  89. package/extensions/opencode/profiles/githubcopilot/opencode.json +87 -0
  90. package/extensions/opencode/profiles/openai/opencode.json +100 -0
  91. package/package.json +1 -1
  92. package/scripts/build_docs_catalog.py +13 -1
  93. package/scripts/sync_workflow_diagrams.py +199 -0
  94. package/extensions/opencode/plugins/sound-notification.ts +0 -13
@@ -69,5 +69,34 @@ quality-gates:
69
69
  - Write `docs/ci-cd.md`: stages, how to run locally, how to add a new secret
70
70
  - **Done when:** documentation committed
71
71
 
72
+ ## Agent Interaction Diagram
73
+
74
+ <!-- agent-diagram:start -->
75
+ ```mermaid
76
+ flowchart TD
77
+ start(["Start /onboard-repo"])
78
+ role_1["devops-engineer"]
79
+ role_2["developer"]
80
+ step_1["1. Assess & Plan"]
81
+ step_2["2. Secrets & Environments Setup"]
82
+ step_3["3. Write Pipeline Config"]
83
+ step_4["4. First Run & Debug"]
84
+ step_5["5. Document"]
85
+ exit(["Green pipeline + staging deploy + documentation = repo onboarded."])
86
+ start --> step_1
87
+ step_1 --> step_2
88
+ step_2 --> step_3
89
+ step_3 --> step_4
90
+ step_4 --> step_5
91
+ step_5 --> exit
92
+ role_1 -. owns .-> step_1
93
+ role_1 -. owns .-> step_2
94
+ role_1 -. owns .-> step_3
95
+ role_1 -. owns .-> step_4
96
+ role_2 -. owns .-> step_4
97
+ role_1 -. owns .-> step_5
98
+ ```
99
+ <!-- agent-diagram:end -->
100
+
72
101
  ## Exit
73
102
  Green pipeline + staging deploy + documentation = repo onboarded.
@@ -62,5 +62,31 @@ quality-gates:
62
62
  - Merge fix; confirm pipeline green on main
63
63
  - If flaky test: add to quarantine list; file follow-up ticket with `flaky-test` label
64
64
 
65
+ ## Agent Interaction Diagram
66
+
67
+ <!-- agent-diagram:start -->
68
+ ```mermaid
69
+ flowchart TD
70
+ start(["Start /pipeline-debug"])
71
+ role_1["devops-engineer"]
72
+ role_2["developer"]
73
+ step_1["1. Classify Failure"]
74
+ step_2["2. Diagnose by Category"]
75
+ step_3["3. Fix & Verify"]
76
+ step_4["4. Merge & Monitor"]
77
+ exit(["Pipeline green + root cause documented in ticket = debug complete."])
78
+ start --> step_1
79
+ step_1 --> step_2
80
+ step_2 --> step_3
81
+ step_3 --> step_4
82
+ step_4 --> exit
83
+ role_1 -. owns .-> step_1
84
+ role_2 -. owns .-> step_2
85
+ role_1 -. owns .-> step_2
86
+ role_1 -. owns .-> step_3
87
+ role_1 -. owns .-> step_4
88
+ ```
89
+ <!-- agent-diagram:end -->
90
+
65
91
  ## Exit
66
92
  Pipeline green + root cause documented in ticket = debug complete.
@@ -110,6 +110,59 @@ helm upgrade --install order-service charts/order-service \
110
110
  - Verify business KPIs (conversion, checkout success, error funnel).
111
111
  - Publish deployment report with links to metrics, logs, and release artifact metadata.
112
112
 
113
+ ## Agent Interaction Diagram
114
+
115
+ <!-- agent-diagram:start -->
116
+ ```mermaid
117
+ flowchart TD
118
+ start(["Start /release-pipeline"])
119
+ role_1["team-lead"]
120
+ role_2["pm"]
121
+ role_3["developer"]
122
+ role_4["devops-engineer"]
123
+ role_5["qa"]
124
+ step_1["1. Release Readiness and Freeze Check"]
125
+ step_2["2. Database Compatibility Gate"]
126
+ step_3["3. Tag Release"]
127
+ step_4["4. CI Release Pipeline (automated) — CI system"]
128
+ step_5["5. Deploy Staging"]
129
+ step_6["6. Production Gate"]
130
+ step_7["7. Canary Deployment"]
131
+ step_8["8. Feature Flag Progression"]
132
+ step_9["9. Post-Deploy Validation"]
133
+ exit(["Release is complete when 100% traffic is healthy, post-deploy checks pass,..."])
134
+ start --> step_1
135
+ step_1 --> step_2
136
+ step_2 --> step_3
137
+ step_3 --> step_4
138
+ step_4 --> step_5
139
+ step_5 --> step_6
140
+ step_6 --> step_7
141
+ step_7 --> step_8
142
+ step_8 --> step_9
143
+ step_9 --> exit
144
+ role_1 -. owns .-> step_1
145
+ role_2 -. owns .-> step_1
146
+ role_3 -. owns .-> step_2
147
+ role_4 -. owns .-> step_2
148
+ role_3 -. owns .-> step_3
149
+ role_3 -. owns .-> step_4
150
+ role_4 -. owns .-> step_4
151
+ role_1 -. owns .-> step_4
152
+ role_2 -. owns .-> step_4
153
+ role_5 -. owns .-> step_4
154
+ role_4 -. owns .-> step_5
155
+ role_1 -. owns .-> step_6
156
+ role_5 -. owns .-> step_6
157
+ role_4 -. owns .-> step_7
158
+ role_3 -. owns .-> step_8
159
+ role_5 -. owns .-> step_8
160
+ role_5 -. owns .-> step_9
161
+ role_2 -. owns .-> step_9
162
+ step_9 -. iterate if blocked .-> step_1
163
+ ```
164
+ <!-- agent-diagram:end -->
165
+
113
166
  ## Rollback
114
167
 
115
168
  ```bash
@@ -103,5 +103,32 @@ curl -X POST $SLACK_WEBHOOK \
103
103
  ```
104
104
  - **If any step fails:** post failure to Slack + page on-call → P1 incident
105
105
 
106
+ ## Agent Interaction Diagram
107
+
108
+ <!-- agent-diagram:start -->
109
+ ```mermaid
110
+ flowchart TD
111
+ start(["Start /backup-verify"])
112
+ role_1["devops-engineer"]
113
+ step_1["1. Pre-Check: Backup Catalog"]
114
+ step_2["2. Provision Test Environment"]
115
+ step_3["3. Restore Latest Backup"]
116
+ step_4["4. Row Count Validation"]
117
+ step_5["5. Report + Cleanup"]
118
+ exit(["Restore successful + row counts validated + test env destroyed + report pos..."])
119
+ start --> step_1
120
+ step_1 --> step_2
121
+ step_2 --> step_3
122
+ step_3 --> step_4
123
+ step_4 --> step_5
124
+ step_5 --> exit
125
+ role_1 -. owns .-> step_1
126
+ role_1 -. owns .-> step_2
127
+ role_1 -. owns .-> step_3
128
+ role_1 -. owns .-> step_4
129
+ role_1 -. owns .-> step_5
130
+ ```
131
+ <!-- agent-diagram:end -->
132
+
106
133
  ## Exit
107
134
  Restore successful + row counts validated + test env destroyed + report posted = backup verified.
@@ -82,5 +82,35 @@ SELECT pg_terminate_backend(<pid>); -- forceful
82
82
  - Root cause + fix in incident ticket
83
83
  - If query regression: create optimization ticket for development team
84
84
 
85
+ ## Agent Interaction Diagram
86
+
87
+ <!-- agent-diagram:start -->
88
+ ```mermaid
89
+ flowchart TD
90
+ start(["Start /db-incident"])
91
+ role_1["devops-engineer"]
92
+ role_2["developer"]
93
+ step_1["1. Triage"]
94
+ step_2["2. Immediate Mitigation by Type"]
95
+ step_3["3. Root Cause"]
96
+ step_4["4. Fix & Verify"]
97
+ step_5["5. Document"]
98
+ exit(["Metrics normal + root cause documented = db incident resolved."])
99
+ start --> step_1
100
+ step_1 --> step_2
101
+ step_2 --> step_3
102
+ step_3 --> step_4
103
+ step_4 --> step_5
104
+ step_5 --> exit
105
+ role_1 -. owns .-> step_1
106
+ role_2 -. owns .-> step_2
107
+ role_1 -. owns .-> step_2
108
+ role_1 -. owns .-> step_3
109
+ role_2 -. owns .-> step_3
110
+ role_1 -. owns .-> step_4
111
+ role_1 -. owns .-> step_5
112
+ ```
113
+ <!-- agent-diagram:end -->
114
+
85
115
  ## Exit
86
116
  Metrics normal + root cause documented = db incident resolved.
@@ -100,5 +100,39 @@ kubectl get constraint ${POLICY} -o jsonpath='{.status.byPod}'
100
100
  # metric: gatekeeper_violations_total{enforcement_action="deny"}
101
101
  ```
102
102
 
103
+ ## Agent Interaction Diagram
104
+
105
+ <!-- agent-diagram:start -->
106
+ ```mermaid
107
+ flowchart TD
108
+ start(["Start /policy-onboard"])
109
+ role_1["devops-engineer"]
110
+ role_2["developer"]
111
+ role_3["team-lead"]
112
+ step_1["1. Design Policy"]
113
+ step_2["2. Unit Test"]
114
+ step_3["3. Dryrun in Staging"]
115
+ step_4["4. Fix Existing Violations"]
116
+ step_5["5. Switch to Enforce"]
117
+ step_6["6. Monitor Policy Health"]
118
+ exit(["Policy tested + existing violations resolved + enforce mode active + monito..."])
119
+ start --> step_1
120
+ step_1 --> step_2
121
+ step_2 --> step_3
122
+ step_3 --> step_4
123
+ step_4 --> step_5
124
+ step_5 --> step_6
125
+ step_6 --> exit
126
+ role_1 -. owns .-> step_1
127
+ role_1 -. owns .-> step_2
128
+ role_1 -. owns .-> step_3
129
+ role_2 -. owns .-> step_4
130
+ role_1 -. owns .-> step_4
131
+ role_1 -. owns .-> step_5
132
+ role_3 -. owns .-> step_5
133
+ role_1 -. owns .-> step_6
134
+ ```
135
+ <!-- agent-diagram:end -->
136
+
103
137
  ## Exit
104
138
  Policy tested + existing violations resolved + enforce mode active + monitoring in place = policy onboarded.
@@ -151,5 +151,38 @@ echo "IaC: $(cat iac-scan.sarif | jq '.runs[0].results | length') findi
151
151
  echo "SBOM: attached to registry"
152
152
  ```
153
153
 
154
+ ## Agent Interaction Diagram
155
+
156
+ <!-- agent-diagram:start -->
157
+ ```mermaid
158
+ flowchart TD
159
+ start(["Start /security-scan-pipeline"])
160
+ role_1["devops-engineer"]
161
+ step_1["1. Secrets Scan"]
162
+ step_2["2. SAST (Static Analysis)"]
163
+ step_3["3. Dependency CVE Scan"]
164
+ step_4["4. Container Image Scan"]
165
+ step_5["5. IaC Security Scan"]
166
+ step_6["6. Generate SBOM"]
167
+ step_7["7. Collate Report"]
168
+ exit(["Zero unresolved Critical/High + SBOM attached + scan report filed = securit..."])
169
+ start --> step_1
170
+ step_1 --> step_2
171
+ step_2 --> step_3
172
+ step_3 --> step_4
173
+ step_4 --> step_5
174
+ step_5 --> step_6
175
+ step_6 --> step_7
176
+ step_7 --> exit
177
+ role_1 -. owns .-> step_1
178
+ role_1 -. owns .-> step_2
179
+ role_1 -. owns .-> step_3
180
+ role_1 -. owns .-> step_4
181
+ role_1 -. owns .-> step_5
182
+ role_1 -. owns .-> step_6
183
+ role_1 -. owns .-> step_7
184
+ ```
185
+ <!-- agent-diagram:end -->
186
+
154
187
  ## Exit
155
188
  Zero unresolved Critical/High + SBOM attached + scan report filed = security scan complete.
@@ -92,5 +92,36 @@ aws dynamodb delete-item \
92
92
  ### 6. Document — `@devops-engineer`
93
93
  - Record in decommission log: environment, date, approver, reason, data disposition
94
94
 
95
+ ## Agent Interaction Diagram
96
+
97
+ <!-- agent-diagram:start -->
98
+ ```mermaid
99
+ flowchart TD
100
+ start(["Start /destroy-environment"])
101
+ role_1["devops-engineer"]
102
+ role_2["team-lead"]
103
+ step_1["1. Confirm Scope"]
104
+ step_2["2. Approval"]
105
+ step_3["3. Pre-Destroy Backup"]
106
+ step_4["4. Ordered Teardown"]
107
+ step_5["5. Verify & Cleanup"]
108
+ step_6["6. Document"]
109
+ exit(["Terraform state empty + cloud console clean + documentation filed = environ..."])
110
+ start --> step_1
111
+ step_1 --> step_2
112
+ step_2 --> step_3
113
+ step_3 --> step_4
114
+ step_4 --> step_5
115
+ step_5 --> step_6
116
+ step_6 --> exit
117
+ role_1 -. owns .-> step_1
118
+ role_2 -. owns .-> step_2
119
+ role_1 -. owns .-> step_3
120
+ role_1 -. owns .-> step_4
121
+ role_1 -. owns .-> step_5
122
+ role_1 -. owns .-> step_6
123
+ ```
124
+ <!-- agent-diagram:end -->
125
+
95
126
  ## Exit
96
127
  Terraform state empty + cloud console clean + documentation filed = environment destroyed.
@@ -62,5 +62,34 @@ terraform apply remediation.plan
62
62
  ### 5. Report — `@devops-engineer`
63
63
  - Update `drift-log.md` with date, resources affected, classification, action taken
64
64
 
65
+ ## Agent Interaction Diagram
66
+
67
+ <!-- agent-diagram:start -->
68
+ ```mermaid
69
+ flowchart TD
70
+ start(["Start /drift-remediation"])
71
+ role_1["devops-engineer"]
72
+ role_2["team-lead"]
73
+ step_1["1. Detect Drift"]
74
+ step_2["2. Classify Findings"]
75
+ step_3["3. Remediate (if REMEDIATE class)"]
76
+ step_4["4. Investigate (if INVESTIGATE class)"]
77
+ step_5["5. Report"]
78
+ exit(["All drift classified + REMEDIATE resolved + INVESTIGATE escalated = drift c..."])
79
+ start --> step_1
80
+ step_1 --> step_2
81
+ step_2 --> step_3
82
+ step_3 --> step_4
83
+ step_4 --> step_5
84
+ step_5 --> exit
85
+ role_1 -. owns .-> step_1
86
+ role_1 -. owns .-> step_2
87
+ role_2 -. owns .-> step_2
88
+ role_1 -. owns .-> step_3
89
+ role_1 -. owns .-> step_4
90
+ role_1 -. owns .-> step_5
91
+ ```
92
+ <!-- agent-diagram:end -->
93
+
65
94
  ## Exit
66
95
  All drift classified + REMEDIATE resolved + INVESTIGATE escalated = drift cycle complete.
@@ -97,5 +97,37 @@ git push origin modules/<module-name>/v1.0.0
97
97
  # Reference in other modules: ?ref=v1.0.0 (never ?ref=main)
98
98
  ```
99
99
 
100
+ ## Agent Interaction Diagram
101
+
102
+ <!-- agent-diagram:start -->
103
+ ```mermaid
104
+ flowchart TD
105
+ start(["Start /module-development"])
106
+ role_1["devops-engineer"]
107
+ role_2["team-lead"]
108
+ step_1["1. Design Interface"]
109
+ step_2["2. Implement Module"]
110
+ step_3["3. Write Examples"]
111
+ step_4["4. Test"]
112
+ step_5["5. Code Review"]
113
+ step_6["6. Release"]
114
+ exit(["Module published + examples tested + documentation complete = module released."])
115
+ start --> step_1
116
+ step_1 --> step_2
117
+ step_2 --> step_3
118
+ step_3 --> step_4
119
+ step_4 --> step_5
120
+ step_5 --> step_6
121
+ step_6 --> exit
122
+ role_1 -. owns .-> step_1
123
+ role_2 -. owns .-> step_1
124
+ role_1 -. owns .-> step_2
125
+ role_1 -. owns .-> step_3
126
+ role_1 -. owns .-> step_4
127
+ role_2 -. owns .-> step_5
128
+ role_1 -. owns .-> step_6
129
+ ```
130
+ <!-- agent-diagram:end -->
131
+
100
132
  ## Exit
101
133
  Module published + examples tested + documentation complete = module released.
@@ -92,5 +92,34 @@ quality-gates:
92
92
  - Write `provision_report.md`: environment, resources created, cost estimate, next steps
93
93
  - **Done when:** report committed; outputs stored
94
94
 
95
+ ## Agent Interaction Diagram
96
+
97
+ <!-- agent-diagram:start -->
98
+ ```mermaid
99
+ flowchart TD
100
+ start(["Start /provision-environment"])
101
+ role_1["devops-engineer"]
102
+ role_2["team-lead"]
103
+ step_1["1. Plan & Review"]
104
+ step_2["2. Apply Infrastructure"]
105
+ step_3["3. Configure Nodes (Ansible)"]
106
+ step_4["4. Smoke Tests"]
107
+ step_5["5. Document & Store Outputs"]
108
+ exit(["Terraform apply clean + Ansible 0 failures + smoke tests pass = environment..."])
109
+ start --> step_1
110
+ step_1 --> step_2
111
+ step_2 --> step_3
112
+ step_3 --> step_4
113
+ step_4 --> step_5
114
+ step_5 --> exit
115
+ role_1 -. owns .-> step_1
116
+ role_2 -. owns .-> step_1
117
+ role_1 -. owns .-> step_2
118
+ role_1 -. owns .-> step_3
119
+ role_1 -. owns .-> step_4
120
+ role_1 -. owns .-> step_5
121
+ ```
122
+ <!-- agent-diagram:end -->
123
+
95
124
  ## Exit
96
125
  Terraform apply clean + Ansible 0 failures + smoke tests pass = environment provisioned.
@@ -190,5 +190,41 @@ quality-gates:
190
190
  - Set up `kube-prometheus-stack` for cluster monitoring
191
191
  - **Output:** `bootstrap_report.md` — cluster version, node IPs, installed components, kubeconfig location
192
192
 
193
+ ## Agent Interaction Diagram
194
+
195
+ <!-- agent-diagram:start -->
196
+ ```mermaid
197
+ flowchart TD
198
+ start(["Start /cluster-bootstrap"])
199
+ role_1["devops-engineer"]
200
+ step_1["1. Node Pre-Flight"]
201
+ step_2["2. Bootstrap First Control Plane"]
202
+ step_3["3. Install CNI (Cilium)"]
203
+ step_4["4. Join Remaining Control Plane Nodes"]
204
+ step_5["5. Join Worker Nodes"]
205
+ step_6["6. etcd Encryption at Rest"]
206
+ step_7["7. Core Platform Components"]
207
+ step_8["8. Apply Security Baselines"]
208
+ exit(["All nodes Ready + core components Running + etcd encrypted + monitoring liv..."])
209
+ start --> step_1
210
+ step_1 --> step_2
211
+ step_2 --> step_3
212
+ step_3 --> step_4
213
+ step_4 --> step_5
214
+ step_5 --> step_6
215
+ step_6 --> step_7
216
+ step_7 --> step_8
217
+ step_8 --> exit
218
+ role_1 -. owns .-> step_1
219
+ role_1 -. owns .-> step_2
220
+ role_1 -. owns .-> step_3
221
+ role_1 -. owns .-> step_4
222
+ role_1 -. owns .-> step_5
223
+ role_1 -. owns .-> step_6
224
+ role_1 -. owns .-> step_7
225
+ role_1 -. owns .-> step_8
226
+ ```
227
+ <!-- agent-diagram:end -->
228
+
193
229
  ## Exit
194
230
  All nodes Ready + core components Running + etcd encrypted + monitoring live = cluster bootstrapped.
@@ -104,5 +104,34 @@ quality-gates:
104
104
  - **Output:** `docs/incidents/<date>-<workload>-root-cause.md`
105
105
  - **Done when:** document committed; alert/runbook created if pattern is recurring
106
106
 
107
+ ## Agent Interaction Diagram
108
+
109
+ <!-- agent-diagram:start -->
110
+ ```mermaid
111
+ flowchart TD
112
+ start(["Start /debug-workload"])
113
+ role_1["devops-engineer"]
114
+ role_2["developer"]
115
+ step_1["1. Classify Symptom"]
116
+ step_2["2. Deep Diagnosis"]
117
+ step_3["3. Apply Fix"]
118
+ step_4["4. Verify & Monitor"]
119
+ step_5["5. Document"]
120
+ exit(["Pod Running + metrics stable + root cause documented = workload debug compl..."])
121
+ start --> step_1
122
+ step_1 --> step_2
123
+ step_2 --> step_3
124
+ step_3 --> step_4
125
+ step_4 --> step_5
126
+ step_5 --> exit
127
+ role_1 -. owns .-> step_1
128
+ role_1 -. owns .-> step_2
129
+ role_2 -. owns .-> step_3
130
+ role_1 -. owns .-> step_3
131
+ role_1 -. owns .-> step_4
132
+ role_1 -. owns .-> step_5
133
+ ```
134
+ <!-- agent-diagram:end -->
135
+
107
136
  ## Exit
108
137
  Pod Running + metrics stable + root cause documented = workload debug complete.
@@ -120,5 +120,40 @@ quality-gates:
120
120
  - **Output:** metrics visible in Grafana; alerts configured
121
121
  - **Done when:** Grafana dashboard shows service metrics
122
122
 
123
+ ## Agent Interaction Diagram
124
+
125
+ <!-- agent-diagram:start -->
126
+ ```mermaid
127
+ flowchart TD
128
+ start(["Start /onboard-service"])
129
+ role_1["devops-engineer"]
130
+ role_2["developer"]
131
+ step_1["1. Namespace Setup"]
132
+ step_2["2. RBAC Setup"]
133
+ step_3["3. Network Policies"]
134
+ step_4["4. Helm Chart"]
135
+ step_5["5. ArgoCD Application"]
136
+ step_6["6. Validate & Smoke Test"]
137
+ step_7["7. Monitoring"]
138
+ exit(["Pod Running + health check passing + ArgoCD Healthy + metrics visible = ser..."])
139
+ start --> step_1
140
+ step_1 --> step_2
141
+ step_2 --> step_3
142
+ step_3 --> step_4
143
+ step_4 --> step_5
144
+ step_5 --> step_6
145
+ step_6 --> step_7
146
+ step_7 --> exit
147
+ role_1 -. owns .-> step_1
148
+ role_1 -. owns .-> step_2
149
+ role_1 -. owns .-> step_3
150
+ role_2 -. owns .-> step_4
151
+ role_1 -. owns .-> step_4
152
+ role_1 -. owns .-> step_5
153
+ role_2 -. owns .-> step_6
154
+ role_1 -. owns .-> step_7
155
+ ```
156
+ <!-- agent-diagram:end -->
157
+
123
158
  ## Exit
124
159
  Pod Running + health check passing + ArgoCD Healthy + metrics visible = service onboarded.
@@ -146,6 +146,36 @@ kubectl get pdb -A
146
146
  - **Output:** `upgrade_report.md` — versions before/after, issues found, time taken
147
147
  - **Done when:** all Tier 1 services healthy; no unexpected pod restarts
148
148
 
149
+ ## Agent Interaction Diagram
150
+
151
+ <!-- agent-diagram:start -->
152
+ ```mermaid
153
+ flowchart TD
154
+ start(["Start /upgrade-cluster"])
155
+ role_1["devops-engineer"]
156
+ role_2["team-lead"]
157
+ step_1["1. etcd Backup"]
158
+ step_2["2. Upgrade Control Plane (kubeadm)"]
159
+ step_3["3. Validate Control Plane"]
160
+ step_4["4. Upgrade Worker Nodes (rolling)"]
161
+ step_5["5. Post-Upgrade Validation"]
162
+ exit(["All nodes on target version + Tier 1 services healthy + upgrade report comm..."])
163
+ start --> step_1
164
+ step_1 --> step_2
165
+ step_2 --> step_3
166
+ step_3 --> step_4
167
+ step_4 --> step_5
168
+ step_5 --> exit
169
+ role_1 -. owns .-> step_1
170
+ role_1 -. owns .-> step_2
171
+ role_1 -. owns .-> step_3
172
+ role_1 -. owns .-> step_4
173
+ role_1 -. owns .-> step_5
174
+ role_2 -. owns .-> step_5
175
+ step_5 -. iterate if blocked .-> step_1
176
+ ```
177
+ <!-- agent-diagram:end -->
178
+
149
179
  ## Rollback Plan
150
180
 
151
181
  ```bash
@@ -60,5 +60,32 @@ done | sort | uniq -c
60
60
  - Point hostname to MetalLB external IP: `kubectl get svc -n ingress-nginx`
61
61
  - Add A record in DNS provider or internal CoreDNS
62
62
 
63
+ ## Agent Interaction Diagram
64
+
65
+ <!-- agent-diagram:start -->
66
+ ```mermaid
67
+ flowchart TD
68
+ start(["Start /onboard-ingress"])
69
+ role_1["devops-engineer"]
70
+ step_1["1. Write Ingress Manifest"]
71
+ step_2["2. Apply & Wait for Certificate"]
72
+ step_3["3. Verify HTTPS"]
73
+ step_4["4. Verify Rate Limiting"]
74
+ step_5["5. DNS (if needed)"]
75
+ exit(["HTTPS accessible + cert issued + security headers present + rate limit veri..."])
76
+ start --> step_1
77
+ step_1 --> step_2
78
+ step_2 --> step_3
79
+ step_3 --> step_4
80
+ step_4 --> step_5
81
+ step_5 --> exit
82
+ role_1 -. owns .-> step_1
83
+ role_1 -. owns .-> step_2
84
+ role_1 -. owns .-> step_3
85
+ role_1 -. owns .-> step_4
86
+ role_1 -. owns .-> step_5
87
+ ```
88
+ <!-- agent-diagram:end -->
89
+
63
90
  ## Exit
64
91
  HTTPS accessible + cert issued + security headers present + rate limit verified = ingress onboarded.
@@ -118,5 +118,32 @@ kubectl -n istio-system port-forward svc/kiali 20001:20001 &
118
118
  ```
119
119
  - **Done when:** service visible in mesh dashboard; no unmeshed traffic warnings
120
120
 
121
+ ## Agent Interaction Diagram
122
+
123
+ <!-- agent-diagram:start -->
124
+ ```mermaid
125
+ flowchart TD
126
+ start(["Start /service-mesh-onboard"])
127
+ role_1["devops-engineer"]
128
+ step_1["1. Pre-Check Mesh Health"]
129
+ step_2["2. Enable Injection"]
130
+ step_3["3. Verify mTLS"]
131
+ step_4["4. Apply Traffic Policies"]
132
+ step_5["5. Validate in Mesh Dashboard"]
133
+ exit(["Sidecar injected + mTLS verified + policies applied + dashboard shows servi..."])
134
+ start --> step_1
135
+ step_1 --> step_2
136
+ step_2 --> step_3
137
+ step_3 --> step_4
138
+ step_4 --> step_5
139
+ step_5 --> exit
140
+ role_1 -. owns .-> step_1
141
+ role_1 -. owns .-> step_2
142
+ role_1 -. owns .-> step_3
143
+ role_1 -. owns .-> step_4
144
+ role_1 -. owns .-> step_5
145
+ ```
146
+ <!-- agent-diagram:end -->
147
+
121
148
  ## Exit
122
149
  Sidecar injected + mTLS verified + policies applied + dashboard shows service = onboarded.
@@ -94,5 +94,34 @@ Saturation?
94
94
  - Is the alert threshold correct? (too sensitive = toil; too loose = misses real issues)
95
95
  - Create ticket if: runbook needs update, threshold needs tuning, or root cause needs a code fix
96
96
 
97
+ ## Agent Interaction Diagram
98
+
99
+ <!-- agent-diagram:start -->
100
+ ```mermaid
101
+ flowchart TD
102
+ start(["Start /alert-investigation"])
103
+ role_1["devops-engineer"]
104
+ role_2["developer"]
105
+ step_1["1. Acknowledge & Classify"]
106
+ step_2["2. Correlate Signals"]
107
+ step_3["3. Identify Root Cause"]
108
+ step_4["4. Mitigate"]
109
+ step_5["5. Post-Investigation Notes"]
110
+ exit(["Alert resolved or escalated + root cause noted + runbook quality assessed =..."])
111
+ start --> step_1
112
+ step_1 --> step_2
113
+ step_2 --> step_3
114
+ step_3 --> step_4
115
+ step_4 --> step_5
116
+ step_5 --> exit
117
+ role_1 -. owns .-> step_1
118
+ role_1 -. owns .-> step_2
119
+ role_1 -. owns .-> step_3
120
+ role_2 -. owns .-> step_3
121
+ role_1 -. owns .-> step_4
122
+ role_1 -. owns .-> step_5
123
+ ```
124
+ <!-- agent-diagram:end -->
125
+
97
126
  ## Exit
98
127
  Alert resolved or escalated + root cause noted + runbook quality assessed = investigation complete.