omgkit 2.13.0 → 2.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/README.md +93 -10
  2. package/package.json +2 -2
  3. package/plugin/agents/api-designer.md +5 -0
  4. package/plugin/agents/architect.md +8 -0
  5. package/plugin/agents/brainstormer.md +4 -0
  6. package/plugin/agents/cicd-manager.md +6 -0
  7. package/plugin/agents/code-reviewer.md +6 -0
  8. package/plugin/agents/copywriter.md +2 -0
  9. package/plugin/agents/data-engineer.md +255 -0
  10. package/plugin/agents/database-admin.md +10 -0
  11. package/plugin/agents/debugger.md +10 -0
  12. package/plugin/agents/devsecops.md +314 -0
  13. package/plugin/agents/docs-manager.md +4 -0
  14. package/plugin/agents/domain-decomposer.md +181 -0
  15. package/plugin/agents/embedded-systems.md +397 -0
  16. package/plugin/agents/fullstack-developer.md +12 -0
  17. package/plugin/agents/game-systems-designer.md +375 -0
  18. package/plugin/agents/git-manager.md +10 -0
  19. package/plugin/agents/journal-writer.md +2 -0
  20. package/plugin/agents/ml-engineer.md +284 -0
  21. package/plugin/agents/observability-engineer.md +353 -0
  22. package/plugin/agents/oracle.md +9 -0
  23. package/plugin/agents/performance-engineer.md +290 -0
  24. package/plugin/agents/pipeline-architect.md +6 -0
  25. package/plugin/agents/planner.md +12 -0
  26. package/plugin/agents/platform-engineer.md +325 -0
  27. package/plugin/agents/project-manager.md +3 -0
  28. package/plugin/agents/researcher.md +5 -0
  29. package/plugin/agents/scientific-computing.md +426 -0
  30. package/plugin/agents/scout.md +3 -0
  31. package/plugin/agents/security-auditor.md +7 -0
  32. package/plugin/agents/sprint-master.md +17 -0
  33. package/plugin/agents/tester.md +10 -0
  34. package/plugin/agents/ui-ux-designer.md +12 -0
  35. package/plugin/agents/vulnerability-scanner.md +6 -0
  36. package/plugin/commands/data/pipeline.md +47 -0
  37. package/plugin/commands/data/quality.md +49 -0
  38. package/plugin/commands/domain/analyze.md +34 -0
  39. package/plugin/commands/domain/map.md +41 -0
  40. package/plugin/commands/game/balance.md +56 -0
  41. package/plugin/commands/game/optimize.md +62 -0
  42. package/plugin/commands/iot/provision.md +58 -0
  43. package/plugin/commands/ml/evaluate.md +47 -0
  44. package/plugin/commands/ml/train.md +48 -0
  45. package/plugin/commands/perf/benchmark.md +54 -0
  46. package/plugin/commands/perf/profile.md +49 -0
  47. package/plugin/commands/platform/blueprint.md +56 -0
  48. package/plugin/commands/security/audit.md +54 -0
  49. package/plugin/commands/security/scan.md +55 -0
  50. package/plugin/commands/sre/dashboard.md +53 -0
  51. package/plugin/registry.yaml +711 -0
  52. package/plugin/skills/ai-ml/experiment-tracking/SKILL.md +338 -0
  53. package/plugin/skills/ai-ml/feature-stores/SKILL.md +340 -0
  54. package/plugin/skills/ai-ml/llm-ops/SKILL.md +454 -0
  55. package/plugin/skills/ai-ml/ml-pipelines/SKILL.md +390 -0
  56. package/plugin/skills/ai-ml/model-monitoring/SKILL.md +398 -0
  57. package/plugin/skills/ai-ml/model-serving/SKILL.md +386 -0
  58. package/plugin/skills/event-driven/cqrs-patterns/SKILL.md +348 -0
  59. package/plugin/skills/event-driven/event-sourcing/SKILL.md +334 -0
  60. package/plugin/skills/event-driven/kafka-deep/SKILL.md +252 -0
  61. package/plugin/skills/event-driven/saga-orchestration/SKILL.md +335 -0
  62. package/plugin/skills/event-driven/schema-registry/SKILL.md +328 -0
  63. package/plugin/skills/event-driven/stream-processing/SKILL.md +313 -0
  64. package/plugin/skills/game/game-audio/SKILL.md +446 -0
  65. package/plugin/skills/game/game-networking/SKILL.md +490 -0
  66. package/plugin/skills/game/godot-patterns/SKILL.md +413 -0
  67. package/plugin/skills/game/shader-programming/SKILL.md +492 -0
  68. package/plugin/skills/game/unity-patterns/SKILL.md +488 -0
  69. package/plugin/skills/iot/device-provisioning/SKILL.md +405 -0
  70. package/plugin/skills/iot/edge-computing/SKILL.md +369 -0
  71. package/plugin/skills/iot/industrial-protocols/SKILL.md +438 -0
  72. package/plugin/skills/iot/mqtt-deep/SKILL.md +418 -0
  73. package/plugin/skills/iot/ota-updates/SKILL.md +426 -0
  74. package/plugin/skills/microservices/api-gateway-patterns/SKILL.md +201 -0
  75. package/plugin/skills/microservices/circuit-breaker-patterns/SKILL.md +246 -0
  76. package/plugin/skills/microservices/contract-testing/SKILL.md +284 -0
  77. package/plugin/skills/microservices/distributed-tracing/SKILL.md +246 -0
  78. package/plugin/skills/microservices/service-discovery/SKILL.md +304 -0
  79. package/plugin/skills/microservices/service-mesh/SKILL.md +181 -0
  80. package/plugin/skills/mobile-advanced/mobile-ci-cd/SKILL.md +407 -0
  81. package/plugin/skills/mobile-advanced/mobile-security/SKILL.md +403 -0
  82. package/plugin/skills/mobile-advanced/offline-first/SKILL.md +473 -0
  83. package/plugin/skills/mobile-advanced/push-notifications/SKILL.md +494 -0
  84. package/plugin/skills/mobile-advanced/react-native-deep/SKILL.md +374 -0
  85. package/plugin/skills/simulation/numerical-methods/SKILL.md +434 -0
  86. package/plugin/skills/simulation/parallel-computing/SKILL.md +382 -0
  87. package/plugin/skills/simulation/physics-engines/SKILL.md +377 -0
  88. package/plugin/skills/simulation/validation-verification/SKILL.md +479 -0
  89. package/plugin/skills/simulation/visualization-scientific/SKILL.md +365 -0
  90. package/plugin/workflows/ai-engineering/agent-development.md +3 -3
  91. package/plugin/workflows/ai-engineering/fine-tuning.md +3 -3
  92. package/plugin/workflows/ai-engineering/model-evaluation.md +3 -3
  93. package/plugin/workflows/ai-engineering/prompt-engineering.md +2 -2
  94. package/plugin/workflows/ai-engineering/rag-development.md +4 -4
  95. package/plugin/workflows/ai-ml/data-pipeline.md +188 -0
  96. package/plugin/workflows/ai-ml/experiment-cycle.md +203 -0
  97. package/plugin/workflows/ai-ml/feature-engineering.md +208 -0
  98. package/plugin/workflows/ai-ml/model-deployment.md +199 -0
  99. package/plugin/workflows/ai-ml/monitoring-setup.md +227 -0
  100. package/plugin/workflows/api/api-design.md +1 -1
  101. package/plugin/workflows/api/api-testing.md +2 -2
  102. package/plugin/workflows/content/technical-docs.md +1 -1
  103. package/plugin/workflows/database/migration.md +1 -1
  104. package/plugin/workflows/database/optimization.md +1 -1
  105. package/plugin/workflows/database/schema-design.md +3 -3
  106. package/plugin/workflows/development/bug-fix.md +3 -3
  107. package/plugin/workflows/development/code-review.md +2 -1
  108. package/plugin/workflows/development/feature.md +3 -3
  109. package/plugin/workflows/development/refactor.md +2 -2
  110. package/plugin/workflows/event-driven/consumer-groups.md +190 -0
  111. package/plugin/workflows/event-driven/event-storming.md +172 -0
  112. package/plugin/workflows/event-driven/replay-testing.md +186 -0
  113. package/plugin/workflows/event-driven/saga-implementation.md +206 -0
  114. package/plugin/workflows/event-driven/schema-evolution.md +173 -0
  115. package/plugin/workflows/fullstack/authentication.md +4 -4
  116. package/plugin/workflows/fullstack/full-feature.md +4 -4
  117. package/plugin/workflows/game-dev/content-pipeline.md +218 -0
  118. package/plugin/workflows/game-dev/platform-submission.md +263 -0
  119. package/plugin/workflows/game-dev/playtesting.md +237 -0
  120. package/plugin/workflows/game-dev/prototype-to-production.md +205 -0
  121. package/plugin/workflows/microservices/contract-first.md +151 -0
  122. package/plugin/workflows/microservices/distributed-tracing.md +166 -0
  123. package/plugin/workflows/microservices/domain-decomposition.md +123 -0
  124. package/plugin/workflows/microservices/integration-testing.md +149 -0
  125. package/plugin/workflows/microservices/service-mesh-setup.md +153 -0
  126. package/plugin/workflows/microservices/service-scaffolding.md +151 -0
  127. package/plugin/workflows/omega/1000x-innovation.md +2 -2
  128. package/plugin/workflows/omega/100x-architecture.md +2 -2
  129. package/plugin/workflows/omega/10x-improvement.md +2 -2
  130. package/plugin/workflows/quality/performance-optimization.md +2 -2
  131. package/plugin/workflows/research/best-practices.md +1 -1
  132. package/plugin/workflows/research/technology-research.md +1 -1
  133. package/plugin/workflows/security/penetration-testing.md +3 -3
  134. package/plugin/workflows/security/security-audit.md +3 -3
  135. package/plugin/workflows/sprint/sprint-execution.md +2 -2
  136. package/plugin/workflows/sprint/sprint-retrospective.md +1 -1
  137. package/plugin/workflows/sprint/sprint-setup.md +1 -1
@@ -0,0 +1,227 @@
1
+ ---
2
+ description: Set up comprehensive ML model monitoring for drift and performance
3
+ triggers:
4
+ - manual
5
+ - ml:monitoring
6
+ agents:
7
+ - ml-engineer
8
+ - site-reliability-engineer
9
+ ---
10
+
11
+ # ML Monitoring Setup Workflow
12
+
13
+ Implement production monitoring for ML models.
14
+
15
+ ## Prerequisites
16
+ - [ ] Model deployed to production
17
+ - [ ] Baseline metrics established
18
+ - [ ] Logging infrastructure available
19
+
20
+ ## Phase 1: Monitoring Strategy
21
+
22
+ ### Step 1.1: Define Monitoring Scope
23
+ ```yaml
24
+ agent: ml-engineer
25
+ action: define
26
+ monitoring_types:
27
+ - Data monitoring: Input feature health
28
+ - Model monitoring: Prediction quality
29
+ - Operational monitoring: System health
30
+ - Business monitoring: Impact metrics
31
+ ```
32
+
33
+ ### Step 1.2: Establish Baselines
34
+ ```yaml
35
+ agent: ml-engineer
36
+ action: baseline
37
+ metrics:
38
+ - Feature distributions
39
+ - Prediction distributions
40
+ - Model performance (with labels)
41
+ - Latency percentiles
42
+ window: Training data period
43
+ ```
44
+
45
+ ## Phase 2: Data Monitoring
46
+
47
+ ### Step 2.1: Feature Monitoring
48
+ ```yaml
49
+ agent: ml-engineer
50
+ action: configure
51
+ checks:
52
+ - Missing values: Count nulls per feature
53
+ - Type validation: Expected data types
54
+ - Range validation: Min/max bounds
55
+ - Distribution shift: Statistical tests
56
+ methods:
57
+ - KS test (continuous)
58
+ - Chi-square (categorical)
59
+ - PSI (Population Stability Index)
60
+ ```
61
+
62
+ ### Step 2.2: Data Quality Alerts
63
+ ```yaml
64
+ agent: ml-engineer
65
+ action: configure
66
+ alerts:
67
+ - schema_violation:
68
+ condition: Unexpected schema change
69
+ severity: critical
70
+ - missing_rate_high:
71
+ threshold: "> 5% missing"
72
+ severity: warning
73
+ - distribution_drift:
74
+ threshold: PSI > 0.2
75
+ severity: warning
76
+ ```
77
+
78
+ ## Phase 3: Model Monitoring
79
+
80
+ ### Step 3.1: Prediction Monitoring
81
+ ```yaml
82
+ agent: ml-engineer
83
+ action: configure
84
+ metrics:
85
+ - Prediction distribution
86
+ - Confidence scores
87
+ - Prediction counts by class
88
+ - Output drift detection
89
+ tools:
90
+ - Evidently
91
+ - Whylogs
92
+ - Arize
93
+ - Custom monitoring
94
+ ```
95
+
96
+ ### Step 3.2: Performance Monitoring
97
+ ```yaml
98
+ agent: ml-engineer
99
+ action: configure
100
+ with_ground_truth:
101
+ - Accuracy over time
102
+ - Precision/Recall trends
103
+ - AUC trends
104
+ - Error rate by segment
105
+ delayed_labels:
106
+ - Label collection pipeline
107
+ - Performance calculation job
108
+ - Alerting on degradation
109
+ ```
110
+
111
+ ### Step 3.3: Model Drift Detection
112
+ ```yaml
113
+ agent: ml-engineer
114
+ action: configure
115
+ drift_types:
116
+ - Concept drift: Relationship change
117
+ - Data drift: Feature distribution change
118
+ - Prediction drift: Output change
119
+ detection:
120
+ - Statistical tests
121
+ - Page-Hinkley test
122
+ - ADWIN algorithm
123
+ - Custom detectors
124
+ ```
125
+
126
+ ## Phase 4: Operational Monitoring
127
+
128
+ ### Step 4.1: System Metrics
129
+ ```yaml
130
+ agent: site-reliability-engineer
131
+ action: configure
132
+ metrics:
133
+ - Request latency (p50, p95, p99)
134
+ - Throughput (requests/second)
135
+ - Error rate
136
+ - GPU/CPU utilization
137
+ - Memory usage
138
+ - Queue depth
139
+ ```
140
+
141
+ ### Step 4.2: SLI/SLO Definition
142
+ ```yaml
143
+ agent: site-reliability-engineer
144
+ action: define
145
+ slis:
146
+ - Availability: Successful requests / Total requests
147
+ - Latency: % requests < threshold
148
+ - Freshness: Time since last successful prediction
149
+ slos:
150
+ - availability: 99.9%
151
+ - latency_p99: < 100ms
152
+ - freshness: < 1 minute
153
+ ```
154
+
155
+ ## Phase 5: Alerting and Response
156
+
157
+ ### Step 5.1: Alert Configuration
158
+ ```yaml
159
+ agent: site-reliability-engineer
160
+ action: configure
161
+ alert_rules:
162
+ - model_drift_detected:
163
+ severity: warning
164
+ action: notify_ml_team
165
+ - performance_degradation:
166
+ severity: critical
167
+ action: page_on_call
168
+ - latency_spike:
169
+ severity: warning
170
+ action: auto_scale
171
+ - error_rate_high:
172
+ severity: critical
173
+ action: rollback_ready
174
+ ```
175
+
176
+ ### Step 5.2: Response Playbooks
177
+ ```yaml
178
+ agent: site-reliability-engineer
179
+ action: document
180
+ playbooks:
181
+ - drift_detected:
182
+ - Investigate data changes
183
+ - Check upstream systems
184
+ - Consider retraining
185
+ - performance_degraded:
186
+ - Check recent deployments
187
+ - Analyze error patterns
188
+ - Rollback if needed
189
+ - latency_high:
190
+ - Check resource usage
191
+ - Scale horizontally
192
+ - Optimize batch sizes
193
+ ```
194
+
195
+ ## Phase 6: Dashboards
196
+
197
+ ### Step 6.1: Create Dashboards
198
+ ```yaml
199
+ agent: site-reliability-engineer
200
+ action: create
201
+ dashboards:
202
+ - model_health:
203
+ - Prediction volume
204
+ - Drift metrics
205
+ - Performance trends
206
+ - operational:
207
+ - Latency distribution
208
+ - Error rates
209
+ - Resource usage
210
+ - business:
211
+ - Model impact metrics
212
+ - A/B test results
213
+ ```
214
+
215
+ ## Outputs
216
+ - [ ] Monitoring configuration
217
+ - [ ] Alert rules
218
+ - [ ] Dashboards
219
+ - [ ] SLI/SLO definitions
220
+ - [ ] Response playbooks
221
+
222
+ ## Quality Gates
223
+ - All critical metrics monitored
224
+ - Alerts tested and working
225
+ - Dashboards accurate
226
+ - Playbooks documented
227
+ - Team trained on response
@@ -10,7 +10,7 @@ agents:
10
10
  - docs-manager
11
11
  - code-reviewer
12
12
  skills:
13
- - api-architecture
13
+ - backend/api-architecture
14
14
  commands:
15
15
  - /planning:plan
16
16
  - /quality:api-gen
@@ -8,8 +8,8 @@ agents:
8
8
  - tester
9
9
  - security-auditor
10
10
  skills:
11
- - api-architecture
12
- - owasp
11
+ - backend/api-architecture
12
+ - security/owasp
13
13
  commands:
14
14
  - /dev:test
15
15
  - /quality:security-scan
@@ -10,7 +10,7 @@ agents:
10
10
  - copywriter
11
11
  - code-reviewer
12
12
  skills:
13
- - writing-plans
13
+ - methodology/writing-plans
14
14
  commands:
15
15
  - /planning:doc
16
16
  - /dev:review
@@ -9,7 +9,7 @@ agents:
9
9
  - planner
10
10
  - tester
11
11
  skills:
12
- - database-migration
12
+ - databases/database-migration
13
13
  commands:
14
14
  - /planning:plan
15
15
  - /dev:test
@@ -8,7 +8,7 @@ agents:
8
8
  - database-admin
9
9
  - tester
10
10
  skills:
11
- - database-optimization
11
+ - databases/database-optimization
12
12
  commands:
13
13
  - /quality:optimize
14
14
  - /dev:test
@@ -9,9 +9,9 @@ agents:
9
9
  - planner
10
10
  - tester
11
11
  skills:
12
- - database-schema-design
13
- - postgresql
14
- - mongodb
12
+ - databases/database-schema-design
13
+ - databases/postgresql
14
+ - databases/mongodb
15
15
  commands:
16
16
  - /planning:plan
17
17
  - /dev:feature
@@ -10,9 +10,9 @@ agents:
10
10
  - tester
11
11
  - git-manager
12
12
  skills:
13
- - systematic-debugging
14
- - root-cause-tracing
15
- - test-driven-development
13
+ - methodology/systematic-debugging
14
+ - methodology/root-cause-tracing
15
+ - methodology/test-driven-development
16
16
  commands:
17
17
  - /dev:fix
18
18
  - /dev:fix-hard
@@ -9,7 +9,8 @@ agents:
9
9
  - fullstack-developer
10
10
  - git-manager
11
11
  skills:
12
- - verification-before-completion
12
+ - methodology/verification-before-completion
13
+ - security/owasp
13
14
  commands:
14
15
  - /dev:review
15
16
  - /dev:fix
@@ -11,9 +11,9 @@ agents:
11
11
  - code-reviewer
12
12
  - git-manager
13
13
  skills:
14
- - writing-plans
15
- - executing-plans
16
- - test-driven-development
14
+ - methodology/writing-plans
15
+ - methodology/executing-plans
16
+ - methodology/test-driven-development
17
17
  commands:
18
18
  - /planning:plan
19
19
  - /dev:feature
@@ -10,8 +10,8 @@ agents:
10
10
  - tester
11
11
  - git-manager
12
12
  skills:
13
- - verification-before-completion
14
- - test-driven-development
13
+ - methodology/verification-before-completion
14
+ - methodology/test-driven-development
15
15
  commands:
16
16
  - /quality:refactor
17
17
  - /dev:test
@@ -0,0 +1,190 @@
1
+ ---
2
+ description: Manage Kafka consumer groups for scaling and reliability
3
+ triggers:
4
+ - manual
5
+ - kafka:consumers
6
+ agents:
7
+ - data-engineer
8
+ - site-reliability-engineer
9
+ ---
10
+
11
+ # Consumer Group Management Workflow
12
+
13
+ Configure and manage Kafka consumer groups effectively.
14
+
15
+ ## Prerequisites
16
+ - [ ] Kafka cluster available
17
+ - [ ] Topics created
18
+ - [ ] Consumer application ready
19
+
20
+ ## Phase 1: Consumer Group Design
21
+
22
+ ### Step 1.1: Define Consumer Groups
23
+ ```yaml
24
+ agent: data-engineer
25
+ action: design
26
+ considerations:
27
+ - One group per logical consumer
28
+ - Group ID naming convention
29
+ - Isolation between environments
30
+ naming: "{app}-{env}-{purpose}"
31
+ examples:
32
+ - order-service-prod-orders
33
+ - analytics-prod-clickstream
34
+ ```
35
+
36
+ ### Step 1.2: Partition Assignment
37
+ ```yaml
38
+ agent: data-engineer
39
+ action: configure
40
+ strategies:
41
+ - RangeAssignor: Contiguous partitions per consumer
42
+ - RoundRobinAssignor: Even distribution
43
+ - StickyAssignor: Minimize rebalances
44
+ - CooperativeStickyAssignor: Incremental rebalances
45
+ recommendation: CooperativeStickyAssignor
46
+ ```
47
+
48
+ ## Phase 2: Consumer Configuration
49
+
50
+ ### Step 2.1: Performance Settings
51
+ ```yaml
52
+ agent: data-engineer
53
+ action: configure
54
+ settings:
55
+ fetch.min.bytes: 1
56
+ fetch.max.wait.ms: 500
57
+ max.poll.records: 500
58
+ max.poll.interval.ms: 300000
59
+ session.timeout.ms: 45000
60
+ heartbeat.interval.ms: 15000
61
+ ```
62
+
63
+ ### Step 2.2: Offset Management
64
+ ```yaml
65
+ agent: data-engineer
66
+ action: configure
67
+ offset_settings:
68
+ enable.auto.commit: false # Manual preferred
69
+ auto.offset.reset: earliest # or latest
70
+ isolation.level: read_committed # For transactions
71
+ commit_strategy:
72
+ - After processing batch
73
+ - With idempotency
74
+ - Track last committed
75
+ ```
76
+
77
+ ## Phase 3: Scaling Configuration
78
+
79
+ ### Step 3.1: Consumer Scaling
80
+ ```yaml
81
+ agent: site-reliability-engineer
82
+ action: configure
83
+ scaling:
84
+ min_consumers: 1
85
+ max_consumers: partition_count
86
+ scaling_metric: consumer_lag
87
+ scale_up_threshold: 10000
88
+ scale_down_threshold: 100
89
+ ```
90
+
91
+ ### Step 3.2: Rebalance Optimization
92
+ ```yaml
93
+ agent: data-engineer
94
+ action: configure
95
+ rebalance_settings:
96
+ partition.assignment.strategy: CooperativeStickyAssignor
97
+ max.poll.interval.ms: 300000 # Allow slow processing
98
+ session.timeout.ms: 45000
99
+ heartbeat.interval.ms: 15000
100
+ static_membership:
101
+ group.instance.id: unique_per_consumer
102
+ session.timeout.ms: 300000 # Longer for static
103
+ ```
104
+
105
+ ## Phase 4: Monitoring Setup
106
+
107
+ ### Step 4.1: Lag Monitoring
108
+ ```yaml
109
+ agent: site-reliability-engineer
110
+ action: configure
111
+ metrics:
112
+ - consumer_lag_per_partition
113
+ - consumer_lag_per_group
114
+ - records_consumed_rate
115
+ - rebalances_per_hour
116
+ tools:
117
+ - Burrow
118
+ - Kafka Exporter
119
+ - Built-in JMX metrics
120
+ ```
121
+
122
+ ### Step 4.2: Alerting
123
+ ```yaml
124
+ agent: site-reliability-engineer
125
+ action: configure
126
+ alerts:
127
+ - consumer_lag_critical:
128
+ threshold: 100000
129
+ severity: critical
130
+ - consumer_lag_warning:
131
+ threshold: 10000
132
+ severity: warning
133
+ - rebalance_storm:
134
+ threshold: 5_per_minute
135
+ severity: warning
136
+ - consumer_group_empty:
137
+ condition: active_members == 0
138
+ severity: critical
139
+ ```
140
+
141
+ ## Phase 5: Operations
142
+
143
+ ### Step 5.1: Offset Management Operations
144
+ ```yaml
145
+ agent: site-reliability-engineer
146
+ action: document
147
+ operations:
148
+ reset_to_earliest:
149
+ command: kafka-consumer-groups --reset-offsets --to-earliest
150
+ use_when: Reprocess all events
151
+ reset_to_timestamp:
152
+ command: kafka-consumer-groups --reset-offsets --to-datetime YYYY-MM-DDTHH:mm:SS.sss
153
+ use_when: Reprocess from specific time
154
+ skip_messages:
155
+ command: kafka-consumer-groups --reset-offsets --shift-by N
156
+ use_when: Skip poison messages
157
+ ```
158
+
159
+ ### Step 5.2: Troubleshooting Runbook
160
+ ```yaml
161
+ agent: site-reliability-engineer
162
+ action: document
163
+ scenarios:
164
+ - lag_increasing:
165
+ check_1: Consumer processing speed
165
+ check_2: Partition count vs consumers
167
+ action: Scale consumers or optimize
168
+ - rebalance_storm:
169
+ check_1: Session timeout vs processing time
169
+ check_2: Network issues
171
+ action: Adjust timeouts or use static membership
172
+ - stuck_consumer:
173
+ check_1: max.poll.interval.ms
173
+ check_2: Processing exceptions
175
+ action: Restart or fix code
176
+ ```
177
+
178
+ ## Outputs
179
+ - [ ] Consumer group configuration
180
+ - [ ] Scaling configuration
181
+ - [ ] Monitoring dashboards
182
+ - [ ] Alert rules
183
+ - [ ] Operations runbook
184
+
185
+ ## Quality Gates
186
+ - Consumer lag within SLA
187
+ - No rebalance storms
188
+ - Monitoring in place
189
+ - Alerts configured
190
+ - Runbook documented
@@ -0,0 +1,172 @@
1
+ ---
2
+ description: Facilitate event storming sessions to discover domain events and workflows
3
+ triggers:
4
+ - manual
5
+ - discovery:events
6
+ agents:
7
+ - domain-decomposer
8
+ - architect
9
+ ---
10
+
11
+ # Event Storming Workflow
12
+
13
+ Discover domain events and model business processes.
14
+
15
+ ## Prerequisites
16
+ - [ ] Domain experts available
17
+ - [ ] Whiteboard/digital collaboration tool
18
+ - [ ] Business process documentation
19
+
20
+ ## Phase 1: Big Picture Event Storming
21
+
22
+ ### Step 1.1: Identify Domain Events
23
+ ```yaml
24
+ agent: domain-decomposer
25
+ action: discover
26
+ focus: domain_events
27
+ notation: orange_stickies
28
+ guidelines:
29
+ - Past tense verbs (OrderPlaced, PaymentReceived)
30
+ - Business-meaningful names
31
+ - Temporal ordering
32
+ ```
33
+
34
+ ### Step 1.2: Timeline Organization
35
+ ```yaml
36
+ agent: domain-decomposer
37
+ action: organize
38
+ steps:
39
+ - Place events chronologically
40
+ - Identify parallel flows
41
+ - Mark pivotal events
42
+ - Note questions and hotspots
43
+ ```
44
+
45
+ ## Phase 2: Process Modeling
46
+
47
+ ### Step 2.1: Add Commands
48
+ ```yaml
49
+ agent: domain-decomposer
50
+ action: model
51
+ element: commands
52
+ notation: blue_stickies
53
+ pattern:
54
+ - Command triggers Event
55
+ - Name as imperative (PlaceOrder)
56
+ - Link to the event it triggers
57
+ ```
58
+
59
+ ### Step 2.2: Add Actors
60
+ ```yaml
61
+ agent: domain-decomposer
62
+ action: model
63
+ element: actors
64
+ notation: yellow_stickies
65
+ types:
66
+ - Users (roles)
67
+ - External systems
68
+ - Time (scheduled events)
69
+ ```
70
+
71
+ ### Step 2.3: Add Policies
72
+ ```yaml
73
+ agent: domain-decomposer
74
+ action: model
75
+ element: policies
76
+ notation: purple_stickies
77
+ pattern: "When [Event] then [Command]"
78
+ examples:
79
+ - "When OrderPlaced then ReserveInventory"
80
+ - "When PaymentFailed then CancelOrder"
81
+ ```
82
+
83
+ ## Phase 3: Aggregate Discovery
84
+
85
+ ### Step 3.1: Group Related Concepts
86
+ ```yaml
87
+ agent: domain-decomposer
88
+ action: discover
89
+ element: aggregates
90
+ notation: yellow_boundary
91
+ criteria:
92
+ - Commands that modify together
93
+ - Events from same source
94
+ - Consistency boundary
95
+ ```
96
+
97
+ ### Step 3.2: Define Aggregate Boundaries
98
+ ```yaml
99
+ agent: architect
100
+ action: define
101
+ boundaries:
102
+ - Invariants protected
103
+ - Transaction scope
104
+ - Entity relationships
105
+ outputs:
106
+ - aggregate_diagram
107
+ - invariant_rules
108
+ ```
109
+
110
+ ## Phase 4: Bounded Context Identification
111
+
112
+ ### Step 4.1: Identify Context Boundaries
113
+ ```yaml
114
+ agent: domain-decomposer
115
+ action: discover
116
+ element: bounded_contexts
117
+ indicators:
118
+ - Language changes
119
+ - Different stakeholders
120
+ - Different models for same concept
121
+ - Natural team boundaries
122
+ ```
123
+
124
+ ### Step 4.2: Map Context Relationships
125
+ ```yaml
126
+ agent: architect
127
+ action: map
128
+ relationships:
129
+ - Upstream/Downstream
130
+ - Conformist/Anti-corruption Layer
131
+ - Partnership
132
+ - Customer/Supplier
133
+ ```
134
+
135
+ ## Phase 5: Documentation
136
+
137
+ ### Step 5.1: Create Event Catalog
138
+ ```yaml
139
+ agent: domain-decomposer
140
+ action: document
141
+ outputs:
142
+ - event_catalog:
143
+ - name
144
+ - description
145
+ - payload_schema
146
+ - producers
147
+ - consumers
148
+ ```
149
+
150
+ ### Step 5.2: Create Process Diagrams
151
+ ```yaml
152
+ agent: architect
153
+ action: generate
154
+ outputs:
155
+ - process_flow_diagrams
156
+ - aggregate_diagrams
157
+ - context_maps
158
+ ```
159
+
160
+ ## Outputs
161
+ - [ ] Domain event catalog
162
+ - [ ] Command list
163
+ - [ ] Aggregate definitions
164
+ - [ ] Bounded context map
165
+ - [ ] Process flow diagrams
166
+
167
+ ## Quality Gates
168
+ - All events have clear ownership
169
+ - Aggregates have defined invariants
170
+ - Context boundaries are explicit
171
+ - Ubiquitous language documented
172
+ - Stakeholders validated model