omgkit 2.12.0 → 2.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/README.md +97 -12
  2. package/package.json +2 -2
  3. package/plugin/agents/api-designer.md +5 -0
  4. package/plugin/agents/architect.md +8 -0
  5. package/plugin/agents/brainstormer.md +4 -0
  6. package/plugin/agents/cicd-manager.md +6 -0
  7. package/plugin/agents/code-reviewer.md +6 -0
  8. package/plugin/agents/copywriter.md +2 -0
  9. package/plugin/agents/data-engineer.md +255 -0
  10. package/plugin/agents/database-admin.md +10 -0
  11. package/plugin/agents/debugger.md +10 -0
  12. package/plugin/agents/devsecops.md +314 -0
  13. package/plugin/agents/docs-manager.md +4 -0
  14. package/plugin/agents/domain-decomposer.md +181 -0
  15. package/plugin/agents/embedded-systems.md +397 -0
  16. package/plugin/agents/fullstack-developer.md +12 -0
  17. package/plugin/agents/game-systems-designer.md +375 -0
  18. package/plugin/agents/git-manager.md +10 -0
  19. package/plugin/agents/journal-writer.md +2 -0
  20. package/plugin/agents/ml-engineer.md +284 -0
  21. package/plugin/agents/observability-engineer.md +353 -0
  22. package/plugin/agents/oracle.md +9 -0
  23. package/plugin/agents/performance-engineer.md +290 -0
  24. package/plugin/agents/pipeline-architect.md +6 -0
  25. package/plugin/agents/planner.md +12 -0
  26. package/plugin/agents/platform-engineer.md +325 -0
  27. package/plugin/agents/project-manager.md +3 -0
  28. package/plugin/agents/researcher.md +5 -0
  29. package/plugin/agents/scientific-computing.md +426 -0
  30. package/plugin/agents/scout.md +3 -0
  31. package/plugin/agents/security-auditor.md +7 -0
  32. package/plugin/agents/sprint-master.md +17 -0
  33. package/plugin/agents/tester.md +10 -0
  34. package/plugin/agents/ui-ux-designer.md +12 -0
  35. package/plugin/agents/vulnerability-scanner.md +6 -0
  36. package/plugin/commands/data/pipeline.md +47 -0
  37. package/plugin/commands/data/quality.md +49 -0
  38. package/plugin/commands/domain/analyze.md +34 -0
  39. package/plugin/commands/domain/map.md +41 -0
  40. package/plugin/commands/game/balance.md +56 -0
  41. package/plugin/commands/game/optimize.md +62 -0
  42. package/plugin/commands/iot/provision.md +58 -0
  43. package/plugin/commands/ml/evaluate.md +47 -0
  44. package/plugin/commands/ml/train.md +48 -0
  45. package/plugin/commands/perf/benchmark.md +54 -0
  46. package/plugin/commands/perf/profile.md +49 -0
  47. package/plugin/commands/platform/blueprint.md +56 -0
  48. package/plugin/commands/security/audit.md +54 -0
  49. package/plugin/commands/security/scan.md +55 -0
  50. package/plugin/commands/sre/dashboard.md +53 -0
  51. package/plugin/registry.yaml +711 -0
  52. package/plugin/skills/ai-ml/experiment-tracking/SKILL.md +338 -0
  53. package/plugin/skills/ai-ml/feature-stores/SKILL.md +340 -0
  54. package/plugin/skills/ai-ml/llm-ops/SKILL.md +454 -0
  55. package/plugin/skills/ai-ml/ml-pipelines/SKILL.md +390 -0
  56. package/plugin/skills/ai-ml/model-monitoring/SKILL.md +398 -0
  57. package/plugin/skills/ai-ml/model-serving/SKILL.md +386 -0
  58. package/plugin/skills/event-driven/cqrs-patterns/SKILL.md +348 -0
  59. package/plugin/skills/event-driven/event-sourcing/SKILL.md +334 -0
  60. package/plugin/skills/event-driven/kafka-deep/SKILL.md +252 -0
  61. package/plugin/skills/event-driven/saga-orchestration/SKILL.md +335 -0
  62. package/plugin/skills/event-driven/schema-registry/SKILL.md +328 -0
  63. package/plugin/skills/event-driven/stream-processing/SKILL.md +313 -0
  64. package/plugin/skills/game/game-audio/SKILL.md +446 -0
  65. package/plugin/skills/game/game-networking/SKILL.md +490 -0
  66. package/plugin/skills/game/godot-patterns/SKILL.md +413 -0
  67. package/plugin/skills/game/shader-programming/SKILL.md +492 -0
  68. package/plugin/skills/game/unity-patterns/SKILL.md +488 -0
  69. package/plugin/skills/iot/device-provisioning/SKILL.md +405 -0
  70. package/plugin/skills/iot/edge-computing/SKILL.md +369 -0
  71. package/plugin/skills/iot/industrial-protocols/SKILL.md +438 -0
  72. package/plugin/skills/iot/mqtt-deep/SKILL.md +418 -0
  73. package/plugin/skills/iot/ota-updates/SKILL.md +426 -0
  74. package/plugin/skills/microservices/api-gateway-patterns/SKILL.md +201 -0
  75. package/plugin/skills/microservices/circuit-breaker-patterns/SKILL.md +246 -0
  76. package/plugin/skills/microservices/contract-testing/SKILL.md +284 -0
  77. package/plugin/skills/microservices/distributed-tracing/SKILL.md +246 -0
  78. package/plugin/skills/microservices/service-discovery/SKILL.md +304 -0
  79. package/plugin/skills/microservices/service-mesh/SKILL.md +181 -0
  80. package/plugin/skills/mobile-advanced/mobile-ci-cd/SKILL.md +407 -0
  81. package/plugin/skills/mobile-advanced/mobile-security/SKILL.md +403 -0
  82. package/plugin/skills/mobile-advanced/offline-first/SKILL.md +473 -0
  83. package/plugin/skills/mobile-advanced/push-notifications/SKILL.md +494 -0
  84. package/plugin/skills/mobile-advanced/react-native-deep/SKILL.md +374 -0
  85. package/plugin/skills/simulation/numerical-methods/SKILL.md +434 -0
  86. package/plugin/skills/simulation/parallel-computing/SKILL.md +382 -0
  87. package/plugin/skills/simulation/physics-engines/SKILL.md +377 -0
  88. package/plugin/skills/simulation/validation-verification/SKILL.md +479 -0
  89. package/plugin/skills/simulation/visualization-scientific/SKILL.md +365 -0
  90. package/plugin/templates/autonomous/archetypes/event-driven-app.yaml +460 -0
  91. package/plugin/templates/autonomous/archetypes/microservices-app.yaml +431 -0
  92. package/plugin/templates/autonomous/state-schema.yaml +1 -1
  93. package/plugin/workflows/ai-engineering/agent-development.md +3 -3
  94. package/plugin/workflows/ai-engineering/fine-tuning.md +3 -3
  95. package/plugin/workflows/ai-engineering/model-evaluation.md +3 -3
  96. package/plugin/workflows/ai-engineering/prompt-engineering.md +2 -2
  97. package/plugin/workflows/ai-engineering/rag-development.md +4 -4
  98. package/plugin/workflows/ai-ml/data-pipeline.md +188 -0
  99. package/plugin/workflows/ai-ml/experiment-cycle.md +203 -0
  100. package/plugin/workflows/ai-ml/feature-engineering.md +208 -0
  101. package/plugin/workflows/ai-ml/model-deployment.md +199 -0
  102. package/plugin/workflows/ai-ml/monitoring-setup.md +227 -0
  103. package/plugin/workflows/api/api-design.md +1 -1
  104. package/plugin/workflows/api/api-testing.md +2 -2
  105. package/plugin/workflows/content/technical-docs.md +1 -1
  106. package/plugin/workflows/database/migration.md +1 -1
  107. package/plugin/workflows/database/optimization.md +1 -1
  108. package/plugin/workflows/database/schema-design.md +3 -3
  109. package/plugin/workflows/development/bug-fix.md +3 -3
  110. package/plugin/workflows/development/code-review.md +2 -1
  111. package/plugin/workflows/development/feature.md +3 -3
  112. package/plugin/workflows/development/refactor.md +2 -2
  113. package/plugin/workflows/event-driven/consumer-groups.md +190 -0
  114. package/plugin/workflows/event-driven/event-storming.md +172 -0
  115. package/plugin/workflows/event-driven/replay-testing.md +186 -0
  116. package/plugin/workflows/event-driven/saga-implementation.md +206 -0
  117. package/plugin/workflows/event-driven/schema-evolution.md +173 -0
  118. package/plugin/workflows/fullstack/authentication.md +4 -4
  119. package/plugin/workflows/fullstack/full-feature.md +4 -4
  120. package/plugin/workflows/game-dev/content-pipeline.md +218 -0
  121. package/plugin/workflows/game-dev/platform-submission.md +263 -0
  122. package/plugin/workflows/game-dev/playtesting.md +237 -0
  123. package/plugin/workflows/game-dev/prototype-to-production.md +205 -0
  124. package/plugin/workflows/microservices/contract-first.md +151 -0
  125. package/plugin/workflows/microservices/distributed-tracing.md +166 -0
  126. package/plugin/workflows/microservices/domain-decomposition.md +123 -0
  127. package/plugin/workflows/microservices/integration-testing.md +149 -0
  128. package/plugin/workflows/microservices/service-mesh-setup.md +153 -0
  129. package/plugin/workflows/microservices/service-scaffolding.md +151 -0
  130. package/plugin/workflows/omega/1000x-innovation.md +2 -2
  131. package/plugin/workflows/omega/100x-architecture.md +2 -2
  132. package/plugin/workflows/omega/10x-improvement.md +2 -2
  133. package/plugin/workflows/quality/performance-optimization.md +2 -2
  134. package/plugin/workflows/research/best-practices.md +1 -1
  135. package/plugin/workflows/research/technology-research.md +1 -1
  136. package/plugin/workflows/security/penetration-testing.md +3 -3
  137. package/plugin/workflows/security/security-audit.md +3 -3
  138. package/plugin/workflows/sprint/sprint-execution.md +2 -2
  139. package/plugin/workflows/sprint/sprint-retrospective.md +1 -1
  140. package/plugin/workflows/sprint/sprint-setup.md +1 -1
@@ -0,0 +1,246 @@
1
+ ---
2
+ name: distributed-tracing
3
+ description: Comprehensive distributed tracing with Jaeger, Zipkin, OpenTelemetry, correlation IDs, and span design.
4
+ ---
5
+
6
+ # Distributed Tracing
7
+
8
+ Comprehensive distributed tracing with Jaeger, Zipkin, OpenTelemetry, correlation IDs, and span design.
9
+
10
+ ## Overview
11
+
12
+ Distributed tracing tracks requests as they flow through multiple services, enabling debugging and performance analysis in microservices architectures.
13
+
14
+ ## Key Concepts
15
+
16
+ ### Trace Model
17
+ - **Trace**: End-to-end request journey
18
+ - **Span**: Single operation within a trace
19
+ - **Span Context**: Propagated trace information
20
+ - **Baggage**: Custom key-value pairs carried across services
21
+
22
+ ### Span Attributes
23
+ - **Operation Name**: What the span represents
24
+ - **Start/End Time**: Duration measurement
25
+ - **Tags**: Indexed metadata for querying
26
+ - **Logs**: Time-stamped events within span
27
+ - **Status**: Success, error, or unset
28
+
29
+ ## OpenTelemetry Implementation
30
+
31
+ ### Instrumentation Setup
32
+ ```javascript
33
+ // Node.js OpenTelemetry setup
34
+ const { NodeTracerProvider } = require('@opentelemetry/sdk-trace-node');
35
+ const { BatchSpanProcessor } = require('@opentelemetry/sdk-trace-base');
36
+ const { JaegerExporter } = require('@opentelemetry/exporter-jaeger');
37
+ const { registerInstrumentations } = require('@opentelemetry/instrumentation');
38
+ const { HttpInstrumentation } = require('@opentelemetry/instrumentation-http');
39
+ const { ExpressInstrumentation } = require('@opentelemetry/instrumentation-express');
40
+
41
+ const provider = new NodeTracerProvider();
42
+
43
+ provider.addSpanProcessor(
44
+ new BatchSpanProcessor(
45
+ new JaegerExporter({
46
+ endpoint: 'http://jaeger:14268/api/traces',
47
+ })
48
+ )
49
+ );
50
+
51
+ provider.register();
52
+
53
+ registerInstrumentations({
54
+ instrumentations: [
55
+ new HttpInstrumentation(),
56
+ new ExpressInstrumentation(),
57
+ ],
58
+ });
59
+ ```
60
+
61
+ ### Manual Span Creation
62
+ ```javascript
63
+ const { trace, SpanStatusCode } = require('@opentelemetry/api');
64
+
65
+ const tracer = trace.getTracer('my-service');
66
+
67
+ async function processOrder(orderId) {
68
+ return tracer.startActiveSpan('processOrder', async (span) => {
69
+ try {
70
+ span.setAttribute('order.id', orderId);
71
+
72
+ // Child span for database operation
73
+ await tracer.startActiveSpan('db.query', async (dbSpan) => {
74
+ dbSpan.setAttribute('db.system', 'postgresql');
75
+ dbSpan.setAttribute('db.statement', 'SELECT * FROM orders WHERE id = $1');
76
+ try { await db.query('SELECT * FROM orders WHERE id = $1', [orderId]); }
77
+ finally { dbSpan.end(); }
78
+ });
79
+
80
+ span.setStatus({ code: SpanStatusCode.OK });
81
+ } catch (error) {
82
+ span.setStatus({ code: SpanStatusCode.ERROR, message: error.message });
83
+ span.recordException(error);
84
+ throw error;
85
+ } finally {
86
+ span.end();
87
+ }
88
+ });
89
+ }
90
+ ```
91
+
92
+ ### Context Propagation
93
+ ```javascript
94
+ const { context, propagation } = require('@opentelemetry/api');
95
+
96
+ // Extract context from incoming request
97
+ app.use((req, res, next) => {
98
+ const ctx = propagation.extract(context.active(), req.headers);
99
+ context.with(ctx, next);
100
+ });
101
+
102
+ // Inject context into outgoing request
103
+ async function callService(url) {
104
+ const headers = {};
105
+ propagation.inject(context.active(), headers);
106
+
107
+ return fetch(url, { headers });
108
+ }
109
+ ```
110
+
111
+ ## Jaeger Configuration
112
+
113
+ ### Kubernetes Deployment
114
+ ```yaml
115
+ apiVersion: jaegertracing.io/v1
116
+ kind: Jaeger
117
+ metadata:
118
+ name: jaeger
119
+ spec:
120
+ strategy: production
121
+ storage:
122
+ type: elasticsearch
123
+ elasticsearch:
124
+ nodeCount: 3
125
+ resources:
126
+ requests:
127
+ cpu: 1
128
+ memory: 4Gi
129
+ collector:
130
+ maxReplicas: 5
131
+ query:
132
+ replicas: 2
133
+ ```
134
+
135
+ ### Sampling Configuration
136
+ ```yaml
137
+ # Jaeger sampling configuration
138
+ apiVersion: v1
139
+ kind: ConfigMap
140
+ metadata:
141
+ name: jaeger-sampling
142
+ data:
143
+ sampling: |
144
+ {
145
+ "service_strategies": [
146
+ {
147
+ "service": "order-service",
148
+ "type": "probabilistic",
149
+ "param": 0.5
150
+ },
151
+ {
152
+ "service": "payment-service",
153
+ "type": "ratelimiting",
154
+ "param": 100
155
+ }
156
+ ],
157
+ "default_strategy": {
158
+ "type": "probabilistic",
159
+ "param": 0.1
160
+ }
161
+ }
162
+ ```
163
+
164
+ ## Span Design Guidelines
165
+
166
+ ### Naming Conventions
167
+ ```
168
+ HTTP spans: HTTP {METHOD} {route}
169
+ HTTP GET /api/users/:id
170
+
171
+ Database: {db.system}.{operation}
172
+ postgresql.query
173
+
174
+ Message: {messaging.system} {operation} {destination}
175
+ kafka send orders-topic
176
+
177
+ RPC: {rpc.system}/{service}/{method}
178
+ grpc/UserService/GetUser
179
+ ```
180
+
181
+ ### Essential Attributes
182
+ ```javascript
183
+ // HTTP spans
184
+ span.setAttribute('http.method', 'GET');
185
+ span.setAttribute('http.url', 'https://api.example.com/users/123');
186
+ span.setAttribute('http.status_code', 200);
187
+ span.setAttribute('http.request_content_length', 0);
188
+ span.setAttribute('http.response_content_length', 1234);
189
+
190
+ // Database spans
191
+ span.setAttribute('db.system', 'postgresql');
192
+ span.setAttribute('db.name', 'mydb');
193
+ span.setAttribute('db.statement', 'SELECT * FROM users WHERE id = $1');
194
+ span.setAttribute('db.operation', 'SELECT');
195
+
196
+ // Messaging spans
197
+ span.setAttribute('messaging.system', 'kafka');
198
+ span.setAttribute('messaging.destination', 'orders');
199
+ span.setAttribute('messaging.operation', 'send');
200
+ ```
201
+
202
+ ## Best Practices
203
+
204
+ 1. **Consistent Naming**: Follow semantic conventions
205
+ 2. **Don't Over-Trace**: Sample appropriately
206
+ 3. **Meaningful Spans**: Business-relevant operations
207
+ 4. **Error Recording**: Always record exceptions
208
+ 5. **Context Propagation**: Ensure trace continuity
209
+
210
+ ## Sampling Strategies
211
+
212
+ ### Head-Based Sampling
213
+ - Decision made at trace start
214
+ - Simpler, consistent
215
+ - May miss interesting traces
216
+
217
+ ### Tail-Based Sampling
218
+ - Decision made at trace end
219
+ - Keeps all errors and slow traces
220
+ - More resource intensive
221
+
222
+ ### Adaptive Sampling
223
+ - Adjusts rate based on traffic
224
+ - Balances cost and coverage
225
+ - Best for variable traffic
226
+
227
+ ## Anti-Patterns
228
+
229
+ - Creating spans for every function call
230
+ - Not propagating context across service boundaries
231
+ - Ignoring span errors
232
+ - Sampling 100% in production
233
+ - Not correlating traces with logs
234
+
235
+ ## When to Use
236
+
237
+ - Microservices with complex request flows
238
+ - Debugging latency issues
239
+ - Understanding service dependencies
240
+ - Capacity planning
241
+
242
+ ## When NOT to Use
243
+
244
+ - Monolithic applications
245
+ - Very high-throughput systems without sampling
246
+ - When storage costs are a concern
@@ -0,0 +1,304 @@
1
+ ---
2
+ name: service-discovery
3
+ description: Service discovery patterns with Consul, Kubernetes DNS, Eureka, health checks, and client-side load balancing.
4
+ ---
5
+
6
+ # Service Discovery
7
+
8
+ Service discovery patterns with Consul, Kubernetes DNS, Eureka, health checks, and client-side load balancing.
9
+
10
+ ## Overview
11
+
12
+ Service discovery enables services to find and communicate with each other dynamically without hardcoded addresses.
13
+
14
+ ## Discovery Patterns
15
+
16
+ ### Client-Side Discovery
17
+ - Client queries registry
18
+ - Client performs load balancing
19
+ - Examples: Eureka, Consul client
20
+
21
+ ### Server-Side Discovery
22
+ - Load balancer queries registry
23
+ - Load balancer routes requests
24
+ - Examples: Kubernetes Services, AWS ELB
25
+
26
+ ### DNS-Based Discovery
27
+ - Services registered as DNS records
28
+ - Standard DNS resolution
29
+ - Examples: Kubernetes DNS, Consul DNS
30
+
31
+ ## Kubernetes Service Discovery
32
+
33
+ ### ClusterIP Service
34
+ ```yaml
35
+ apiVersion: v1
36
+ kind: Service
37
+ metadata:
38
+ name: user-service
39
+ spec:
40
+ selector:
41
+ app: user-service
42
+ ports:
43
+ - port: 80
44
+ targetPort: 8080
45
+ type: ClusterIP
46
+ ```
47
+
48
+ ### Headless Service (Direct Pod Discovery)
49
+ ```yaml
50
+ apiVersion: v1
51
+ kind: Service
52
+ metadata:
53
+ name: user-service-headless
54
+ spec:
55
+ clusterIP: None
56
+ selector:
57
+ app: user-service
58
+ ports:
59
+ - port: 8080
60
+ ```
61
+
62
+ ### DNS Resolution
63
+ ```
64
+ # ClusterIP service
65
+ user-service.default.svc.cluster.local
66
+
67
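+ # Headless service (user-service-headless.default.svc.cluster.local) returns all pod IPs
68
+ # SRV records for port discovery (created only for named ports, e.g. a port named "http")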
69
+ _http._tcp.user-service.default.svc.cluster.local
70
+ ```
71
+
72
+ ### EndpointSlices
73
+ ```yaml
74
+ apiVersion: discovery.k8s.io/v1
75
+ kind: EndpointSlice
76
+ metadata:
77
+ name: user-service-abc
78
+ labels:
79
+ kubernetes.io/service-name: user-service
80
+ addressType: IPv4
81
+ endpoints:
82
+ - addresses:
83
+ - "10.0.0.1"
84
+ conditions:
85
+ ready: true
86
+ serving: true
87
+ ports:
88
+ - port: 8080
89
+ protocol: TCP
90
+ ```
91
+
92
+ ## Consul Service Discovery
93
+
94
+ ### Service Registration
95
+ ```json
96
+ {
97
+ "service": {
98
+ "name": "user-service",
99
+ "id": "user-service-1",
100
+ "port": 8080,
101
+ "tags": ["v1", "production"],
102
+ "meta": {
103
+ "version": "1.0.0"
104
+ },
105
+ "check": {
106
+ "http": "http://localhost:8080/health",
107
+ "interval": "10s",
108
+ "timeout": "5s"
109
+ }
110
+ }
111
+ }
112
+ ```
113
+
114
+ ### Health Checks
115
+ ```json
116
+ {
117
+ "checks": [
118
+ {
119
+ "id": "http-check",
120
+ "name": "HTTP Health Check",
121
+ "http": "http://localhost:8080/health",
122
+ "interval": "10s",
123
+ "timeout": "5s"
124
+ },
125
+ {
126
+ "id": "tcp-check",
127
+ "name": "TCP Check",
128
+ "tcp": "localhost:8080",
129
+ "interval": "10s"
130
+ },
131
+ {
132
+ "id": "script-check",
133
+ "name": "Script Check",
134
+ "args": ["/opt/check.sh"],
135
+ "interval": "30s"
136
+ }
137
+ ]
138
+ }
139
+ ```
140
+
141
+ ### Service Query
142
+ ```bash
143
+ # DNS query
144
+ dig @127.0.0.1 -p 8600 user-service.service.consul
145
+
146
+ # HTTP API
147
+ curl http://localhost:8500/v1/catalog/service/user-service
148
+
149
+ # Health-filtered
150
+ curl 'http://localhost:8500/v1/health/service/user-service?passing=true'
151
+ ```
152
+
153
+ ## Client-Side Load Balancing
154
+
155
+ ### gRPC with Service Discovery
156
+ ```go
157
+ import (
158
+ "google.golang.org/grpc"
159
+ "google.golang.org/grpc/resolver"
160
+ )
161
+
162
+ // Custom resolver for Consul
163
+ type consulResolver struct {
164
+ consulClient *api.Client
165
+ serviceName string
166
+ }
167
+
168
+ func (r *consulResolver) ResolveNow(options resolver.ResolveNowOptions) {
169
+ services, _, _ := r.consulClient.Health().Service(
170
+ r.serviceName, "", true, nil)
171
+
172
+ var addrs []resolver.Address
173
+ for _, s := range services {
174
+ addrs = append(addrs, resolver.Address{
175
+ Addr: fmt.Sprintf("%s:%d", s.Service.Address, s.Service.Port),
176
+ })
177
+ }
178
+ r.cc.UpdateState(resolver.State{Addresses: addrs})
179
+ }
180
+
181
+ // Usage
182
+ conn, _ := grpc.Dial(
183
+ "consul:///user-service",
184
+ grpc.WithDefaultServiceConfig(`{"loadBalancingPolicy":"round_robin"}`),
185
+ )
186
+ ```
187
+
188
+ ### Node.js with Consul
189
+ ```javascript
190
+ const Consul = require('consul');
191
+ const consul = new Consul();
192
+
193
+ async function discoverService(serviceName) {
194
+ const services = await consul.health.service({
195
+ service: serviceName,
196
+ passing: true
197
+ });
198
+
199
+ return services.map(s => ({
200
+ address: s.Service.Address,
201
+ port: s.Service.Port,
202
+ meta: s.Service.Meta
203
+ }));
204
+ }
205
+
206
+ // Client-side load balancing
207
+ class ServiceClient {
208
+ constructor(serviceName) {
209
+ this.serviceName = serviceName;
210
+ this.instances = [];
211
+ this.currentIndex = 0;
212
+ this.refresh().catch(console.error);
213
+ setInterval(() => this.refresh().catch(console.error), 30000);
214
+ }
215
+
216
+ async refresh() {
217
+ this.instances = await discoverService(this.serviceName);
218
+ }
219
+
220
+ getNextInstance() {
221
+ if (this.instances.length === 0) {
222
+ throw new Error('No instances available');
223
+ }
224
+ const instance = this.instances[this.currentIndex % this.instances.length];
225
+ this.currentIndex = (this.currentIndex + 1) % this.instances.length;
226
+ return instance;
227
+ }
228
+ }
229
+ ```
230
+
231
+ ## Health Check Patterns
232
+
233
+ ### Liveness vs Readiness
234
+ ```yaml
235
+ # Kubernetes probes
236
+ livenessProbe:
237
+ httpGet:
238
+ path: /health/live
239
+ port: 8080
240
+ initialDelaySeconds: 10
241
+ periodSeconds: 10
242
+ failureThreshold: 3
243
+
244
+ readinessProbe:
245
+ httpGet:
246
+ path: /health/ready
247
+ port: 8080
248
+ initialDelaySeconds: 5
249
+ periodSeconds: 5
250
+ failureThreshold: 3
251
+ ```
252
+
253
+ ### Health Endpoint Implementation
254
+ ```javascript
255
+ app.get('/health/live', (req, res) => {
256
+ // Basic liveness - can the app respond?
257
+ res.status(200).json({ status: 'alive' });
258
+ });
259
+
260
+ app.get('/health/ready', async (req, res) => {
261
+ // Readiness - can it serve traffic?
262
+ const checks = {
263
+ database: await checkDatabase(),
264
+ cache: await checkCache(),
265
+ dependencies: await checkDependencies()
266
+ };
267
+
268
+ const allHealthy = Object.values(checks).every(c => c.healthy);
269
+ res.status(allHealthy ? 200 : 503).json({
270
+ status: allHealthy ? 'ready' : 'not ready',
271
+ checks
272
+ });
273
+ });
274
+ ```
275
+
276
+ ## Best Practices
277
+
278
+ 1. **Health Checks**: Always implement meaningful health checks
279
+ 2. **Graceful Shutdown**: Deregister before stopping
280
+ 3. **Client Caching**: Cache discovery results with TTL
281
+ 4. **Fallback**: Handle discovery failures gracefully
282
+ 5. **Monitoring**: Track discovery latency and failures
283
+
284
+ ## Anti-Patterns
285
+
286
+ - Hardcoding service addresses
287
+ - Not implementing health checks
288
+ - Too frequent discovery polling
289
+ - Not handling discovery failures
290
+ - Ignoring service metadata
291
+
292
+ ## When to Use
293
+
294
+ - Dynamic environments (containers, cloud)
295
+ - Frequently scaling services
296
+ - Multiple instances of services
297
+ - Zero-downtime deployments
298
+
299
+ ## When NOT to Use
300
+
301
+ - Static infrastructure
302
+ - Single instance services
303
+ - When DNS is sufficient
304
+ - Very simple architectures
@@ -0,0 +1,181 @@
1
+ ---
2
+ name: service-mesh
3
+ description: Advanced service mesh implementation with Istio, Linkerd, traffic management, mTLS, and observability.
4
+ ---
5
+
6
+ # Service Mesh
7
+
8
+ Advanced service mesh implementation with Istio, Linkerd, traffic management, mTLS, and observability.
9
+
10
+ ## Overview
11
+
12
+ Service mesh provides infrastructure-level features for service-to-service communication including traffic management, security, and observability without changing application code.
13
+
14
+ ## Key Concepts
15
+
16
+ ### Traffic Management
17
+ - **Virtual Services**: Route traffic based on rules
18
+ - **Destination Rules**: Configure load balancing, connection pools
19
+ - **Gateways**: Manage ingress/egress traffic
20
+ - **Service Entries**: Add external services to mesh
21
+
22
+ ### Security
23
+ - **mTLS**: Mutual TLS between services
24
+ - **Authorization Policies**: Fine-grained access control
25
+ - **Peer Authentication**: Identity verification
26
+ - **Request Authentication**: JWT validation
27
+
28
+ ### Observability
29
+ - **Distributed Tracing**: Automatic trace propagation
30
+ - **Metrics**: Automatic metric collection
31
+ - **Access Logging**: Request/response logging
32
+ - **Service Graph**: Visualization of dependencies
33
+
34
+ ## Istio Patterns
35
+
36
+ ### Traffic Routing
37
+ ```yaml
38
+ apiVersion: networking.istio.io/v1beta1
39
+ kind: VirtualService
40
+ metadata:
41
+ name: reviews-route
42
+ spec:
43
+ hosts:
44
+ - reviews
45
+ http:
46
+ - match:
47
+ - headers:
48
+ end-user:
49
+ exact: jason
50
+ route:
51
+ - destination:
52
+ host: reviews
53
+ subset: v2
54
+ - route:
55
+ - destination:
56
+ host: reviews
57
+ subset: v1
58
+ ```
59
+
60
+ ### Canary Deployment
61
+ ```yaml
62
+ apiVersion: networking.istio.io/v1beta1
63
+ kind: VirtualService
64
+ metadata:
65
+ name: my-service
66
+ spec:
67
+ hosts:
68
+ - my-service
69
+ http:
70
+ - route:
71
+ - destination:
72
+ host: my-service
73
+ subset: v1
74
+ weight: 90
75
+ - destination:
76
+ host: my-service
77
+ subset: v2
78
+ weight: 10
79
+ ```
80
+
81
+ ### Circuit Breaker
82
+ ```yaml
83
+ apiVersion: networking.istio.io/v1beta1
84
+ kind: DestinationRule
85
+ metadata:
86
+ name: my-service
87
+ spec:
88
+ host: my-service
89
+ trafficPolicy:
90
+ connectionPool:
91
+ tcp:
92
+ maxConnections: 100
93
+ http:
94
+ h2UpgradePolicy: UPGRADE
95
+ http1MaxPendingRequests: 100
96
+ http2MaxRequests: 1000
97
+ outlierDetection:
98
+ consecutive5xxErrors: 5
99
+ interval: 30s
100
+ baseEjectionTime: 30s
101
+ maxEjectionPercent: 50
102
+ ```
103
+
104
+ ### mTLS Configuration
105
+ ```yaml
106
+ apiVersion: security.istio.io/v1beta1
107
+ kind: PeerAuthentication
108
+ metadata:
109
+ name: default
110
+ namespace: istio-system
111
+ spec:
112
+ mtls:
113
+ mode: STRICT
114
+ ```
115
+
116
+ ## Linkerd Patterns
117
+
118
+ ### Service Profile
119
+ ```yaml
120
+ apiVersion: linkerd.io/v1alpha2
121
+ kind: ServiceProfile
122
+ metadata:
123
+ name: my-service.default.svc.cluster.local
124
+ spec:
125
+ routes:
126
+ - name: GET /api/users
127
+ condition:
128
+ method: GET
129
+ pathRegex: /api/users
130
+ responseClasses:
131
+ - condition:
132
+ status:
133
+ min: 500
134
+ max: 599
135
+ isFailure: true
136
+ ```
137
+
138
+ ### Traffic Split
139
+ ```yaml
140
+ apiVersion: split.smi-spec.io/v1alpha1
141
+ kind: TrafficSplit
142
+ metadata:
143
+ name: my-service-split
144
+ spec:
145
+ service: my-service
146
+ backends:
147
+ - service: my-service-v1
148
+ weight: 900m
149
+ - service: my-service-v2
150
+ weight: 100m
151
+ ```
152
+
153
+ ## Best Practices
154
+
155
+ 1. **Start with Observability**: Enable tracing before traffic management
156
+ 2. **Gradual mTLS Rollout**: Use permissive mode first
157
+ 3. **Circuit Breaker Tuning**: Start conservative, adjust based on data
158
+ 4. **Avoid Mesh Complexity**: Don't over-engineer routing rules
159
+ 5. **Resource Limits**: Set appropriate proxy resource limits
160
+
161
+ ## Anti-Patterns
162
+
163
+ - Putting business logic in routing rules
164
+ - Ignoring sidecar resource consumption
165
+ - Not monitoring mesh control plane
166
+ - Over-complicating traffic policies
167
+ - Skipping gradual rollout of mesh features
168
+
169
+ ## When to Use
170
+
171
+ - Multiple services needing consistent traffic management
172
+ - Zero-trust security requirements
173
+ - Need for advanced observability without code changes
174
+ - Complex deployment strategies (canary, blue-green)
175
+
176
+ ## When NOT to Use
177
+
178
+ - Simple applications with few services
179
+ - When latency is extremely critical (adds ~1ms)
180
+ - Teams without Kubernetes expertise
181
+ - Tight resource constraints