e11y 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +4 -0
  3. data/.rubocop.yml +69 -0
  4. data/CHANGELOG.md +26 -0
  5. data/CODE_OF_CONDUCT.md +64 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +179 -0
  8. data/Rakefile +37 -0
  9. data/benchmarks/run_all.rb +33 -0
  10. data/config/README.md +83 -0
  11. data/config/loki-local-config.yaml +35 -0
  12. data/config/prometheus.yml +15 -0
  13. data/docker-compose.yml +78 -0
  14. data/docs/00-ICP-AND-TIMELINE.md +483 -0
  15. data/docs/01-SCALE-REQUIREMENTS.md +858 -0
  16. data/docs/ADR-001-architecture.md +2617 -0
  17. data/docs/ADR-002-metrics-yabeda.md +1395 -0
  18. data/docs/ADR-003-slo-observability.md +3337 -0
  19. data/docs/ADR-004-adapter-architecture.md +2385 -0
  20. data/docs/ADR-005-tracing-context.md +1372 -0
  21. data/docs/ADR-006-security-compliance.md +4143 -0
  22. data/docs/ADR-007-opentelemetry-integration.md +1385 -0
  23. data/docs/ADR-008-rails-integration.md +1911 -0
  24. data/docs/ADR-009-cost-optimization.md +2993 -0
  25. data/docs/ADR-010-developer-experience.md +2166 -0
  26. data/docs/ADR-011-testing-strategy.md +1836 -0
  27. data/docs/ADR-012-event-evolution.md +958 -0
  28. data/docs/ADR-013-reliability-error-handling.md +2750 -0
  29. data/docs/ADR-014-event-driven-slo.md +1533 -0
  30. data/docs/ADR-015-middleware-order.md +1061 -0
  31. data/docs/ADR-016-self-monitoring-slo.md +1234 -0
  32. data/docs/API-REFERENCE-L28.md +914 -0
  33. data/docs/COMPREHENSIVE-CONFIGURATION.md +2366 -0
  34. data/docs/IMPLEMENTATION_NOTES.md +2804 -0
  35. data/docs/IMPLEMENTATION_PLAN.md +1971 -0
  36. data/docs/IMPLEMENTATION_PLAN_ARCHITECTURE.md +586 -0
  37. data/docs/PLAN.md +148 -0
  38. data/docs/QUICK-START.md +934 -0
  39. data/docs/README.md +296 -0
  40. data/docs/design/00-memory-optimization.md +593 -0
  41. data/docs/guides/MIGRATION-L27-L28.md +692 -0
  42. data/docs/guides/PERFORMANCE-BENCHMARKS.md +434 -0
  43. data/docs/guides/README.md +44 -0
  44. data/docs/prd/01-overview-vision.md +440 -0
  45. data/docs/use_cases/README.md +119 -0
  46. data/docs/use_cases/UC-001-request-scoped-debug-buffering.md +813 -0
  47. data/docs/use_cases/UC-002-business-event-tracking.md +1953 -0
  48. data/docs/use_cases/UC-003-pattern-based-metrics.md +1627 -0
  49. data/docs/use_cases/UC-004-zero-config-slo-tracking.md +728 -0
  50. data/docs/use_cases/UC-005-sentry-integration.md +759 -0
  51. data/docs/use_cases/UC-006-trace-context-management.md +905 -0
  52. data/docs/use_cases/UC-007-pii-filtering.md +2648 -0
  53. data/docs/use_cases/UC-008-opentelemetry-integration.md +1153 -0
  54. data/docs/use_cases/UC-009-multi-service-tracing.md +1043 -0
  55. data/docs/use_cases/UC-010-background-job-tracking.md +1018 -0
  56. data/docs/use_cases/UC-011-rate-limiting.md +1906 -0
  57. data/docs/use_cases/UC-012-audit-trail.md +2301 -0
  58. data/docs/use_cases/UC-013-high-cardinality-protection.md +2127 -0
  59. data/docs/use_cases/UC-014-adaptive-sampling.md +1940 -0
  60. data/docs/use_cases/UC-015-cost-optimization.md +735 -0
  61. data/docs/use_cases/UC-016-rails-logger-migration.md +785 -0
  62. data/docs/use_cases/UC-017-local-development.md +867 -0
  63. data/docs/use_cases/UC-018-testing-events.md +1081 -0
  64. data/docs/use_cases/UC-019-tiered-storage-migration.md +562 -0
  65. data/docs/use_cases/UC-020-event-versioning.md +708 -0
  66. data/docs/use_cases/UC-021-error-handling-retry-dlq.md +956 -0
  67. data/docs/use_cases/UC-022-event-registry.md +648 -0
  68. data/docs/use_cases/backlog.md +226 -0
  69. data/e11y.gemspec +76 -0
  70. data/lib/e11y/adapters/adaptive_batcher.rb +207 -0
  71. data/lib/e11y/adapters/audit_encrypted.rb +239 -0
  72. data/lib/e11y/adapters/base.rb +580 -0
  73. data/lib/e11y/adapters/file.rb +224 -0
  74. data/lib/e11y/adapters/in_memory.rb +216 -0
  75. data/lib/e11y/adapters/loki.rb +333 -0
  76. data/lib/e11y/adapters/otel_logs.rb +203 -0
  77. data/lib/e11y/adapters/registry.rb +141 -0
  78. data/lib/e11y/adapters/sentry.rb +230 -0
  79. data/lib/e11y/adapters/stdout.rb +108 -0
  80. data/lib/e11y/adapters/yabeda.rb +370 -0
  81. data/lib/e11y/buffers/adaptive_buffer.rb +339 -0
  82. data/lib/e11y/buffers/base_buffer.rb +40 -0
  83. data/lib/e11y/buffers/request_scoped_buffer.rb +246 -0
  84. data/lib/e11y/buffers/ring_buffer.rb +267 -0
  85. data/lib/e11y/buffers.rb +14 -0
  86. data/lib/e11y/console.rb +122 -0
  87. data/lib/e11y/current.rb +48 -0
  88. data/lib/e11y/event/base.rb +894 -0
  89. data/lib/e11y/event/value_sampling_config.rb +84 -0
  90. data/lib/e11y/events/base_audit_event.rb +43 -0
  91. data/lib/e11y/events/base_payment_event.rb +33 -0
  92. data/lib/e11y/events/rails/cache/delete.rb +21 -0
  93. data/lib/e11y/events/rails/cache/read.rb +23 -0
  94. data/lib/e11y/events/rails/cache/write.rb +22 -0
  95. data/lib/e11y/events/rails/database/query.rb +45 -0
  96. data/lib/e11y/events/rails/http/redirect.rb +21 -0
  97. data/lib/e11y/events/rails/http/request.rb +26 -0
  98. data/lib/e11y/events/rails/http/send_file.rb +21 -0
  99. data/lib/e11y/events/rails/http/start_processing.rb +26 -0
  100. data/lib/e11y/events/rails/job/completed.rb +22 -0
  101. data/lib/e11y/events/rails/job/enqueued.rb +22 -0
  102. data/lib/e11y/events/rails/job/failed.rb +22 -0
  103. data/lib/e11y/events/rails/job/scheduled.rb +23 -0
  104. data/lib/e11y/events/rails/job/started.rb +22 -0
  105. data/lib/e11y/events/rails/log.rb +56 -0
  106. data/lib/e11y/events/rails/view/render.rb +23 -0
  107. data/lib/e11y/events.rb +18 -0
  108. data/lib/e11y/instruments/active_job.rb +201 -0
  109. data/lib/e11y/instruments/rails_instrumentation.rb +141 -0
  110. data/lib/e11y/instruments/sidekiq.rb +175 -0
  111. data/lib/e11y/logger/bridge.rb +205 -0
  112. data/lib/e11y/metrics/cardinality_protection.rb +172 -0
  113. data/lib/e11y/metrics/cardinality_tracker.rb +134 -0
  114. data/lib/e11y/metrics/registry.rb +234 -0
  115. data/lib/e11y/metrics/relabeling.rb +226 -0
  116. data/lib/e11y/metrics.rb +102 -0
  117. data/lib/e11y/middleware/audit_signing.rb +174 -0
  118. data/lib/e11y/middleware/base.rb +140 -0
  119. data/lib/e11y/middleware/event_slo.rb +167 -0
  120. data/lib/e11y/middleware/pii_filter.rb +266 -0
  121. data/lib/e11y/middleware/pii_filtering.rb +280 -0
  122. data/lib/e11y/middleware/rate_limiting.rb +214 -0
  123. data/lib/e11y/middleware/request.rb +163 -0
  124. data/lib/e11y/middleware/routing.rb +157 -0
  125. data/lib/e11y/middleware/sampling.rb +254 -0
  126. data/lib/e11y/middleware/slo.rb +168 -0
  127. data/lib/e11y/middleware/trace_context.rb +131 -0
  128. data/lib/e11y/middleware/validation.rb +118 -0
  129. data/lib/e11y/middleware/versioning.rb +132 -0
  130. data/lib/e11y/middleware.rb +12 -0
  131. data/lib/e11y/pii/patterns.rb +90 -0
  132. data/lib/e11y/pii.rb +13 -0
  133. data/lib/e11y/pipeline/builder.rb +155 -0
  134. data/lib/e11y/pipeline/zone_validator.rb +110 -0
  135. data/lib/e11y/pipeline.rb +12 -0
  136. data/lib/e11y/presets/audit_event.rb +65 -0
  137. data/lib/e11y/presets/debug_event.rb +34 -0
  138. data/lib/e11y/presets/high_value_event.rb +51 -0
  139. data/lib/e11y/presets.rb +19 -0
  140. data/lib/e11y/railtie.rb +138 -0
  141. data/lib/e11y/reliability/circuit_breaker.rb +216 -0
  142. data/lib/e11y/reliability/dlq/file_storage.rb +277 -0
  143. data/lib/e11y/reliability/dlq/filter.rb +117 -0
  144. data/lib/e11y/reliability/retry_handler.rb +207 -0
  145. data/lib/e11y/reliability/retry_rate_limiter.rb +117 -0
  146. data/lib/e11y/sampling/error_spike_detector.rb +225 -0
  147. data/lib/e11y/sampling/load_monitor.rb +161 -0
  148. data/lib/e11y/sampling/stratified_tracker.rb +92 -0
  149. data/lib/e11y/sampling/value_extractor.rb +82 -0
  150. data/lib/e11y/self_monitoring/buffer_monitor.rb +79 -0
  151. data/lib/e11y/self_monitoring/performance_monitor.rb +97 -0
  152. data/lib/e11y/self_monitoring/reliability_monitor.rb +146 -0
  153. data/lib/e11y/slo/event_driven.rb +150 -0
  154. data/lib/e11y/slo/tracker.rb +119 -0
  155. data/lib/e11y/version.rb +9 -0
  156. data/lib/e11y.rb +283 -0
  157. metadata +452 -0
@@ -0,0 +1,1153 @@
1
+ # UC-008: OpenTelemetry Integration
2
+
3
+ **Status:** v1.1+ Enhancement
4
+ **Complexity:** Advanced
5
+ **Setup Time:** 30-45 minutes
6
+ **Target Users:** Platform Engineers, SRE, DevOps
7
+
8
+ ---
9
+
10
+ ## 📋 Overview
11
+
12
+ ### Problem Statement
13
+
14
+ **The fragmentation problem:**
15
+ ```ruby
16
+ # ❌ BEFORE: Separate systems, no integration
17
+ # - E11y for business events → Loki
18
+ # - OpenTelemetry for traces → Jaeger
19
+ # - Prometheus for metrics → Grafana
20
+ # - Logs go to a different place than traces
21
+ # - Can't correlate events with spans
22
+ # - Different metadata formats
23
+ # - Manual trace ID management
24
+
25
+ # Problems:
26
+ # 1. Three different telemetry systems
27
+ # 2. No automatic correlation (logs ↔ traces ↔ metrics)
28
+ # 3. Different semantic conventions (your fields vs OTel fields)
29
+ # 4. Manual instrumentation duplication
30
+ # 5. Can't use OTel Collector benefits (sampling, filtering, routing)
31
+ ```
32
+
33
+ ### E11y Solution
34
+
35
+ **Native OpenTelemetry integration:**
36
+ ```ruby
37
+ # ✅ AFTER: Unified observability via OpenTelemetry
38
+ E11y.configure do |config|
39
+ config.opentelemetry do
40
+ enabled true
41
+
42
+ # Use OTel Collector as backend
43
+ collector_endpoint 'http://otel-collector:4318'
44
+
45
+ # Automatic semantic conventions
46
+ use_semantic_conventions true
47
+
48
+ # Automatic span creation from events
49
+ create_spans_for severity: [:error, :warn]
50
+
51
+ # Export to OTel Logs Signal
52
+ export_logs true
53
+ end
54
+ end
55
+
56
+ # Result:
57
+ # ✅ Events → OTel Logs Signal → OTel Collector
58
+ # ✅ Automatic span creation for errors and warnings
59
+ # ✅ Trace context from OTel SDK (W3C Trace Context)
60
+ # ✅ Semantic conventions applied automatically
61
+ # ✅ All benefits of OTel Collector (sampling, routing, etc.)
62
+ ```
63
+
64
+ ---
65
+
66
+ ## 🎯 Features
67
+
68
+ > **Implementation:** See [ADR-007: OpenTelemetry Integration](../ADR-007-opentelemetry-integration.md) for complete architecture, including [Section 3: OTel Collector Adapter](../ADR-007-opentelemetry-integration.md#3-otel-collector-adapter), [Section 4: Semantic Conventions](../ADR-007-opentelemetry-integration.md#4-semantic-conventions), and [Section 5: Logs Signal Export](../ADR-007-opentelemetry-integration.md#5-logs-signal-export).
69
+
70
+ ### 1. OpenTelemetry Collector Adapter
71
+
72
+ **Route events to OTel Collector:**
73
+ ```ruby
74
+ # config/initializers/e11y.rb
75
+ E11y.configure do |config|
76
+ config.adapters << E11y::Adapters::OpenTelemetryCollectorAdapter.new(
77
+ endpoint: ENV['OTEL_EXPORTER_OTLP_ENDPOINT'] || 'http://localhost:4318',
78
+ protocol: :http, # :http or :grpc
79
+ headers: {
80
+ 'X-API-Key' => ENV['OTEL_API_KEY']
81
+ },
82
+
83
+ # Signal types
84
+ export_logs: true, # E11y events → OTel Logs Signal
85
+ export_traces: true, # Spans from events → OTel Traces
86
+ export_metrics: false, # Use Yabeda for metrics (better)
87
+
88
+ # Batching
89
+ batch_size: 100,
90
+ flush_interval: 10.seconds,
91
+
92
+ # Compression
93
+ compression: :gzip,
94
+
95
+ # Retry
96
+ retry_enabled: true,
97
+ max_retries: 3
98
+ )
99
+ end
100
+
101
+ # Architecture:
102
+ # E11y Event → OTel Logs/Traces → OTel Collector → Multiple Backends
103
+ # ├─→ Jaeger (traces)
104
+ # ├─→ Loki (logs)
105
+ # ├─→ Prometheus (metrics)
106
+ # └─→ S3 (archive)
107
+ ```
108
+
109
+ ---
110
+
111
+ ### 2. Semantic Conventions Mapping
112
+
113
+ > **Implementation:** See [ADR-007 Section 4: Semantic Conventions](../ADR-007-opentelemetry-integration.md#4-semantic-conventions) for automatic field mapping across HTTP, DB, RPC, Messaging, and Exception patterns.
114
+
115
+ **Automatic field mapping to OTel standards:**
116
+ ```ruby
117
+ # E11y event (your fields)
118
+ Events::HttpRequest.track(
119
+ method: 'POST',
120
+ path: '/api/orders',
121
+ status_code: 201,
122
+ duration_ms: 45
123
+ )
124
+
125
+ # ↓ Automatic mapping ↓
126
+
127
+ # OTel Logs Signal (semantic conventions)
128
+ {
129
+ Timestamp: 1673520000000000000,
130
+ SeverityText: 'INFO',
131
+ SeverityNumber: 9,
132
+ Body: 'order.created',
133
+
134
+ # Resource attributes (service metadata)
135
+ Resource: {
136
+ 'service.name': 'api',
137
+ 'service.version': '1.0.0',
138
+ 'service.instance.id': 'pod-abc-123',
139
+ 'deployment.environment': 'production'
140
+ },
141
+
142
+ # Span context (trace correlation)
143
+ TraceId: 'abc123...',
144
+ SpanId: 'def456...',
145
+ TraceFlags: 1,
146
+
147
+ # Attributes (semantic conventions applied!)
148
+ Attributes: {
149
+ # HTTP semantic conventions
150
+ 'http.method': 'POST', # ← Mapped from 'method'
151
+ 'http.route': '/api/orders', # ← Mapped from 'path'
152
+ 'http.status_code': 201, # ← Mapped from 'status_code'
153
+ 'http.request.duration_ms': 45, # ← Mapped from 'duration_ms'
154
+
155
+ # Event metadata
156
+ 'event.name': 'order.created',
157
+ 'event.domain': 'order'
158
+ }
159
+ }
160
+ ```
161
+
162
+ **Supported Semantic Conventions:**
163
+ - ✅ HTTP (requests, routes, status codes)
164
+ - ✅ Database (queries, connections)
165
+ - ✅ RPC (gRPC, JSON-RPC)
166
+ - ✅ Messaging (queues, topics)
167
+ - ✅ FaaS (serverless functions)
168
+ - ✅ Exceptions (errors, stack traces)
169
+
170
+ ---
171
+
172
+ ### 3. Automatic Span Creation
173
+
174
+ > **Implementation:** See [ADR-007 Section 6: Traces Signal Export](../ADR-007-opentelemetry-integration.md#6-traces-signal-export) for automatic span creation rules and parent-child relationships.
175
+
176
+ **Create spans from E11y events:**
177
+ ```ruby
178
+ # config/initializers/e11y.rb
179
+ E11y.configure do |config|
180
+ config.opentelemetry do
181
+ # Create spans for errors (for distributed tracing)
182
+ create_spans_for severity: [:error, :fatal],
183
+ span_kind: :internal
184
+
185
+ # HTTP requests already have spans (from OTel auto-instrumentation)
186
+ # But business events need spans too!
187
+ create_spans_for pattern: 'order.*',
188
+ span_kind: :internal,
189
+ span_name: ->(event) { event.event_name }
190
+ end
191
+ end
192
+
193
+ # Usage: Automatic span creation!
194
+ Events::OrderProcessingStarted.track(
195
+ order_id: '123',
196
+ severity: :info
197
+ )
198
+
199
+ # Result in Jaeger (payment.* and shipment.* spans require their own create_spans_for patterns):
200
+ # Parent Span: POST /api/orders (from OTel auto-instrumentation)
201
+ # └─ Child Span: order.processing.started (from E11y event)
202
+ # └─ Child Span: payment.captured (from E11y event)
203
+ # └─ Child Span: shipment.scheduled (from E11y event)
204
+ ```
205
+
206
+ ---
207
+
208
+ ### 4. W3C Trace Context Integration
209
+
210
+ > **Implementation:** See [ADR-007 Section 8: Trace Context Integration](../ADR-007-opentelemetry-integration.md#8-trace-context-integration) for OTel SDK as primary trace context source.
211
+
212
+ **Automatic trace context from OpenTelemetry SDK:**
213
+ ```ruby
214
+ # E11y automatically uses OTel trace context
215
+ require 'opentelemetry/sdk'
216
+
217
+ OpenTelemetry::SDK.configure do |c|
218
+ c.service_name = 'api'
219
+ c.use_all # Auto-instrumentation
220
+ end
221
+
222
+ # E11y automatically detects OTel context!
223
+ E11y.configure do |config|
224
+ config.trace_context do
225
+ source :opentelemetry # Use OTel SDK (automatic!)
226
+ end
227
+ end
228
+
229
+ # Now all E11y events have OTel trace context:
230
+ Events::OrderCreated.track(order_id: '123')
231
+
232
+ # Event includes:
233
+ # - trace_id: from OpenTelemetry::Trace.current_span.context.trace_id
234
+ # - span_id: from OpenTelemetry::Trace.current_span.context.span_id
235
+ # - trace_flags: from OpenTelemetry::Trace.current_span.context.trace_flags
236
+ # → Can correlate with OTel traces in Jaeger!
237
+ ```
238
+
239
+ ---
240
+
241
+ ### 5. OTel Logs Signal Export
242
+
243
+ > **Implementation:** See [ADR-007 Section 5: Logs Signal Export](../ADR-007-opentelemetry-integration.md#5-logs-signal-export) for OTLP JSON format and trace correlation details.
244
+
245
+ **Export E11y events as OpenTelemetry Logs:**
246
+ ```ruby
247
+ # config/initializers/e11y.rb
248
+ E11y.configure do |config|
249
+ config.opentelemetry do
250
+ # Export to OTel Logs Signal (OTLP)
251
+ export_logs true
252
+
253
+ # Map severity levels
254
+ severity_mapping do
255
+ debug -> OpenTelemetry::SDK::Logs::SeverityNumber::DEBUG
256
+ info -> OpenTelemetry::SDK::Logs::SeverityNumber::INFO
257
+ warn -> OpenTelemetry::SDK::Logs::SeverityNumber::WARN
258
+ error -> OpenTelemetry::SDK::Logs::SeverityNumber::ERROR
259
+ fatal -> OpenTelemetry::SDK::Logs::SeverityNumber::FATAL
260
+ success -> OpenTelemetry::SDK::Logs::SeverityNumber::INFO # Custom!
261
+ end
262
+
263
+ # Resource attributes (service metadata)
264
+ resource_attributes do
265
+ 'service.name' ENV['SERVICE_NAME']
266
+ 'service.version' ENV['GIT_SHA']
267
+ 'deployment.environment' Rails.env
268
+ 'host.name' Socket.gethostname
269
+ end
270
+ end
271
+ end
272
+
273
+ # Every E11y event → OTel Logs Signal
274
+ Events::OrderCreated.track(order_id: '123')
275
+
276
+ # ↓ Exported as ↓
277
+
278
+ # OpenTelemetry Log Record
279
+ {
280
+ Timestamp: 1673520000000000000,
281
+ ObservedTimestamp: 1673520000000000000,
282
+ SeverityText: 'INFO',
283
+ SeverityNumber: 9,
284
+ Body: {
285
+ event_name: 'order.created',
286
+ order_id: '123'
287
+ },
288
+ TraceId: 'abc...',
289
+ SpanId: 'def...',
290
+ Resource: { ... },
291
+ Attributes: { ... }
292
+ }
293
+ ```
294
+
295
+ ---
296
+
297
+ ### 6. Baggage PII Protection (C08 Resolution) ⚠️ CRITICAL
298
+
299
+ > **⚠️ CRITICAL: C08 Conflict Resolution - PII Leaking via OpenTelemetry Baggage**
300
+ > **See:** [ADR-006 Section 5.5](../ADR-006-security-compliance.md#55-opentelemetry-baggage-pii-protection-c08-resolution--critical) for detailed architecture and GDPR compliance rationale.
301
+ > **Problem:** OpenTelemetry Baggage propagates data via HTTP headers (`baggage: key1=value1,key2=value2`), bypassing E11y's PII filtering. If a developer accidentally adds PII to baggage, it leaks across all services.
302
+ > **Solution:** Block ALL baggage keys by default, allow ONLY safe keys via allowlist.
303
+
304
+ **The Problem - PII Leaking via HTTP Headers:**
305
+
306
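+ OpenTelemetry Baggage is a W3C standard for propagating key-value pairs across service boundaries alongside distributed traces. However, because baggage is written directly into outgoing HTTP headers, it bypasses E11y's PII filtering entirely: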
307
+
308
+ ```ruby
309
+ # ❌ DANGER: PII in baggage leaks via HTTP headers
310
+ # Service A:
311
+ OpenTelemetry::Baggage.set_value('user_email', 'user@example.com')
312
+ OpenTelemetry::Baggage.set_value('ip_address', '192.168.1.100')
313
+
314
+ # HTTP call to Service B includes:
315
+ # baggage: user_email=user@example.com,ip_address=192.168.1.100
316
+ # ↑ PII transmitted in PLAIN TEXT via HTTP headers!
317
+ # ↑ Bypasses E11y PII filtering entirely!
318
+
319
+ # Problems:
320
+ # 1. ❌ GDPR violation - PII transmitted without consent
321
+ # 2. ❌ Security risk - PII visible in HTTP logs, proxies, CDNs
322
+ # 3. ❌ Audit risk - No record of PII transmission
323
+ # 4. ❌ Compliance risk - PII leaves your infrastructure without controls
324
+ ```
325
+
326
+ **The Solution - Allowlist-Only Baggage:**
327
+
328
+ E11y blocks ALL baggage keys by default, allowing ONLY safe keys (no PII):
329
+
330
+ ```ruby
331
+ # config/initializers/e11y.rb
332
+ E11y.configure do |config|
333
+ config.security.baggage_protection do
334
+ enabled true # ✅ CRITICAL: Always enable in production
335
+
336
+ # Allowlist: ONLY these keys are safe
337
+ allowed_keys [
338
+ 'trace_id', # ✅ Safe: Correlation ID
339
+ 'span_id', # ✅ Safe: Trace context
340
+ 'environment', # ✅ Safe: Deployment context
341
+ 'version', # ✅ Safe: Service version
342
+ 'service_name', # ✅ Safe: Service identifier
343
+ 'request_id', # ✅ Safe: Request identifier
344
+ # Custom safe keys (no PII!):
345
+ 'feature_flag_id', # ✅ Safe: Feature flag name
346
+ 'ab_test_variant' # ✅ Safe: A/B test group
347
+ ]
348
+
349
+ # Block mode: What happens when a key outside the allowlist is set?
350
+ block_mode :silent # Options: :silent (log), :warn (log+warn), :raise (exception)
351
+
352
+ # Monitoring: Track violations
353
+ on_blocked_key do |key, value, caller_location|
354
+ Yabeda.e11y_baggage_pii_blocked_total.increment(
355
+ key: key,
356
+ service: ENV['SERVICE_NAME']
357
+ )
358
+ end
359
+ end
360
+ end
361
+ ```
362
+
363
+ **Usage Examples:**
364
+
365
+ **❌ BLOCKED: PII Keys (Not in Allowlist)**
366
+
367
+ ```ruby
368
+ # Service A:
369
+ OpenTelemetry::Baggage.set_value('user_email', 'user@example.com')
370
+ # → BLOCKED ❌ (not in allowlist)
371
+ # → Logged: "[E11y] Blocked PII from OpenTelemetry baggage: key='user_email'"
372
+
373
+ OpenTelemetry::Baggage.set_value('ip_address', '192.168.1.100')
374
+ # → BLOCKED ❌ (not in allowlist)
375
+
376
+ OpenTelemetry::Baggage.set_value('session_id', 'abc123')
377
+ # → BLOCKED ❌ (not in allowlist)
378
+
379
+ # HTTP call to Service B:
380
+ # baggage: (empty - all PII blocked!)
381
+ ```
382
+
383
+ **✅ ALLOWED: Safe Keys (In Allowlist)**
384
+
385
+ ```ruby
386
+ # Service A:
387
+ OpenTelemetry::Baggage.set_value('trace_id', 'abc123def456')
388
+ # → ALLOWED ✅
389
+
390
+ OpenTelemetry::Baggage.set_value('environment', 'production')
391
+ # → ALLOWED ✅
392
+
393
+ OpenTelemetry::Baggage.set_value('version', 'v2.1.0')
394
+ # → ALLOWED ✅
395
+
396
+ OpenTelemetry::Baggage.set_value('feature_flag_id', 'new_checkout_v2')
397
+ # → ALLOWED ✅
398
+
399
+ # HTTP call to Service B:
400
+ # baggage: trace_id=abc123def456,environment=production,version=v2.1.0,feature_flag_id=new_checkout_v2
401
+ # ✅ All safe keys propagated, no PII!
402
+ ```
403
+
404
+ **✅ ALTERNATIVE: Use Pseudonymized Identifiers**
405
+
406
+ If you need to propagate user context, use non-PII identifiers:
407
+
408
+ ```ruby
409
+ # ❌ BAD: PII in baggage
410
+ OpenTelemetry::Baggage.set_value('user_email', 'user@example.com')
411
+
412
+ # ✅ GOOD: Pseudonymized user identifier
413
+ OpenTelemetry::Baggage.set_value('user_id_hash', Digest::SHA256.hexdigest(user.email))
414
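+ # → No raw PII on the wire, still allows correlation across services ✅ (add 'user_id_hash' to allowed_keys; a hashed email is pseudonymised data and still counts as personal data under GDPR)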
415
+ ```
416
+
417
+ **Strict Mode for Development:**
418
+
419
+ Fail fast in non-production environments:
420
+
421
+ ```ruby
422
+ # config/environments/development.rb
423
+ E11y.configure do |config|
424
+ config.security.baggage_protection do
425
+ enabled true
426
+ block_mode :raise # ← RAISE exception on blocked keys (fail fast)
427
+ allowed_keys E11y::Middleware::BaggageProtection::ALLOWED_KEYS
428
+ end
429
+ end
430
+
431
+ # Developer tries to set PII:
432
+ OpenTelemetry::Baggage.set_value('user_email', 'test@example.com')
433
+ # → RAISES BaggagePiiError:
434
+ # "Blocked PII from OpenTelemetry baggage: key='user_email'.
435
+ # Only allowed keys: trace_id, environment, version, ..."
436
+ # ✅ Catch PII leaks during development!
437
+ ```
438
+
439
+ **Why This Matters (GDPR Compliance):**
440
+
441
+ | GDPR Article | Requirement | How Baggage Protection Helps |
442
+ |--------------|-------------|------------------------------|
443
+ | **Art. 5(1)(c)** | Data minimisation | Only necessary metadata propagated |
444
+ | **Art. 5(1)(f)** | Integrity and confidentiality | PII cannot leak via trace context |
445
+ | **Art. 32** | Security of processing | Technical measure to prevent PII transmission |
446
+
447
+ **Monitoring:**
448
+
449
+ Track baggage protection effectiveness:
450
+
451
+ ```ruby
452
+ # Metrics (via Yabeda)
453
+ Yabeda.e11y_baggage_pii_blocked_total.increment(
454
+ key: 'user_email',
455
+ service: 'api-gateway'
456
+ )
457
+
458
+ # Alert on repeated violations (indicates developer training needed)
459
+ if Yabeda.e11y_baggage_pii_blocked_total.get > 100
460
+ Sentry.capture_message(
461
+ "High volume of baggage PII violations detected",
462
+ level: :warning
463
+ )
464
+ end
465
+ ```
466
+
467
+ ---
468
+
469
+ ## 💻 Implementation Examples
470
+
471
+ ### Example 1: OTel Collector Setup
472
+
473
+ ```yaml
474
+ # otel-collector-config.yaml
475
+ receivers:
476
+ otlp:
477
+ protocols:
478
+ http:
479
+ endpoint: 0.0.0.0:4318
480
+ grpc:
481
+ endpoint: 0.0.0.0:4317
482
+
483
+ processors:
484
+ # Batch for efficiency
485
+ batch:
486
+ timeout: 10s
487
+ send_batch_size: 100
488
+
489
+ # Add resource attributes
490
+ resource:
491
+ attributes:
492
+ - key: deployment.environment
493
+ value: production
494
+ action: insert
495
+
496
+ # Filter out debug logs in production
497
+ filter:
498
+ logs:
499
+ exclude:
500
+ match_type: strict
501
+ severity_texts: ['DEBUG', 'DEBUG2']
502
+
503
+ # Tail-based sampling (keep errors, sample success)
504
+ tail_sampling:
505
+ policies:
506
+ - name: errors-policy
507
+ type: status_code
508
+ status_code:
509
+ status_codes: [ERROR]
510
+ - name: sample-policy
511
+ type: probabilistic
512
+ probabilistic:
513
+ sampling_percentage: 10
514
+
515
+ exporters:
516
+ # Logs → Loki
517
+ loki:
518
+ endpoint: http://loki:3100/loki/api/v1/push
519
+ labels:
520
+ resource:
521
+ service.name: "service_name"
522
+ deployment.environment: "env"
523
+
524
+ # Traces → Jaeger
525
+ jaeger:
526
+ endpoint: jaeger:14250
527
+ tls:
528
+ insecure: true
529
+
530
+ # Metrics → Prometheus
531
+ prometheus:
532
+ endpoint: 0.0.0.0:8889
533
+
534
+ # Archive → S3
535
+ s3:
536
+ region: us-east-1
537
+ bucket: telemetry-archive
538
+ prefix: logs/
539
+
540
+ service:
541
+ pipelines:
542
+ logs:
543
+ receivers: [otlp]
544
+ processors: [batch, resource, filter]
545
+ exporters: [loki, s3]
546
+
547
+ traces:
548
+ receivers: [otlp]
549
+ processors: [batch, tail_sampling]
550
+ exporters: [jaeger]
551
+
552
+ metrics:
553
+ receivers: [otlp]
554
+ processors: [batch]
555
+ exporters: [prometheus]
556
+ ```
557
+
558
+ ```ruby
559
+ # config/initializers/e11y.rb
560
+ E11y.configure do |config|
561
+ config.adapters << E11y::Adapters::OpenTelemetryCollectorAdapter.new(
562
+ endpoint: 'http://otel-collector:4318',
563
+ protocol: :http,
564
+ export_logs: true,
565
+ export_traces: true
566
+ )
567
+ end
568
+
569
+ # Now all E11y events flow through OTel Collector!
570
+ # Benefits:
571
+ # - Centralized routing
572
+ # - Tail-based sampling
573
+ # - Multiple backends
574
+ # - Cost optimization
575
+ ```
576
+
577
+ ---
578
+
579
+ ### Example 2: Semantic Conventions for HTTP
580
+
581
+ ```ruby
582
+ # app/events/http_request.rb
583
+ module Events
584
+ class HttpRequest < E11y::Event::Base
585
+ # Enable OTel semantic conventions
586
+ use_otel_conventions :http
587
+
588
+ schema do
589
+ required(:method).filled(:string)
590
+ required(:route).filled(:string)
591
+ required(:status_code).filled(:integer)
592
+ required(:duration_ms).filled(:float)
593
+ optional(:request_size).filled(:integer)
594
+ optional(:response_size).filled(:integer)
595
+ end
596
+
597
+ # OTel mapping (automatic!)
598
+ otel_mapping do
599
+ 'http.method' from: :method
600
+ 'http.route' from: :route
601
+ 'http.status_code' from: :status_code
602
+ 'http.request.duration_ms' from: :duration_ms
603
+ 'http.request.body.size' from: :request_size
604
+ 'http.response.body.size' from: :response_size
605
+ end
606
+ end
607
+ end
608
+
609
+ # Usage: Just track the event!
610
+ Events::HttpRequest.track(
611
+ method: 'POST',
612
+ route: '/api/orders',
613
+ status_code: 201,
614
+ duration_ms: 45.2,
615
+ request_size: 1024,
616
+ response_size: 512
617
+ )
618
+
619
+ # OTel Collector receives:
620
+ # {
621
+ # Attributes: {
622
+ # 'http.method': 'POST',
623
+ # 'http.route': '/api/orders',
624
+ # 'http.status_code': 201,
625
+ # 'http.request.duration_ms': 45.2,
626
+ # 'http.request.body.size': 1024,
627
+ # 'http.response.body.size': 512
628
+ # }
629
+ # }
630
+
631
+ # Grafana query (works with OTel conventions!):
632
+ # {http.status_code="201"} | json
633
+ ```
634
+
635
+ ---
636
+
637
+ ### Example 3: Database Query Events
638
+
639
+ ```ruby
640
+ # app/events/database_query.rb
641
+ module Events
642
+ class DatabaseQuery < E11y::Event::Base
643
+ use_otel_conventions :database
644
+
645
+ schema do
646
+ required(:statement).filled(:string)
647
+ required(:duration_ms).filled(:float)
648
+ optional(:rows_affected).filled(:integer)
649
+ optional(:connection_id).filled(:string)
650
+ end
651
+
652
+ otel_mapping do
653
+ 'db.statement' from: :statement
654
+ 'db.operation.duration_ms' from: :duration_ms
655
+ 'db.operation.rows_affected' from: :rows_affected
656
+ 'db.connection.id' from: :connection_id
657
+ 'db.system' value: 'postgresql'
658
+ 'db.name' from_config: 'database.name'
659
+ end
660
+ end
661
+ end
662
+
663
+ # Usage
664
+ Events::DatabaseQuery.track(
665
+ statement: 'SELECT * FROM orders WHERE status = ?',
666
+ duration_ms: 12.5,
667
+ rows_affected: 145
668
+ )
669
+
670
+ # OTel attributes:
671
+ # {
672
+ # 'db.statement': 'SELECT * FROM orders WHERE status = ?',
673
+ # 'db.operation.duration_ms': 12.5,
674
+ # 'db.operation.rows_affected': 145,
675
+ # 'db.system': 'postgresql',
676
+ # 'db.name': 'production_db'
677
+ # }
678
+ ```
679
+
680
+ ---
681
+
682
+ ### Example 4: Automatic Span Creation from Events
683
+
684
+ ```ruby
685
+ # config/initializers/e11y.rb
686
+ E11y.configure do |config|
687
+ config.opentelemetry do
688
+ # Create spans for order processing events
689
+ create_spans_for pattern: 'order.*' do
690
+ span_kind :internal
691
+ span_name ->(event) { event.event_name }
692
+
693
+ # Span attributes from event payload
694
+ span_attributes do |event|
695
+ {
696
+ 'order.id' => event.payload[:order_id],
697
+ 'order.amount' => event.payload[:amount],
698
+ 'order.status' => event.payload[:status]
699
+ }
700
+ end
701
+
702
+ # Mark span as error if event severity is error
703
+ mark_error_if ->(event) { event.severity.in?([:error, :fatal]) }
704
+ end
705
+ end
706
+ end
707
+
708
+ # Usage: Track events, get spans automatically!
709
+ def process_order(order_id)
710
+ Events::OrderProcessingStarted.track(order_id: order_id)
711
+
712
+ Events::InventoryChecked.track(
713
+ order_id: order_id,
714
+ items_available: true
715
+ )
716
+
717
+ Events::PaymentCaptured.track(
718
+ order_id: order_id,
719
+ amount: 99.99
720
+ )
721
+
722
+ Events::OrderProcessingCompleted.track(
723
+ order_id: order_id,
724
+ severity: :success
725
+ )
726
+ end
727
+
728
+ # Result in Jaeger (inventory.* and payment.* spans assume matching create_spans_for patterns are also configured):
729
+ # Trace: abc-123
730
+ # Span: order.processing.started (45ms)
731
+ # Span: inventory.checked (12ms)
732
+ # Span: payment.captured (180ms)
733
+ # Span: order.processing.completed (2ms)
734
+ # → Complete distributed trace from E11y events!
735
+ ```
736
+
737
+ ---
738
+
739
+ ### Example 5: Multi-Backend Routing via OTel Collector
740
+
741
+ ```ruby
742
+ # config/initializers/e11y.rb
743
+ E11y.configure do |config|
744
+ # Single adapter: OTel Collector
745
+ config.adapters = [
746
+ E11y::Adapters::OpenTelemetryCollectorAdapter.new(
747
+ endpoint: 'http://otel-collector:4318',
748
+ export_logs: true,
749
+ export_traces: true
750
+ )
751
+ ]
752
+
753
+ # OTel Collector handles routing to multiple backends!
754
+ # No need for multiple E11y adapters
755
+ end
756
+
757
+ # OTel Collector routes to:
758
+ # - Loki (logs, last 30 days)
759
+ # - Jaeger (traces, last 7 days)
760
+ # - S3 (archive, long-term storage)
761
+ # - Prometheus (metrics, scraped from the Collector's Prometheus exporter endpoint)
762
+
763
+ # Benefits:
764
+ # 1. Single integration point
765
+ # 2. Centralized sampling/filtering
766
+ # 3. Cost optimization (tail-based sampling)
767
+ # 4. Flexible routing (add backends without changing code)
768
+ ```
769
+
770
+ ---
771
+
772
+ ## 🔧 Configuration
773
+
774
+ ### Full Configuration
775
+
776
+ ```ruby
777
+ # config/initializers/e11y.rb
778
+ E11y.configure do |config|
779
+ config.opentelemetry do
780
+ # === BASIC ===
781
+ enabled true
782
+
783
+ # === COLLECTOR ===
784
+ collector do
785
+ endpoint ENV['OTEL_EXPORTER_OTLP_ENDPOINT'] || 'http://localhost:4318'
786
+ protocol :http # :http or :grpc
787
+
788
+ # Headers (for authentication)
789
+ headers do
790
+ 'X-API-Key' ENV['OTEL_API_KEY']
791
+ 'X-Tenant-ID' ENV['TENANT_ID']
792
+ end
793
+
794
+ # TLS
795
+ tls do
796
+ enabled true
797
+ ca_file '/path/to/ca.crt'
798
+ client_cert '/path/to/client.crt'
799
+ client_key '/path/to/client.key'
800
+ end
801
+
802
+ # Timeout
803
+ timeout connect: 5.seconds, write: 10.seconds, read: 10.seconds
804
+
805
+ # Retry
806
+ retry_enabled true
807
+ max_retries 3
808
+ retry_backoff initial: 1.second, max: 30.seconds, multiplier: 2
809
+
810
+ # Compression
811
+ compression :gzip # :none, :gzip
812
+
813
+ # Batching
814
+ batch_size 100
815
+ flush_interval 10.seconds
816
+ end
817
+
818
+ # === SIGNALS ===
819
+ signals do
820
+ # Logs Signal (E11y events → OTel Logs)
821
+ logs do
822
+ enabled true
823
+ include_body true
824
+ include_attributes true
825
+ max_attribute_length 4096
826
+ end
827
+
828
+ # Traces Signal (E11y events → OTel Spans)
829
+ traces do
830
+ enabled true
831
+ create_spans_for severity: [:error, :warn, :fatal]
832
+ create_spans_for pattern: 'order.*'
833
+ span_kind :internal # :internal, :server, :client
834
+ end
835
+
836
+ # Metrics Signal (disabled, use Yabeda instead)
837
+ metrics do
838
+ enabled false # Yabeda is better for metrics
839
+ end
840
+ end
841
+
842
+ # === SEMANTIC CONVENTIONS ===
843
+ semantic_conventions do
844
+ enabled true
845
+
846
+ # HTTP conventions
847
+ http do
848
+ map 'http.method' from: :method
849
+ map 'http.route' from: :path
850
+ map 'http.status_code' from: :status_code
851
+ map 'http.request.duration_ms' from: :duration_ms
852
+ end
853
+
854
+ # Database conventions
855
+ database do
856
+ map 'db.statement' from: :query
857
+ map 'db.operation.duration_ms' from: :duration_ms
858
+ map 'db.system' value: 'postgresql'
859
+ end
860
+
861
+ # Custom conventions
862
+ custom do
863
+ map 'business.order.id' from: :order_id
864
+ map 'business.user.segment' from: :user_segment
865
+ end
866
+ end
867
+
868
+ # === RESOURCE ATTRIBUTES ===
869
+ resource_attributes do
870
+ # Service identification (REQUIRED for OTel!)
871
+ 'service.name' ENV['SERVICE_NAME'] || 'api'
872
+ 'service.version' ENV['GIT_SHA'] || 'unknown'
873
+ 'service.instance.id' ENV['HOSTNAME'] || Socket.gethostname
874
+
875
+ # Deployment
876
+ 'deployment.environment' Rails.env.to_s
877
+ 'deployment.region' ENV['AWS_REGION']
878
+
879
+ # Host
880
+ 'host.name' Socket.gethostname
881
+ 'host.type' ENV['INSTANCE_TYPE']
882
+
883
+ # Container (if applicable)
884
+ 'container.id' ENV['CONTAINER_ID']
885
+ 'container.name' ENV['CONTAINER_NAME']
886
+
887
+ # Kubernetes (if applicable)
888
+ 'k8s.namespace.name' ENV['K8S_NAMESPACE']
889
+ 'k8s.pod.name' ENV['K8S_POD_NAME']
890
+ 'k8s.deployment.name' ENV['K8S_DEPLOYMENT']
891
+ end
892
+
893
+ # === TRACE CONTEXT ===
894
+ trace_context do
895
+ # Use OTel SDK for trace context (automatic!)
896
+ source :opentelemetry
897
+
898
+ # Fallback to E11y trace context if OTel not available
899
+ fallback_to_e11y true
900
+ end
901
+
902
+ # === SAMPLING ===
903
+ sampling do
904
+ # Parent-based (respect upstream sampling decision)
905
+ parent_based true
906
+
907
+ # Default sampler
908
+ default_sampler :always_on # :always_on, :always_off, :trace_id_ratio
909
+
910
+ # Ratio (if using :trace_id_ratio)
911
+ ratio 0.1 # 10% sampling
912
+ end
913
+ end
914
+ end
915
+ ```
916
+
917
+ ---
918
+
919
+ ## 📊 Benefits of OTel Collector
920
+
921
+ ### 1. Centralized Telemetry Pipeline
922
+ ```
923
+ ┌─────────────┐
924
+ │ E11y │─┐
925
+ └─────────────┘ │
926
+
927
+ ┌─────────────┐ │ ┌──────────────────┐
928
+ │ Rails.logger│─┼───→│ OTel Collector │─┐
929
+ └─────────────┘ │ └──────────────────┘ │
930
+ │ │
931
+ ┌─────────────┐ │ ├─→ Loki (logs)
932
+ │ Sidekiq │─┘ ├─→ Jaeger (traces)
933
+ └─────────────┘ ├─→ Prometheus (metrics)
934
+ ├─→ S3 (archive)
935
+ └─→ Datadog (optional)
936
+ ```
937
+
938
+ ### 2. Advanced Sampling
939
+ - **Tail-based sampling:** Keep all error traces, sample only a fraction of successful ones
940
+ - **Probabilistic sampling:** 10% of all traffic
941
+ - **Rate limiting:** Max 1000 spans/sec
942
+ - **Policy-based:** Different policies per service
943
+
944
+ ### 3. Cost Optimization
945
+ ```ruby
946
+ # Without OTel Collector:
947
+ # - 100% of events → Loki ($$$)
948
+ # - 100% of traces → Jaeger ($$$)
949
+
950
+ # With OTel Collector:
951
+ # - 10% sampled → Loki ($)
952
+ # - 100% errors → Loki (important!)
953
+ # - Tail sampling → 90% reduction
954
+ # → $68k/month → $6.8k/month (90% savings!)
955
+ ```
956
+
957
+ ### 4. Vendor Flexibility
958
+ ```yaml
959
+ # Easy to switch backends (just reconfigure OTel Collector)
960
+ # No code changes needed!
961
+
962
+ # Day 1: Use Jaeger
963
+ exporters:
964
+ jaeger:
965
+ endpoint: jaeger:14250
966
+
967
+ # Day 30: Switch to Grafana Tempo
968
+ exporters:
969
+ otlp/tempo:
970
+ endpoint: tempo:4317
971
+
972
+ # Day 60: Add Datadog too
973
+ exporters:
974
+ jaeger: { ... }
975
+ otlp/tempo: { ... }
976
+ datadog:
977
+ api:
978
+ key: ${DD_API_KEY}
979
+ ```
980
+
981
+ ---
982
+
983
+ ## 🧪 Testing
984
+
985
+ ```ruby
986
+ # spec/support/opentelemetry_helper.rb
987
+ RSpec.configure do |config|
988
+ config.before(:suite) do
989
+ # Setup in-memory OTel exporter for testing
990
+ OpenTelemetry::SDK.configure do |c|
991
+ c.service_name = 'test'
992
+ c.add_span_processor(
993
+ OpenTelemetry::SDK::Trace::Export::SimpleSpanProcessor.new(
994
+ OpenTelemetry::SDK::Trace::Export::InMemorySpanExporter.new
995
+ )
996
+ )
997
+ end
998
+ end
999
+
1000
+ config.after(:each) do
1001
+ # Clear spans after each test
1002
+ OpenTelemetry::SDK::Trace::Export::InMemorySpanExporter.reset
1003
+ end
1004
+ end
1005
+
1006
+ # spec/events/order_created_spec.rb
1007
+ RSpec.describe Events::OrderCreated do
1008
+ it 'creates OTel span' do
1009
+ # Track event
1010
+ Events::OrderCreated.track(order_id: '123')
1011
+
1012
+ # Get recorded spans
1013
+ spans = OpenTelemetry::SDK::Trace::Export::InMemorySpanExporter.get_finished_spans
1014
+
1015
+ # Verify span created
1016
+ expect(spans.size).to eq(1)
1017
+
1018
+ span = spans.first
1019
+ expect(span.name).to eq('order.created')
1020
+ expect(span.kind).to eq(:internal)
1021
+ expect(span.attributes['order.id']).to eq('123')
1022
+ end
1023
+
1024
+ it 'includes trace context' do
1025
+ # Create parent span
1026
+ tracer = OpenTelemetry.tracer_provider.tracer('test')
1027
+ tracer.in_span('parent') do |parent_span|
1028
+ # Track event (should be child span)
1029
+ Events::OrderCreated.track(order_id: '123')
1030
+
1031
+ spans = OpenTelemetry::SDK::Trace::Export::InMemorySpanExporter.get_finished_spans
1032
+ child_span = spans.find { |s| s.name == 'order.created' }
1033
+
1034
+ # Verify parent-child relationship
1035
+ expect(child_span.parent_span_id).to eq(parent_span.context.span_id)
1036
+ expect(child_span.trace_id).to eq(parent_span.context.trace_id)
1037
+ end
1038
+ end
1039
+ end
1040
+ ```
1041
+
1042
+ ---
1043
+
1044
+ ## 💡 Best Practices
1045
+
1046
+ ### ✅ DO
1047
+
1048
+ **1. Use OTel Collector in production**
1049
+ ```ruby
1050
+ # ✅ GOOD: Central pipeline
1051
+ config.adapters = [
1052
+ E11y::Adapters::OpenTelemetryCollectorAdapter.new(
1053
+ endpoint: 'http://otel-collector:4318'
1054
+ )
1055
+ ]
1056
+
1057
+ # OTel Collector handles:
1058
+ # - Sampling
1059
+ # - Filtering
1060
+ # - Routing to multiple backends
1061
+ # - Cost optimization
1062
+ ```
1063
+
1064
+ **2. Use semantic conventions**
1065
+ ```ruby
1066
+ # ✅ GOOD: Standard field names
1067
+ module Events
1068
+ class HttpRequest < E11y::Event::Base
1069
+ use_otel_conventions :http
1070
+
1071
+ otel_mapping do
1072
+ 'http.method' from: :method # ← Standard!
1073
+ 'http.status_code' from: :status_code # ← Standard!
1074
+ end
1075
+ end
1076
+ end
1077
+ ```
1078
+
1079
+ **3. Enable trace context integration**
1080
+ ```ruby
1081
+ # ✅ GOOD: Use OTel SDK trace context
1082
+ config.trace_context do
1083
+ source :opentelemetry # Automatic correlation!
1084
+ end
1085
+ ```
1086
+
1087
+ ---
1088
+
1089
+ ### ❌ DON'T
1090
+
1091
+ **1. Don't bypass OTel Collector**
1092
+ ```ruby
1093
+ # ❌ BAD: Direct to backends (no sampling, no routing)
1094
+ config.adapters = [
1095
+ E11y::Adapters::JaegerAdapter.new(...),
1096
+ E11y::Adapters::LokiAdapter.new(...),
1097
+ E11y::Adapters::S3Adapter.new(...)
1098
+ ]
1099
+
1100
+ # ✅ GOOD: Through OTel Collector
1101
+ config.adapters = [
1102
+ E11y::Adapters::OpenTelemetryCollectorAdapter.new(...)
1103
+ ]
1104
+ ```
1105
+
1106
+ **2. Don't use custom field names**
1107
+ ```ruby
1108
+ # ❌ BAD: Non-standard fields
1109
+ Events::HttpRequest.track(
1110
+ verb: 'POST', # ← Should be 'method'
1111
+ code: 201 # ← Should be 'status_code'
1112
+ )
1113
+
1114
+ # ✅ GOOD: OTel semantic conventions
1115
+ Events::HttpRequest.track(
1116
+ method: 'POST',
1117
+ status_code: 201
1118
+ )
1119
+ ```
1120
+
1121
+ ---
1122
+
1123
+ ## 📚 Related Use Cases
1124
+
1125
+ - **[UC-006: Trace Context Management](./UC-006-trace-context-management.md)** - W3C Trace Context
1126
+ - **[UC-007: PII Filtering](./UC-007-pii-filtering.md)** - PII protection (baggage allowlist: C08)
1127
+ - **[UC-009: Multi-Service Tracing](./UC-009-multi-service-tracing.md)** - Distributed traces
1128
+ - **[UC-010: Background Job Tracking](./UC-010-background-job-tracking.md)** - Job tracing
1129
+
1130
+ ---
1131
+
1132
+ ## 🎯 Summary
1133
+
1134
+ ### OpenTelemetry Benefits
1135
+
1136
+ | Feature | Without OTel | With OTel |
1137
+ |---------|-------------|-----------|
1138
+ | **Backend Flexibility** | Locked to E11y adapters | Any OTel-compatible backend |
1139
+ | **Sampling** | Basic (E11y only) | Advanced (tail-based, policy-based) |
1140
+ | **Cost** | High (100% events) | Optimized (10-20% sampled) |
1141
+ | **Routing** | Code changes | Config-only |
1142
+ | **Standards** | E11y conventions | Industry-standard OTel |
1143
+ | **Trace Correlation** | Manual | Automatic (W3C Trace Context) |
1144
+
1145
+ **Setup Time:**
1146
+ - Initial: 30-45 min (OTel Collector + E11y config)
1147
+ - Per event: 0 min (semantic conventions are applied automatically!)
1148
+
1149
+ ---
1150
+
1151
+ **Document Version:** 1.0
1152
+ **Last Updated:** January 12, 2026
1153
+ **Status:** ✅ Complete