e11y 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +4 -0
  3. data/.rubocop.yml +69 -0
  4. data/CHANGELOG.md +26 -0
  5. data/CODE_OF_CONDUCT.md +64 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +179 -0
  8. data/Rakefile +37 -0
  9. data/benchmarks/run_all.rb +33 -0
  10. data/config/README.md +83 -0
  11. data/config/loki-local-config.yaml +35 -0
  12. data/config/prometheus.yml +15 -0
  13. data/docker-compose.yml +78 -0
  14. data/docs/00-ICP-AND-TIMELINE.md +483 -0
  15. data/docs/01-SCALE-REQUIREMENTS.md +858 -0
  16. data/docs/ADR-001-architecture.md +2617 -0
  17. data/docs/ADR-002-metrics-yabeda.md +1395 -0
  18. data/docs/ADR-003-slo-observability.md +3337 -0
  19. data/docs/ADR-004-adapter-architecture.md +2385 -0
  20. data/docs/ADR-005-tracing-context.md +1372 -0
  21. data/docs/ADR-006-security-compliance.md +4143 -0
  22. data/docs/ADR-007-opentelemetry-integration.md +1385 -0
  23. data/docs/ADR-008-rails-integration.md +1911 -0
  24. data/docs/ADR-009-cost-optimization.md +2993 -0
  25. data/docs/ADR-010-developer-experience.md +2166 -0
  26. data/docs/ADR-011-testing-strategy.md +1836 -0
  27. data/docs/ADR-012-event-evolution.md +958 -0
  28. data/docs/ADR-013-reliability-error-handling.md +2750 -0
  29. data/docs/ADR-014-event-driven-slo.md +1533 -0
  30. data/docs/ADR-015-middleware-order.md +1061 -0
  31. data/docs/ADR-016-self-monitoring-slo.md +1234 -0
  32. data/docs/API-REFERENCE-L28.md +914 -0
  33. data/docs/COMPREHENSIVE-CONFIGURATION.md +2366 -0
  34. data/docs/IMPLEMENTATION_NOTES.md +2804 -0
  35. data/docs/IMPLEMENTATION_PLAN.md +1971 -0
  36. data/docs/IMPLEMENTATION_PLAN_ARCHITECTURE.md +586 -0
  37. data/docs/PLAN.md +148 -0
  38. data/docs/QUICK-START.md +934 -0
  39. data/docs/README.md +296 -0
  40. data/docs/design/00-memory-optimization.md +593 -0
  41. data/docs/guides/MIGRATION-L27-L28.md +692 -0
  42. data/docs/guides/PERFORMANCE-BENCHMARKS.md +434 -0
  43. data/docs/guides/README.md +44 -0
  44. data/docs/prd/01-overview-vision.md +440 -0
  45. data/docs/use_cases/README.md +119 -0
  46. data/docs/use_cases/UC-001-request-scoped-debug-buffering.md +813 -0
  47. data/docs/use_cases/UC-002-business-event-tracking.md +1953 -0
  48. data/docs/use_cases/UC-003-pattern-based-metrics.md +1627 -0
  49. data/docs/use_cases/UC-004-zero-config-slo-tracking.md +728 -0
  50. data/docs/use_cases/UC-005-sentry-integration.md +759 -0
  51. data/docs/use_cases/UC-006-trace-context-management.md +905 -0
  52. data/docs/use_cases/UC-007-pii-filtering.md +2648 -0
  53. data/docs/use_cases/UC-008-opentelemetry-integration.md +1153 -0
  54. data/docs/use_cases/UC-009-multi-service-tracing.md +1043 -0
  55. data/docs/use_cases/UC-010-background-job-tracking.md +1018 -0
  56. data/docs/use_cases/UC-011-rate-limiting.md +1906 -0
  57. data/docs/use_cases/UC-012-audit-trail.md +2301 -0
  58. data/docs/use_cases/UC-013-high-cardinality-protection.md +2127 -0
  59. data/docs/use_cases/UC-014-adaptive-sampling.md +1940 -0
  60. data/docs/use_cases/UC-015-cost-optimization.md +735 -0
  61. data/docs/use_cases/UC-016-rails-logger-migration.md +785 -0
  62. data/docs/use_cases/UC-017-local-development.md +867 -0
  63. data/docs/use_cases/UC-018-testing-events.md +1081 -0
  64. data/docs/use_cases/UC-019-tiered-storage-migration.md +562 -0
  65. data/docs/use_cases/UC-020-event-versioning.md +708 -0
  66. data/docs/use_cases/UC-021-error-handling-retry-dlq.md +956 -0
  67. data/docs/use_cases/UC-022-event-registry.md +648 -0
  68. data/docs/use_cases/backlog.md +226 -0
  69. data/e11y.gemspec +76 -0
  70. data/lib/e11y/adapters/adaptive_batcher.rb +207 -0
  71. data/lib/e11y/adapters/audit_encrypted.rb +239 -0
  72. data/lib/e11y/adapters/base.rb +580 -0
  73. data/lib/e11y/adapters/file.rb +224 -0
  74. data/lib/e11y/adapters/in_memory.rb +216 -0
  75. data/lib/e11y/adapters/loki.rb +333 -0
  76. data/lib/e11y/adapters/otel_logs.rb +203 -0
  77. data/lib/e11y/adapters/registry.rb +141 -0
  78. data/lib/e11y/adapters/sentry.rb +230 -0
  79. data/lib/e11y/adapters/stdout.rb +108 -0
  80. data/lib/e11y/adapters/yabeda.rb +370 -0
  81. data/lib/e11y/buffers/adaptive_buffer.rb +339 -0
  82. data/lib/e11y/buffers/base_buffer.rb +40 -0
  83. data/lib/e11y/buffers/request_scoped_buffer.rb +246 -0
  84. data/lib/e11y/buffers/ring_buffer.rb +267 -0
  85. data/lib/e11y/buffers.rb +14 -0
  86. data/lib/e11y/console.rb +122 -0
  87. data/lib/e11y/current.rb +48 -0
  88. data/lib/e11y/event/base.rb +894 -0
  89. data/lib/e11y/event/value_sampling_config.rb +84 -0
  90. data/lib/e11y/events/base_audit_event.rb +43 -0
  91. data/lib/e11y/events/base_payment_event.rb +33 -0
  92. data/lib/e11y/events/rails/cache/delete.rb +21 -0
  93. data/lib/e11y/events/rails/cache/read.rb +23 -0
  94. data/lib/e11y/events/rails/cache/write.rb +22 -0
  95. data/lib/e11y/events/rails/database/query.rb +45 -0
  96. data/lib/e11y/events/rails/http/redirect.rb +21 -0
  97. data/lib/e11y/events/rails/http/request.rb +26 -0
  98. data/lib/e11y/events/rails/http/send_file.rb +21 -0
  99. data/lib/e11y/events/rails/http/start_processing.rb +26 -0
  100. data/lib/e11y/events/rails/job/completed.rb +22 -0
  101. data/lib/e11y/events/rails/job/enqueued.rb +22 -0
  102. data/lib/e11y/events/rails/job/failed.rb +22 -0
  103. data/lib/e11y/events/rails/job/scheduled.rb +23 -0
  104. data/lib/e11y/events/rails/job/started.rb +22 -0
  105. data/lib/e11y/events/rails/log.rb +56 -0
  106. data/lib/e11y/events/rails/view/render.rb +23 -0
  107. data/lib/e11y/events.rb +18 -0
  108. data/lib/e11y/instruments/active_job.rb +201 -0
  109. data/lib/e11y/instruments/rails_instrumentation.rb +141 -0
  110. data/lib/e11y/instruments/sidekiq.rb +175 -0
  111. data/lib/e11y/logger/bridge.rb +205 -0
  112. data/lib/e11y/metrics/cardinality_protection.rb +172 -0
  113. data/lib/e11y/metrics/cardinality_tracker.rb +134 -0
  114. data/lib/e11y/metrics/registry.rb +234 -0
  115. data/lib/e11y/metrics/relabeling.rb +226 -0
  116. data/lib/e11y/metrics.rb +102 -0
  117. data/lib/e11y/middleware/audit_signing.rb +174 -0
  118. data/lib/e11y/middleware/base.rb +140 -0
  119. data/lib/e11y/middleware/event_slo.rb +167 -0
  120. data/lib/e11y/middleware/pii_filter.rb +266 -0
  121. data/lib/e11y/middleware/pii_filtering.rb +280 -0
  122. data/lib/e11y/middleware/rate_limiting.rb +214 -0
  123. data/lib/e11y/middleware/request.rb +163 -0
  124. data/lib/e11y/middleware/routing.rb +157 -0
  125. data/lib/e11y/middleware/sampling.rb +254 -0
  126. data/lib/e11y/middleware/slo.rb +168 -0
  127. data/lib/e11y/middleware/trace_context.rb +131 -0
  128. data/lib/e11y/middleware/validation.rb +118 -0
  129. data/lib/e11y/middleware/versioning.rb +132 -0
  130. data/lib/e11y/middleware.rb +12 -0
  131. data/lib/e11y/pii/patterns.rb +90 -0
  132. data/lib/e11y/pii.rb +13 -0
  133. data/lib/e11y/pipeline/builder.rb +155 -0
  134. data/lib/e11y/pipeline/zone_validator.rb +110 -0
  135. data/lib/e11y/pipeline.rb +12 -0
  136. data/lib/e11y/presets/audit_event.rb +65 -0
  137. data/lib/e11y/presets/debug_event.rb +34 -0
  138. data/lib/e11y/presets/high_value_event.rb +51 -0
  139. data/lib/e11y/presets.rb +19 -0
  140. data/lib/e11y/railtie.rb +138 -0
  141. data/lib/e11y/reliability/circuit_breaker.rb +216 -0
  142. data/lib/e11y/reliability/dlq/file_storage.rb +277 -0
  143. data/lib/e11y/reliability/dlq/filter.rb +117 -0
  144. data/lib/e11y/reliability/retry_handler.rb +207 -0
  145. data/lib/e11y/reliability/retry_rate_limiter.rb +117 -0
  146. data/lib/e11y/sampling/error_spike_detector.rb +225 -0
  147. data/lib/e11y/sampling/load_monitor.rb +161 -0
  148. data/lib/e11y/sampling/stratified_tracker.rb +92 -0
  149. data/lib/e11y/sampling/value_extractor.rb +82 -0
  150. data/lib/e11y/self_monitoring/buffer_monitor.rb +79 -0
  151. data/lib/e11y/self_monitoring/performance_monitor.rb +97 -0
  152. data/lib/e11y/self_monitoring/reliability_monitor.rb +146 -0
  153. data/lib/e11y/slo/event_driven.rb +150 -0
  154. data/lib/e11y/slo/tracker.rb +119 -0
  155. data/lib/e11y/version.rb +9 -0
  156. data/lib/e11y.rb +283 -0
  157. metadata +452 -0
@@ -0,0 +1,1385 @@
1
+ # ADR-007: OpenTelemetry Integration
2
+
3
+ **Status:** Draft
4
+ **Date:** January 13, 2026
5
+ **Covers:** UC-008 (OpenTelemetry Integration)
6
+ **Depends On:** ADR-001 (Core), ADR-005 (Tracing), ADR-004 (Adapters)
7
+ **Priority:** 🟡 Medium (v1.1+ enhancement)
8
+
9
+ ---
10
+
11
+ ## 📋 Table of Contents
12
+
13
+ 1. [Context & Problem](#1-context--problem)
14
+ 2. [Architecture Overview](#2-architecture-overview)
15
+ - 2.2. [Metrics Backend Selection (C03 Resolution)](#22-metrics-backend-selection-c03-resolution) ⚠️ CRITICAL
16
+ 3. [OTel Collector Adapter](#3-otel-collector-adapter)
17
+ 4. [Semantic Conventions](#4-semantic-conventions)
18
+ 5. [Logs Signal Export](#5-logs-signal-export)
19
+ 6. [Traces Signal Export](#6-traces-signal-export)
20
+ 7. [Resource Attributes](#7-resource-attributes)
21
+ 8. [Trace Context Integration](#8-trace-context-integration)
22
+ 9. [Testing Strategy](#9-testing-strategy)
23
+ 10. [Trade-offs](#10-trade-offs)
24
+
25
+ **Note:** Cardinality Protection (C04 Resolution) moved to [ADR-009 Cost Optimization §8](ADR-009-cost-optimization.md#8-cardinality-protection-c04-resolution) ⚠️
26
+
27
+ ---
28
+
29
+ ## 1. Context & Problem
30
+
31
+ ### 1.1. Problem Statement
32
+
33
+ **Telemetry Fragmentation:**
34
+
35
+ ```ruby
36
+ # ❌ CURRENT: Separate systems, manual integration
37
+ # - E11y events → Loki (custom adapter)
38
+ # - Rails logs → File/Stdout
39
+ # - Sidekiq → Redis logs
40
+ # - Traces → Manual instrumentation
41
+ # - Metrics → Yabeda → Prometheus
42
+
43
+ # Problems:
44
+ # 1. Multiple telemetry pipelines (5+ different systems)
45
+ # 2. No automatic correlation (logs ↔ traces ↔ metrics)
46
+ # 3. Different field naming conventions
47
+ # 4. Manual span creation from events
48
+ # 5. Can't use OTel Collector benefits (sampling, routing, filtering)
49
+ # 6. Vendor lock-in (custom adapters for each backend)
50
+ ```
51
+
52
+ **Missing OpenTelemetry Integration:**
53
+ - ❌ No OTel Logs Signal support (E11y → custom formats only)
54
+ - ❌ No automatic semantic conventions mapping
55
+ - ❌ No OTel Collector adapter (direct to backends only)
56
+ - ❌ No automatic span creation from events
57
+ - ❌ Manual trace context management
58
+ - ❌ Can't use OTel ecosystem tools (processors, exporters, samplers)
59
+
60
+ ### 1.2. Goals
61
+
62
+ **Primary Goals:**
63
+ - ✅ **OTel Collector Adapter** (OTLP HTTP/gRPC support)
64
+ - ✅ **Logs Signal Export** (E11y events → OTel Logs)
65
+ - ✅ **Semantic Conventions** (automatic field mapping)
66
+ - ✅ **Automatic Span Creation** (events → spans)
67
+ - ✅ **Trace Context Integration** (use OTel SDK trace context)
68
+ - ✅ **Resource Attributes** (service metadata)
69
+
70
+ **Non-Goals:**
71
+ - ❌ Replace Yabeda (Yabeda remains the default metrics backend, better for Rails)
72
+ - ❌ Replace existing adapters (OTel is optional, v1.1+)
73
+ - ❌ OTel auto-instrumentation (already exists separately)
74
+
75
+ > **⚠️ NOTE (C03 Resolution):** OpenTelemetry is **optional** for E11y. **Yabeda is the default metrics backend** (see ADR-002). You can choose OpenTelemetry for metrics instead, but avoid running **both simultaneously** (supported only as a temporary migration mode), because doing so doubles the overhead. See [Metrics Backend Selection](#22-metrics-backend-selection-c03-resolution) and [CONFLICT-ANALYSIS.md C03](../researches/CONFLICT-ANALYSIS.md#c03-dual-metrics-collection-overhead).
76
+
77
+ ### 1.3. Success Metrics
78
+
79
+ | Metric | Target | Critical? |
80
+ |--------|--------|-----------|
81
+ | **OTel compatibility** | 100% OTLP spec | ✅ Yes |
82
+ | **Semantic conventions coverage** | HTTP, DB, RPC, Messaging | ✅ Yes |
83
+ | **Trace correlation** | 100% automatic | ✅ Yes |
84
+ | **Performance overhead** | <5% vs direct adapters | ✅ Yes |
85
+ | **Backend flexibility** | Any OTel-compatible | ✅ Yes |
86
+
87
+ ---
88
+
89
+ ## 2. Architecture Overview
90
+
91
+ ### 2.1. System Context
92
+
93
+ ```mermaid
94
+ C4Context
95
+ title OpenTelemetry Integration Context
96
+
97
+ Person(dev, "Developer", "Rails application")
98
+
99
+ System(e11y, "E11y Gem", "Tracks business events")
100
+
101
+ System_Ext(otel_collector, "OTel Collector", "Centralized telemetry pipeline")
102
+ System_Ext(loki, "Loki", "Logs storage")
103
+ System_Ext(jaeger, "Jaeger", "Traces storage")
104
+ System_Ext(prometheus, "Prometheus", "Metrics storage")
105
+ System_Ext(s3, "S3", "Long-term archive")
106
+
107
+ Rel(dev, e11y, "Tracks events", "E11y API")
108
+ Rel(e11y, otel_collector, "Exports", "OTLP HTTP/gRPC")
109
+ Rel(otel_collector, loki, "Routes logs", "Loki API")
110
+ Rel(otel_collector, jaeger, "Routes traces", "Jaeger API")
111
+ Rel(otel_collector, prometheus, "Routes metrics", "Remote Write")
112
+ Rel(otel_collector, s3, "Archives", "S3 API")
113
+
114
+ UpdateLayoutConfig($c4ShapeInRow="3", $c4BoundaryInRow="1")
115
+ ```
116
+
117
+ ### 2.2. Metrics Backend Selection (C03 Resolution) ⚠️ CRITICAL
118
+
119
+ **Reference:** [CONFLICT-ANALYSIS.md - C03: Dual Metrics Collection Overhead](../researches/CONFLICT-ANALYSIS.md#c03-dual-metrics-collection-overhead)
120
+
121
+ **Problem:** Running both Yabeda (ADR-002) and OpenTelemetry metrics simultaneously causes **double overhead** - every event increments counters in both systems, doubling CPU/memory usage and storage costs.
122
+
123
+ **Decision:** E11y supports a **configurable metrics backend** - choose one of the following modes (a single backend, except during migration):
124
+ 1. **`:yabeda`** (default) - Ruby-native, Prometheus, best for Rails
125
+ 2. **`:opentelemetry`** (optional) - Vendor-neutral, OTLP, multi-backend
126
+ 3. **`[:yabeda, :opentelemetry]`** (migration only) - Both enabled (⚠️ double overhead!)
127
+
128
+ **Configuration:**
129
+
130
+ ```ruby
131
+ # config/initializers/e11y.rb
132
+ E11y.configure do |config|
133
+ # Option 1: Yabeda only (DEFAULT, recommended for Rails)
134
+ config.metrics do
135
+ backend :yabeda # Prometheus via Yabeda
136
+ end
137
+
138
+ # Option 2: OpenTelemetry only (for OTLP backends)
139
+ # config.metrics do
140
+ # backend :opentelemetry # OTLP via OTel SDK
141
+ # end
142
+
143
+ # Option 3: Both (for migration period ONLY)
144
+ # config.metrics do
145
+ # backend [:yabeda, :opentelemetry] # ⚠️ DOUBLE OVERHEAD!
146
+ # end
147
+ end
148
+ ```
149
+
150
+ **Metrics Adapter Pattern:**
151
+
152
+ E11y uses an internal **Metrics Adapter** to abstract the backend:
153
+
154
+ ```ruby
155
+ # lib/e11y/metrics.rb
156
+ module E11y
157
+ module Metrics
158
+ class << self
159
+ # Unified API (backend-agnostic)
160
+ def increment(metric_name, tags = {}, by: 1)
161
+ backends.each do |backend|
162
+ case backend
163
+ when :yabeda
164
+ increment_yabeda(metric_name, tags, by)
165
+ when :opentelemetry
166
+ increment_opentelemetry(metric_name, tags, by)
167
+ end
168
+ end
169
+ end
170
+
171
+ def histogram(metric_name, value, tags = {})
172
+ backends.each do |backend|
173
+ case backend
174
+ when :yabeda
175
+ histogram_yabeda(metric_name, value, tags)
176
+ when :opentelemetry
177
+ histogram_opentelemetry(metric_name, value, tags)
178
+ end
179
+ end
180
+ end
181
+
182
+ private
183
+
184
+ def backends
185
+ Array(E11y.config.metrics.backend)
186
+ end
187
+
188
+ def increment_yabeda(metric_name, tags, by)
189
+ return unless defined?(Yabeda)
190
+
191
+ # Convert metric_name to Yabeda format
192
+ # e.g., 'events_total' → Yabeda.e11y_events_total
193
+ yabeda_metric = Yabeda.e11y.public_send(metric_name)
194
+ yabeda_metric.increment(tags, by: by)
195
+ rescue NameError => e
196
+ E11y.logger.warn "Yabeda metric not found: #{metric_name} (#{e.message})"
197
+ end
198
+
199
+ def increment_opentelemetry(metric_name, tags, by)
200
+ return unless defined?(OpenTelemetry)
201
+
202
+ # Convert to OpenTelemetry format
203
+ # e.g., 'events_total' → 'e11y.events.total'
204
+ otel_metric_name = "e11y.#{metric_name.to_s.tr('_', '.')}"
205
+
206
+ meter = OpenTelemetry.meter_provider.meter('e11y')
207
+ counter = meter.create_counter(otel_metric_name, unit: '1', description: 'E11y metric')
208
+ counter.add(by, attributes: tags)
209
+ end
210
+
211
+ def histogram_yabeda(metric_name, value, tags)
212
+ return unless defined?(Yabeda)
213
+
214
+ yabeda_metric = Yabeda.e11y.public_send(metric_name)
215
+ yabeda_metric.measure(tags, value)
216
+ end
217
+
218
+ def histogram_opentelemetry(metric_name, value, tags)
219
+ return unless defined?(OpenTelemetry)
220
+
221
+ otel_metric_name = "e11y.#{metric_name.to_s.tr('_', '.')}"
222
+
223
+ meter = OpenTelemetry.meter_provider.meter('e11y')
224
+ histogram = meter.create_histogram(otel_metric_name, unit: 'ms', description: 'E11y metric')
225
+ histogram.record(value, attributes: tags)
226
+ end
227
+ end
228
+ end
229
+ end
230
+ ```
231
+
232
+ **Usage in E11y (backend-agnostic):**
233
+
234
+ ```ruby
235
+ # lib/e11y/event.rb
236
+ class Event
237
+ def track
238
+ # Single call - backend determined by config
239
+ E11y::Metrics.increment('events_total', {
240
+ event_name: self.event_name,
241
+ severity: self.severity
242
+ })
243
+
244
+ # ... rest of tracking logic
245
+ end
246
+ end
247
+
248
+ # Depending on config.metrics.backend:
249
+ # - :yabeda → Yabeda.e11y_events_total.increment(...)
250
+ # - :opentelemetry → OpenTelemetry counter.add(...)
251
+ # - [:yabeda, :opentelemetry] → BOTH (double overhead!)
252
+ ```
253
+
254
+ **Warning System:**
255
+
256
+ ```ruby
257
+ # lib/e11y/config/metrics.rb
258
+ module E11y
259
+ module Config
260
+ class Metrics
261
+ attr_accessor :backend
262
+
263
+ def initialize
264
+ @backend = :yabeda # Default
265
+ end
266
+
267
+ def backend=(value)
268
+ @backend = value
269
+
270
+ # Warn if both backends enabled
271
+ if Array(value).size > 1
272
+ E11y.logger.warn do
273
+ "⚠️ Multiple metrics backends enabled: #{Array(value).join(', ')}. " \
274
+ "This causes DOUBLE OVERHEAD (CPU, memory, storage). " \
275
+ "Only use multiple backends during migration. " \
276
+ "See ADR-007 and CONFLICT-ANALYSIS.md C03."
277
+ end
278
+ end
279
+ end
280
+ end
281
+ end
282
+ end
283
+ ```
284
+
285
+ **Migration Guide (Yabeda → OpenTelemetry):**
286
+
287
+ ```ruby
288
+ # Step 1: Start with Yabeda (production)
289
+ config.metrics.backend = :yabeda
290
+
291
+ # Step 2: Enable both backends in staging (test OTLP pipeline)
292
+ config.metrics.backend = [:yabeda, :opentelemetry]
293
+ # ⚠️ Monitor: CPU/memory overhead of metrics should be ~2× (expected)
294
+
295
+ # Step 3: Validate OTLP metrics (Grafana dashboards work)
296
+ # Check: e11y.events.total (OTLP) matches e11y_events_total (Prometheus)
297
+
298
+ # Step 4: Switch to OpenTelemetry only in production
299
+ config.metrics.backend = :opentelemetry
300
+
301
+ # Step 5: Remove Yabeda gem dependency (cleanup)
302
+ # gem 'yabeda' - no longer needed
303
+ ```
304
+
305
+ **Performance Impact:**
306
+
307
+ ```ruby
308
+ # Benchmark: 10,000 events/sec
309
+ # Single backend (:yabeda OR :opentelemetry):
310
+ # - CPU: ~5% overhead
311
+ # - Memory: ~10 MB for metric buffers
312
+ # - Latency: +0.1ms per event
313
+
314
+ # Both backends ([:yabeda, :opentelemetry]):
315
+ # - CPU: ~10% overhead (2×)
316
+ # - Memory: ~20 MB (2×)
317
+ # - Latency: +0.2ms per event (2×)
318
+ # ⚠️ Only use during migration (1-2 weeks max)
319
+ ```
320
+
321
+ **Monitoring:**
322
+
323
+ ```ruby
324
+ # Track which backends are active
325
+ E11y::Metrics.gauge('e11y.metrics.backends_active',
326
+ Array(E11y.config.metrics.backend).size,
327
+ { backends: Array(E11y.config.metrics.backend).join(',') }
328
+ )
329
+
330
+ # Alert if multiple backends enabled in production
331
+ # Alert: e11y_metrics_backends_active{env="production"} > 1
332
+ ```
333
+
334
+ **Trade-offs:**
335
+
336
+ | Aspect | Yabeda (default) | OpenTelemetry | Both (migration) |
337
+ |--------|------------------|---------------|------------------|
338
+ | **Performance** | Fast (Ruby-native) | Slightly slower (SDK overhead) | 2× overhead ⚠️ |
339
+ | **Ecosystem** | Rails/Ruby best fit | Vendor-neutral | N/A |
340
+ | **Backend** | Prometheus only | Any OTLP backend | Prometheus + OTLP |
341
+ | **Setup** | Simple (gem install) | Requires OTel Collector | Complex |
342
+ | **Use case** | Rails apps, Prometheus | Multi-language, cloud-native | Migration period only |
343
+
344
+ **Recommendation:**
345
+ - **Rails apps with Prometheus:** Use `:yabeda` (default)
346
+ - **Cloud-native, multi-backend:** Use `:opentelemetry`
347
+ - **Migration period:** Use `[:yabeda, :opentelemetry]` for 1-2 weeks max
348
+
349
+ ### 2.3. Component Architecture
350
+
351
+ ```mermaid
352
+ graph TB
353
+ subgraph "E11y Gem"
354
+ Event[Event Tracked] --> Pipeline[Processing Pipeline]
355
+ Pipeline --> OTelAdapter[OTel Collector Adapter]
356
+
357
+ OTelAdapter --> LogsExporter[Logs Signal Exporter]
358
+ OTelAdapter --> TracesExporter[Traces Signal Exporter]
359
+
360
+ subgraph "Semantic Conventions"
361
+ LogsExporter --> HTTPConv[HTTP Conventions]
362
+ LogsExporter --> DBConv[Database Conventions]
363
+ LogsExporter --> RPCConv[RPC Conventions]
364
+ LogsExporter --> MsgConv[Messaging Conventions]
365
+ end
366
+
367
+ subgraph "Trace Context"
368
+ TracesExporter --> OTelSDK[OTel SDK Integration]
369
+ TracesExporter --> W3CContext[W3C Trace Context]
370
+ TracesExporter --> SpanCreator[Automatic Span Creator]
371
+ end
372
+
373
+ subgraph "Resource Attributes"
374
+ OTelAdapter --> ServiceAttrs[Service Metadata]
375
+ OTelAdapter --> DeploymentAttrs[Deployment Info]
376
+ OTelAdapter --> HostAttrs[Host/Container Info]
377
+ end
378
+ end
379
+
380
+ subgraph "OpenTelemetry Collector"
381
+ OTelCollector[Receiver: OTLP] --> Processors[Processors]
382
+ Processors --> Batch[Batch]
383
+ Processors --> Filter[Filter]
384
+ Processors --> Sampling[Tail Sampling]
385
+ Processors --> Resource[Resource]
386
+
387
+ Sampling --> Exporters[Exporters]
388
+ Exporters --> Loki[Loki]
389
+ Exporters --> Jaeger[Jaeger]
390
+ Exporters --> S3[S3]
391
+ end
392
+
393
+ LogsExporter --> OTelCollector
394
+ TracesExporter --> OTelCollector
395
+
396
+ style OTelAdapter fill:#d1ecf1
397
+ style OTelCollector fill:#fff3cd
398
+ style HTTPConv fill:#d4edda
399
+ ```
400
+
401
+ ### 2.4. Data Flow Sequence
402
+
403
+ ```mermaid
404
+ sequenceDiagram
405
+ participant App as Rails App
406
+ participant E11y as E11y Event
407
+ participant Mapper as Semantic Mapper
408
+ participant Exporter as OTel Exporter
409
+ participant Collector as OTel Collector
410
+ participant Backend as Backend (Loki/Jaeger)
411
+
412
+ App->>E11y: Events::HttpRequest.track(method, status)
413
+ E11y->>Mapper: Map to semantic conventions
414
+ Mapper->>Mapper: method → http.method<br/>status → http.status_code
415
+ Mapper->>Exporter: OTel Log Record
416
+
417
+ Note over Exporter: Add Resource Attributes<br/>service.name, service.version
418
+
419
+ Exporter->>Exporter: Get trace context from OTel SDK
420
+ Exporter->>Exporter: Batch (100 events or 10s)
421
+ Exporter->>Collector: POST /v1/logs (OTLP HTTP)
422
+
423
+ Collector->>Collector: Process: Batch, Filter, Sample
424
+ Collector->>Backend: Export to Loki/Jaeger
425
+ Backend-->>Collector: ACK
426
+ Collector-->>Exporter: 200 OK
427
+ ```
428
+
429
+ ---
430
+
431
+ ## 3. OTel Collector Adapter
432
+
433
+ ### 3.1. Adapter Implementation
434
+
435
+ ```ruby
436
+ # lib/e11y/adapters/opentelemetry_collector.rb
437
+ module E11y
438
+ module Adapters
439
+ class OpenTelemetryCollector < Base
440
+ def initialize(config = {})
441
+ super(name: :opentelemetry_collector)
442
+
443
+ @endpoint = config[:endpoint] || ENV['OTEL_EXPORTER_OTLP_ENDPOINT'] || 'http://localhost:4318'
444
+ @protocol = config[:protocol] || :http # :http or :grpc
445
+ @headers = config[:headers] || {}
446
+ @timeout = config[:timeout] || 10
447
+ @compression = config[:compression] || :gzip # :none, :gzip
448
+
449
+ # Signal types
450
+ @export_logs = config[:export_logs] != false
451
+ @export_traces = config[:export_traces] || false
452
+ @export_metrics = config[:export_metrics] || false
453
+
454
+ # Batching
455
+ @batch_size = config[:batch_size] || 100
456
+ @flush_interval = config[:flush_interval] || 10
457
+
458
+ # Resource attributes (cached once)
459
+ @resource_attributes = build_resource_attributes(config[:resource_attributes] || {})
460
+
461
+ # HTTP client (Faraday with connection pooling)
462
+ @http_client = build_http_client
463
+ end
464
+
465
+ def send_batch(events)
466
+ results = {}
467
+
468
+ # Export logs (most common)
469
+ if @export_logs
470
+ log_records = events.map { |event| to_otel_log_record(event) }
471
+ results[:logs] = export_logs(log_records)
472
+ end
473
+
474
+ # Export traces (spans from events)
475
+ if @export_traces
476
+ spans = events.select { |e| should_create_span?(e) }
477
+ .map { |event| to_otel_span(event) }
478
+ results[:traces] = export_traces(spans) if spans.any?
479
+ end
480
+
481
+ {
482
+ success: results.values.all? { |r| r[:success] },
483
+ sent: events.size,
484
+ results: results
485
+ }
486
+ rescue => error
487
+ {
488
+ success: false,
489
+ error: error.message,
490
+ sent: 0
491
+ }
492
+ end
493
+
494
+ private
495
+
496
+ # === OTLP HTTP Export ===
497
+
498
+ def export_logs(log_records)
499
+ payload = {
500
+ resourceLogs: [{
501
+ resource: {
502
+ attributes: @resource_attributes
503
+ },
504
+ scopeLogs: [{
505
+ scope: {
506
+ name: 'e11y',
507
+ version: E11y::VERSION
508
+ },
509
+ logRecords: log_records
510
+ }]
511
+ }]
512
+ }
513
+
514
+ send_otlp_request('/v1/logs', payload)
515
+ end
516
+
517
+ def export_traces(spans)
518
+ payload = {
519
+ resourceSpans: [{
520
+ resource: {
521
+ attributes: @resource_attributes
522
+ },
523
+ scopeSpans: [{
524
+ scope: {
525
+ name: 'e11y',
526
+ version: E11y::VERSION
527
+ },
528
+ spans: spans
529
+ }]
530
+ }]
531
+ }
532
+
533
+ send_otlp_request('/v1/traces', payload)
534
+ end
535
+
536
+ def send_otlp_request(path, payload)
537
+ response = @http_client.post do |req|
538
+ req.url path
539
+ req.headers['Content-Type'] = 'application/json'
540
+ req.headers['Content-Encoding'] = 'gzip' if @compression == :gzip
541
+ @headers.each { |k, v| req.headers[k] = v }
542
+
543
+ body = payload.to_json
544
+ req.body = @compression == :gzip ? compress_gzip(body) : body
545
+ end
546
+
547
+ {
548
+ success: response.success?,
549
+ status: response.status,
550
+ sent: payload.dig(:resourceLogs, 0, :scopeLogs, 0, :logRecords)&.size || 0
551
+ }
552
+ rescue => error
553
+ { success: false, error: error.message, sent: 0 }
554
+ end
555
+
556
+ # === OTel Log Record Conversion ===
557
+
558
+ def to_otel_log_record(event)
559
+ {
560
+ timeUnixNano: time_to_unix_nano(event[:timestamp]),
561
+ observedTimeUnixNano: time_to_unix_nano(Time.now),
562
+ severityNumber: map_severity_to_otel(event[:severity]),
563
+ severityText: event[:severity].to_s.upcase,
564
+ body: {
565
+ stringValue: event[:event_name]
566
+ },
567
+ attributes: build_log_attributes(event),
568
+ traceId: encode_trace_id(event[:trace_id]),
569
+ spanId: encode_span_id(event[:span_id]),
570
+ flags: event[:trace_flags] || 0
571
+ }.compact
572
+ end
573
+
574
+ def build_log_attributes(event)
575
+ attributes = []
576
+
577
+ # Semantic conventions mapping
578
+ mapped_payload = E11y::OpenTelemetry::SemanticConventions.map(
579
+ event[:event_name],
580
+ event[:payload]
581
+ )
582
+
583
+ # Convert to OTel key-value pairs
584
+ mapped_payload.each do |key, value|
585
+ attributes << {
586
+ key: key.to_s,
587
+ value: encode_otel_value(value)
588
+ }
589
+ end
590
+
591
+ # Add event metadata
592
+ attributes << { key: 'event.name', value: { stringValue: event[:event_name] } }
593
+ attributes << { key: 'event.domain', value: { stringValue: event[:domain] } } if event[:domain]
594
+
595
+ attributes
596
+ end
597
+
598
+ # === OTel Span Conversion ===
599
+
600
+ def to_otel_span(event)
601
+ start_time = time_to_unix_nano(event[:timestamp])
602
+ end_time = event[:duration_ms] ?
603
+ start_time + (event[:duration_ms] * 1_000_000).to_i :
604
+ start_time + 1_000_000 # 1ms default
605
+
606
+ {
607
+ traceId: encode_trace_id(event[:trace_id]),
608
+ spanId: encode_span_id(event[:span_id]),
609
+ parentSpanId: encode_span_id(event[:parent_span_id]),
610
+ name: event[:event_name],
611
+ kind: span_kind_to_otel(event[:span_kind] || :internal),
612
+ startTimeUnixNano: start_time,
613
+ endTimeUnixNano: end_time,
614
+ attributes: build_span_attributes(event),
615
+ status: build_span_status(event)
616
+ }.compact
617
+ end
618
+
619
+ def build_span_attributes(event)
620
+ attributes = []
621
+
622
+ event[:payload].each do |key, value|
623
+ attributes << {
624
+ key: key.to_s,
625
+ value: encode_otel_value(value)
626
+ }
627
+ end
628
+
629
+ attributes
630
+ end
631
+
632
+ def build_span_status(event)
633
+ if event[:severity].in?([:error, :fatal])
634
+ {
635
+ code: 2, # STATUS_CODE_ERROR
636
+ message: event[:payload][:error_message] || 'Error'
637
+ }
638
+ else
639
+ {
640
+ code: 1 # STATUS_CODE_OK
641
+ }
642
+ end
643
+ end
644
+
645
+ # === Resource Attributes ===
646
+
647
+ def build_resource_attributes(custom_attrs)
648
+ attributes = []
649
+
650
+ # Service (REQUIRED)
651
+ attributes << kv('service.name', ENV['SERVICE_NAME'] || 'api')
652
+ attributes << kv('service.version', ENV['GIT_SHA'] || 'unknown')
653
+ attributes << kv('service.instance.id', ENV['HOSTNAME'] || Socket.gethostname)
654
+
655
+ # Deployment
656
+ attributes << kv('deployment.environment', Rails.env.to_s)
657
+ attributes << kv('deployment.region', ENV['AWS_REGION']) if ENV['AWS_REGION']
658
+
659
+ # Host
660
+ attributes << kv('host.name', Socket.gethostname)
661
+ attributes << kv('host.type', ENV['INSTANCE_TYPE']) if ENV['INSTANCE_TYPE']
662
+
663
+ # Container (if applicable)
664
+ if ENV['CONTAINER_ID']
665
+ attributes << kv('container.id', ENV['CONTAINER_ID'])
666
+ attributes << kv('container.name', ENV['CONTAINER_NAME']) if ENV['CONTAINER_NAME']
667
+ end
668
+
669
+ # Kubernetes (if applicable)
670
+ if ENV['K8S_NAMESPACE']
671
+ attributes << kv('k8s.namespace.name', ENV['K8S_NAMESPACE'])
672
+ attributes << kv('k8s.pod.name', ENV['K8S_POD_NAME']) if ENV['K8S_POD_NAME']
673
+ attributes << kv('k8s.deployment.name', ENV['K8S_DEPLOYMENT']) if ENV['K8S_DEPLOYMENT']
674
+ end
675
+
676
+ # Custom attributes
677
+ custom_attrs.each do |key, value|
678
+ attributes << kv(key.to_s, value)
679
+ end
680
+
681
+ attributes
682
+ end
683
+
684
+ def kv(key, value)
685
+ {
686
+ key: key,
687
+ value: encode_otel_value(value)
688
+ }
689
+ end
690
+
691
+ # === Helpers ===
692
+
693
+ def encode_otel_value(value)
694
+ case value
695
+ when String
696
+ { stringValue: value }
697
+ when Integer
698
+ { intValue: value }
699
+ when Float
700
+ { doubleValue: value }
701
+ when TrueClass, FalseClass
702
+ { boolValue: value }
703
+ when Array
704
+ { arrayValue: { values: value.map { |v| encode_otel_value(v) } } }
705
+ when Hash
706
+ { kvlistValue: { values: value.map { |k, v| { key: k.to_s, value: encode_otel_value(v) } } } }
707
+ else
708
+ { stringValue: value.to_s }
709
+ end
710
+ end
711
+
712
+ def time_to_unix_nano(time)
713
+ time = Time.parse(time) if time.is_a?(String)
714
+ (time.to_f * 1_000_000_000).to_i
715
+ end
716
+
717
+ def encode_trace_id(trace_id)
718
+ return nil unless trace_id
719
+ # W3C trace-id is 32 hex chars → 16 bytes → base64
720
+ [trace_id].pack('H*').unpack1('m0')
721
+ end
722
+
723
# Encodes a hex span id as strict (newline-free) Base64 of its raw bytes.
#
# @param span_id [String, nil] hex string (a W3C span-id is 16 hex
#   chars → 8 bytes)
# @return [String, nil] Base64 text, or nil when no id is given
#
# NOTE(review): the OTLP/JSON encoding describes traceId/spanId as hex
# strings rather than Base64 — confirm which form the receiving endpoint
# expects before relying on this.
def encode_span_id(span_id)
  return nil unless span_id

  # pack('H*') turns the hex text into raw bytes; pack('m0') Base64-encodes
  # them. (unpack1('m0') would *decode* and raise on raw bytes.)
  [[span_id].pack('H*')].pack('m0')
end
728
+
729
# Translates an E11y severity into an OTel SeverityNumber.
# See https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber
#
# @param severity [Symbol, String] anything responding to #to_sym
# @return [Integer] severity number; 0 (UNSPECIFIED) for unknown names
def map_severity_to_otel(severity)
  severity_numbers = {
    debug: 5,    # DEBUG
    info: 9,     # INFO
    success: 9,  # INFO (custom severity)
    warn: 13,    # WARN
    error: 17,   # ERROR
    fatal: 21    # FATAL
  }

  severity_numbers.fetch(severity.to_sym, 0)
end
741
+
742
# Translates a span kind name into its OTel SpanKind number.
#
# @param kind [Symbol, String] anything responding to #to_sym
# @return [Integer] span kind number; 0 (UNSPECIFIED) for unknown names
def span_kind_to_otel(kind)
  span_kinds = {
    internal: 1,
    server: 2,
    client: 3,
    producer: 4,
    consumer: 5
  }

  span_kinds.fetch(kind.to_sym, 0)
end
753
+
754
# Decides whether an event should also be exported as a span.
#
# Error and fatal events always qualify; otherwise the event name must
# match one of the configured glob patterns (File.fnmatch semantics).
#
# @param event [Hash] event with :severity and :event_name
# @return [Boolean]
def should_create_span?(event)
  # Plain Array#include? instead of ActiveSupport's Object#in?, so the
  # check also works when ActiveSupport core extensions are not loaded.
  return true if %i[error fatal].include?(event[:severity])

  # Treat an unset pattern list as "no patterns" rather than raising.
  patterns = E11y.config.opentelemetry.span_creation_patterns || []
  patterns.any? { |pattern| File.fnmatch(pattern, event[:event_name]) }
end
763
+
764
# Builds the Faraday connection for @endpoint: request timeout from
# @timeout, the :retry request middleware (max 3, interval 1,
# backoff factor 2) and the :net_http_persistent adapter (pool size 5).
#
# @return [Faraday::Connection]
def build_http_client
  Faraday.new(url: @endpoint) do |conn|
    conn.options.timeout = @timeout
    conn.request(:retry, max: 3, interval: 1, backoff_factor: 2)
    conn.adapter(:net_http_persistent, pool_size: 5)
  end
end
771
+
772
# Gzip-compresses a string in memory.
#
# Uses the standard-library one-shot Zlib.gzip instead of a hand-rolled
# StringIO + GzipWriter pair, so there is no writer left unclosed if
# writing raises and no dependency on StringIO being loaded.
#
# @param data [String] payload to compress
# @return [String] gzip-encoded bytes
def compress_gzip(data)
  require 'zlib' # lazy-loaded: only needed when compression is used
  Zlib.gzip(data)
end
780
+ end
781
+ end
782
+ end
783
+ ```
784
+
785
+ ---
786
+
787
+ ## 4. Semantic Conventions
788
+
789
+ ### 4.1. Semantic Conventions Mapper
790
+
791
+ ```ruby
792
+ # lib/e11y/opentelemetry/semantic_conventions.rb
793
module E11y
  module OpenTelemetry
    # Renames event payload fields to OpenTelemetry semantic-convention
    # attribute names, based on a convention family guessed from the
    # event name.
    class SemanticConventions
      # Semantic conventions registry: family => { payload field => OTel attribute }.
      # Inner tables are frozen too so the registry cannot be mutated at runtime.
      CONVENTIONS = {
        # HTTP Semantic Conventions
        # https://opentelemetry.io/docs/specs/semconv/http/
        http: {
          'method' => 'http.method',
          'route' => 'http.route',
          'path' => 'http.target',
          'status_code' => 'http.status_code',
          'status' => 'http.status_code',
          'duration_ms' => 'http.server.duration',
          'request_size' => 'http.request.body.size',
          'response_size' => 'http.response.body.size',
          'user_agent' => 'http.user_agent',
          'client_ip' => 'http.client_ip',
          'scheme' => 'http.scheme',
          'host' => 'http.host',
          'server_name' => 'http.server_name'
        },

        # Database Semantic Conventions
        # https://opentelemetry.io/docs/specs/semconv/database/
        database: {
          'query' => 'db.statement',
          'statement' => 'db.statement',
          'duration_ms' => 'db.operation.duration',
          'rows_affected' => 'db.operation.rows_affected',
          'connection_id' => 'db.connection.id',
          'database_name' => 'db.name',
          'table_name' => 'db.sql.table',
          'operation' => 'db.operation'
        },

        # RPC/gRPC Semantic Conventions
        # https://opentelemetry.io/docs/specs/semconv/rpc/
        rpc: {
          'service' => 'rpc.service',
          'method' => 'rpc.method',
          'system' => 'rpc.system',
          'status_code' => 'rpc.grpc.status_code'
        },

        # Messaging Semantic Conventions
        # https://opentelemetry.io/docs/specs/semconv/messaging/
        messaging: {
          'queue_name' => 'messaging.destination.name',
          'message_id' => 'messaging.message.id',
          'conversation_id' => 'messaging.message.conversation_id',
          'payload_size' => 'messaging.message.payload_size_bytes',
          'operation' => 'messaging.operation'
        },

        # Exception Semantic Conventions
        # https://opentelemetry.io/docs/specs/semconv/exceptions/
        exception: {
          'error_type' => 'exception.type',
          'error_message' => 'exception.message',
          'error_class' => 'exception.type',
          'stacktrace' => 'exception.stacktrace'
        }
      }.each_value(&:freeze).freeze

      # Maps a payload to OTel attribute names.
      #
      # The result always has String keys, whether or not a convention
      # family was detected, and the input payload is never mutated.
      # System defaults (e.g. 'db.system') are only filled in when the
      # payload did not already provide that attribute.
      #
      # @param event_name [String] event name used to detect the family
      # @param payload [Hash] event payload (Symbol or String keys)
      # @return [Hash{String => Object}] attributes keyed by OTel name
      def self.map(event_name, payload)
        convention_type = detect_convention_type(event_name)

        # No family detected: keep every field, but normalise keys to
        # strings so callers see the same key type on both paths.
        return payload.transform_keys(&:to_s) unless convention_type

        conventions = CONVENTIONS.fetch(convention_type)
        mapped = payload.each_with_object({}) do |(key, value), attrs|
          field = key.to_s
          attrs[conventions.fetch(field, field)] = value
        end

        # Defaults first, mapped payload second: explicit event values win.
        system_attributes(convention_type).merge(mapped)
      end

      # Guesses the convention family from the event name. Patterns are
      # tried in order (http, database, rpc, messaging, exception) and the
      # first match wins.
      #
      # @param event_name [String]
      # @return [Symbol, nil] family key into CONVENTIONS, or nil
      def self.detect_convention_type(event_name)
        case event_name
        when /http|request|response/i
          :http
        when /database|query|sql|postgres|mysql/i
          :database
        when /rpc|grpc/i
          :rpc
        when /message|queue|kafka|rabbitmq|sidekiq|job/i
          :messaging
        when /error|exception|failure/i
          :exception
        end
      end

      # Default system attributes for a family.
      # TODO: these are hard-coded; read them from config.
      #
      # @param convention_type [Symbol]
      # @return [Hash{String => String}]
      def self.system_attributes(convention_type)
        case convention_type
        when :database
          { 'db.system' => 'postgresql' }
        when :rpc
          { 'rpc.system' => 'grpc' }
        when :messaging
          { 'messaging.system' => 'sidekiq' }
        else
          {}
        end
      end
    end
  end
end
911
+ ```
912
+
913
+ ### 4.2. Event-Level Convention Declaration
914
+
915
+ ```ruby
916
+ # app/events/http_request.rb
917
+ module Events
918
+ class HttpRequest < E11y::Event::Base
919
+ # Declare OTel conventions for this event
920
+ use_otel_conventions :http
921
+
922
+ schema do
923
+ required(:method).filled(:string)
924
+ required(:route).filled(:string)
925
+ required(:status_code).filled(:integer)
926
+ required(:duration_ms).filled(:float)
927
+ end
928
+
929
+ # Optional: Custom OTel mapping
930
+ otel_mapping do
931
+ 'http.method' from: :method
932
+ 'http.route' from: :route
933
+ 'http.status_code' from: :status_code
934
+ 'http.server.duration' from: :duration_ms
935
+
936
+ # Static values
937
+ 'http.scheme' value: 'https'
938
+
939
+ # From config
940
+ 'http.server_name' from_config: 'server.name'
941
+ end
942
+ end
943
+ end
944
+
945
+ # Usage: Automatic mapping!
946
+ Events::HttpRequest.track(
947
+ method: 'POST',
948
+ route: '/api/orders',
949
+ status_code: 201,
950
+ duration_ms: 45.2
951
+ )
952
+
953
+ # → OTel Logs Signal receives:
954
+ # {
955
+ # Attributes: {
956
+ # 'http.method': 'POST',
957
+ # 'http.route': '/api/orders',
958
+ # 'http.status_code': 201,
959
+ # 'http.server.duration': 45.2,
960
+ # 'http.scheme': 'https'
961
+ # }
962
+ # }
963
+ ```
964
+
965
+ ---
966
+
967
+ ## 5. Logs Signal Export
968
+
969
+ ### 5.1. OTel Log Record Structure
970
+
971
+ ```ruby
972
+ # OTel Logs Signal format (OTLP JSON)
973
+ {
974
+ resourceLogs: [{
975
+ resource: {
976
+ attributes: [
977
+ { key: 'service.name', value: { stringValue: 'api' } },
978
+ { key: 'service.version', value: { stringValue: 'abc123' } },
979
+ { key: 'deployment.environment', value: { stringValue: 'production' } }
980
+ ]
981
+ },
982
+ scopeLogs: [{
983
+ scope: {
984
+ name: 'e11y',
985
+ version: '1.0.0'
986
+ },
987
+ logRecords: [
988
+ {
989
+ timeUnixNano: 1673520000000000000,
990
+ observedTimeUnixNano: 1673520000000000000,
991
+ severityNumber: 9, # INFO
992
+ severityText: 'INFO',
993
+ body: {
994
+ stringValue: 'Events::OrderCreated'
995
+ },
996
+ attributes: [
997
+ { key: 'order.id', value: { stringValue: '123' } },
998
+ { key: 'order.amount', value: { doubleValue: 99.99 } },
999
+ { key: 'event.name', value: { stringValue: 'Events::OrderCreated' } }
1000
+ ],
1001
+ traceId: 'YWJjMTIzZGVmNDU2', # Base64-encoded
1002
+ spanId: 'eHl6Nzg5', # Base64-encoded
1003
+ flags: 1
1004
+ }
1005
+ ]
1006
+ }]
1007
+ }]
1008
+ }
1009
+ ```
1010
+
1011
+ ---
1012
+
1013
+ ## 6. Traces Signal Export
1014
+
1015
+ ### 6.1. Automatic Span Creation
1016
+
1017
+ ```ruby
1018
# lib/e11y/opentelemetry/span_creator.rb
require 'time'

module E11y
  module OpenTelemetry
    # Creates an already-finished OpenTelemetry span from an E11y event.
    class SpanCreator
      # Severities that always produce a span and mark it as errored.
      ERROR_SEVERITIES = %i[error fatal].freeze

      class << self
        # Creates, populates and finishes a span for the event, as a child
        # of whatever span is current in the OTel context.
        #
        # @param event [Hash] event with :event_name, :severity, :timestamp,
        #   :payload and optionally :duration_ms / :span_kind
        # @return [Object, nil] the finished span, or nil when the event
        #   does not qualify for span creation
        def create_span_from_event(event)
          return unless should_create_span?(event)

          tracer = ::OpenTelemetry.tracer_provider.tracer('e11y', E11y::VERSION)
          started_at = event_time(event[:timestamp])

          # with_parent takes a Context (not a SpanContext); the current
          # Context carries the current span, which becomes the parent.
          # Timestamps are passed as Time objects, not integer nanoseconds.
          span = tracer.start_span(
            event[:event_name],
            with_parent: ::OpenTelemetry::Context.current,
            kind: span_kind(event),
            start_timestamp: started_at
          )

          (event[:payload] || {}).each do |key, value|
            span.set_attribute(key.to_s, value)
          end

          span.status = span_status(event)
          span.finish(end_timestamp: finished_at(event, started_at))

          span
        end

        private

        # Errors always get a span; other events must match a configured
        # glob pattern (File.fnmatch semantics).
        def should_create_span?(event)
          return true if ERROR_SEVERITIES.include?(event[:severity])

          patterns = E11y.config.opentelemetry.span_creation_patterns || []
          patterns.any? { |pattern| File.fnmatch(pattern, event[:event_name]) }
        end

        # Maps the event's :span_kind to the OTel constant (INTERNAL by default).
        def span_kind(event)
          case event[:span_kind]
          when :server then ::OpenTelemetry::Trace::SpanKind::SERVER
          when :client then ::OpenTelemetry::Trace::SpanKind::CLIENT
          when :producer then ::OpenTelemetry::Trace::SpanKind::PRODUCER
          when :consumer then ::OpenTelemetry::Trace::SpanKind::CONSUMER
          else ::OpenTelemetry::Trace::SpanKind::INTERNAL
          end
        end

        # Error status (with the payload's :error_message, or 'Error') for
        # error/fatal events; OK otherwise.
        def span_status(event)
          if ERROR_SEVERITIES.include?(event[:severity])
            ::OpenTelemetry::Trace::Status.error(
              event.dig(:payload, :error_message) || 'Error'
            )
          else
            ::OpenTelemetry::Trace::Status.ok
          end
        end

        # Normalises an event timestamp to a Time: strings are parsed,
        # a missing timestamp falls back to the current time.
        def event_time(time)
          return Time.now if time.nil?

          time.is_a?(String) ? Time.parse(time) : time
        end

        # End time: start plus :duration_ms when present, else now.
        # Rational arithmetic keeps sub-millisecond durations exact.
        def finished_at(event, started_at)
          duration_ms = event[:duration_ms]
          return Time.now unless duration_ms

          started_at + (duration_ms.to_r / 1000)
        end
      end
    end
  end
end
1092
+ ```
1093
+
1094
+ ### 6.2. Configuration
1095
+
1096
+ ```ruby
1097
+ # config/initializers/e11y.rb
1098
+ E11y.configure do |config|
1099
+ config.opentelemetry do
1100
+ enabled true
1101
+
1102
+ # Automatic span creation
1103
+ create_spans_for do
1104
+ # Create spans for errors
1105
+ severity [:error, :fatal]
1106
+
1107
+ # Create spans for order processing
1108
+ pattern 'order.*'
1109
+ pattern 'payment.*'
1110
+
1111
+ # Span settings
1112
+ span_kind :internal
1113
+ span_name ->(event) { event.event_name }
1114
+ end
1115
+ end
1116
+ end
1117
+ ```
1118
+
1119
+ ---
1120
+
1121
+ ## 7. Resource Attributes
1122
+
1123
+ ### 7.1. Resource Attributes Configuration
1124
+
1125
+ ```ruby
1126
+ # config/initializers/e11y.rb
1127
+ E11y.configure do |config|
1128
+ config.opentelemetry do
1129
+ resource_attributes do
1130
+ # Service (REQUIRED)
1131
+ 'service.name' ENV['SERVICE_NAME'] || 'api'
1132
+ 'service.version' ENV['GIT_SHA'] || 'unknown'
1133
+ 'service.instance.id' ENV['HOSTNAME'] || Socket.gethostname
1134
+
1135
+ # Deployment
1136
+ 'deployment.environment' Rails.env.to_s
1137
+ 'deployment.region' ENV['AWS_REGION']
1138
+
1139
+ # Host
1140
+ 'host.name' Socket.gethostname
1141
+ 'host.type' ENV['INSTANCE_TYPE']
1142
+
1143
+ # Container
1144
+ 'container.id' ENV['CONTAINER_ID']
1145
+ 'container.name' ENV['CONTAINER_NAME']
1146
+
1147
+ # Kubernetes
1148
+ 'k8s.namespace.name' ENV['K8S_NAMESPACE']
1149
+ 'k8s.pod.name' ENV['K8S_POD_NAME']
1150
+ 'k8s.deployment.name' ENV['K8S_DEPLOYMENT']
1151
+ end
1152
+ end
1153
+ end
1154
+ ```
1155
+
1156
+ ---
1157
+
1158
+ ## 8. Trace Context Integration
1159
+
1160
+ ### 8.1. OTel SDK Trace Context
1161
+
1162
+ ```ruby
1163
+ # lib/e11y/trace_context/opentelemetry_source.rb
1164
module E11y
  module TraceContext
    # Reads trace context from the OpenTelemetry SDK's current span.
    class OpenTelemetrySource
      # Extracts the current span's identifiers.
      #
      # @return [Hash] :trace_id, :span_id, :trace_flags (1 when sampled,
      #   else 0) and :trace_state; empty when OpenTelemetry is not
      #   loaded or the current span context is not valid
      def self.extract
        # Without this guard, referencing ::OpenTelemetry raises NameError
        # when the SDK is absent, defeating any fallback source.
        return {} unless available?

        span_context = ::OpenTelemetry::Trace.current_span.context

        return {} unless span_context.valid?

        {
          trace_id: span_context.hex_trace_id,
          span_id: span_context.hex_span_id,
          trace_flags: span_context.trace_flags.sampled? ? 1 : 0,
          trace_state: span_context.tracestate.to_s
        }
      end

      # Intentionally a no-op that returns nil: the OTel SDK handles
      # injection itself and E11y only reads from it.
      def self.inject(trace_id:, span_id:, trace_flags: 1, **_options)
        # Not needed: OTel SDK handles injection automatically
        # E11y just reads from OTel SDK
      end

      # @return [Boolean] true when ::OpenTelemetry is defined and has a
      #   tracer provider
      def self.available?
        return false unless defined?(::OpenTelemetry)

        !::OpenTelemetry.tracer_provider.nil?
      end
    end
  end
end
1192
+ ```
1193
+
1194
+ ### 8.2. Configuration
1195
+
1196
+ ```ruby
1197
+ # config/initializers/e11y.rb
1198
+ E11y.configure do |config|
1199
+ config.trace_context do
1200
+ # Use OTel SDK as primary source
1201
+ source :opentelemetry
1202
+
1203
+ # Fallback to E11y if OTel not available
1204
+ fallback_to_e11y true
1205
+ end
1206
+ end
1207
+ ```
1208
+
1209
+ ---
1210
+
1211
+ ## 9. Cardinality Protection (C04 Resolution) ⚠️
1212
+
1213
+ **This section has been moved to [ADR-009 Cost Optimization §8: Cardinality Protection](ADR-009-cost-optimization.md#8-cardinality-protection-c04-resolution).**
1214
+
1215
+ **Rationale:** Cardinality explosion is a **cost optimization concern** affecting ALL backends (Yabeda/Prometheus, OpenTelemetry, Loki), not just OTLP. The unified solution is now documented in ADR-009.
1216
+
1217
+ **Summary:**
1218
+ - ✅ Unified cardinality protection for **all adapters** (Yabeda, OpenTelemetry, Loki)
1219
+ - ✅ Single config: `E11y.config.cardinality_protection` applies globally
1220
+ - ✅ Per-backend overrides: `inherit_from :global` or custom limits
1221
+ - ✅ 90% cost reduction for high-cardinality OTLP attributes
1222
+
1223
+ See [ADR-009 §8](ADR-009-cost-optimization.md#8-cardinality-protection-c04-resolution) for full implementation details.
1224
+
1225
+ ---
1226
+
1227
+ ## 10. Testing Strategy
1228
+
1229
+ ### 10.1. OTel Adapter Tests
1230
+
1231
+ ```ruby
1232
+ # spec/e11y/adapters/opentelemetry_collector_spec.rb
1233
+ RSpec.describe E11y::Adapters::OpenTelemetryCollector do
1234
+ let(:adapter) do
1235
+ described_class.new(
1236
+ endpoint: 'http://localhost:4318',
1237
+ export_logs: true,
1238
+ export_traces: false
1239
+ )
1240
+ end
1241
+
1242
+ describe '#send_batch' do
1243
+ it 'exports events as OTel Logs Signal' do
1244
+ stub_request(:post, 'http://localhost:4318/v1/logs')
1245
+ .to_return(status: 200, body: '{}')
1246
+
1247
+ events = [
1248
+ {
1249
+ timestamp: Time.now.iso8601,
1250
+ event_name: 'Events::OrderCreated',
1251
+ severity: :info,
1252
+ payload: { order_id: '123' },
1253
+ trace_id: 'abc123',
1254
+ span_id: 'def456'
1255
+ }
1256
+ ]
1257
+
1258
+ result = adapter.send_batch(events)
1259
+
1260
+ expect(result[:success]).to be true
1261
+ expect(result[:sent]).to eq(1)
1262
+
1263
+ # Verify OTLP format
1264
+ expect(WebMock).to have_requested(:post, 'http://localhost:4318/v1/logs')
1265
+ .with { |req|
1266
+ body = JSON.parse(req.body)
1267
+ expect(body['resourceLogs']).to be_present
1268
+ expect(body['resourceLogs'][0]['scopeLogs'][0]['logRecords'].size).to eq(1)
1269
+ }
1270
+ end
1271
+
1272
+ it 'applies semantic conventions' do
1273
+ stub_request(:post, 'http://localhost:4318/v1/logs')
1274
+ .to_return(status: 200)
1275
+
1276
+ events = [
1277
+ {
1278
+ timestamp: Time.now.iso8601,
1279
+ event_name: 'Events::HttpRequest',
1280
+ severity: :info,
1281
+ payload: { method: 'POST', status_code: 201 }
1282
+ }
1283
+ ]
1284
+
1285
+ adapter.send_batch(events)
1286
+
1287
+ expect(WebMock).to have_requested(:post, 'http://localhost:4318/v1/logs')
1288
+ .with { |req|
1289
+ body = JSON.parse(req.body)
1290
+ attributes = body['resourceLogs'][0]['scopeLogs'][0]['logRecords'][0]['attributes']
1291
+
1292
+ # Check semantic conventions mapping
1293
+ expect(attributes).to include(
1294
+ { 'key' => 'http.method', 'value' => { 'stringValue' => 'POST' } },
1295
+ { 'key' => 'http.status_code', 'value' => { 'intValue' => 201 } }
1296
+ )
1297
+ }
1298
+ end
1299
+ end
1300
+ end
1301
+ ```
1302
+
1303
+ ### 10.2. Semantic Conventions Tests
1304
+
1305
+ ```ruby
1306
+ # spec/e11y/opentelemetry/semantic_conventions_spec.rb
1307
RSpec.describe E11y::OpenTelemetry::SemanticConventions do
  describe '.map' do
    it 'maps HTTP fields to OTel conventions' do
      result = described_class.map(
        'Events::HttpRequest',
        method: 'POST',
        status_code: 201,
        duration_ms: 45.2
      )

      # No 'http.scheme' here: the HTTP family has no system attributes,
      # so only the fields present in the payload are emitted.
      expect(result).to eq(
        'http.method' => 'POST',
        'http.status_code' => 201,
        'http.server.duration' => 45.2
      )
    end

    it 'maps database fields to OTel conventions' do
      result = described_class.map(
        'Events::DatabaseQuery',
        query: 'SELECT * FROM orders',
        duration_ms: 12.5
      )

      expect(result).to eq(
        'db.statement' => 'SELECT * FROM orders',
        'db.operation.duration' => 12.5,
        'db.system' => 'postgresql' # System attribute
      )
    end

    it 'preserves unmapped fields' do
      # A field with no entry in the HTTP table keeps its own name.
      result = described_class.map(
        'Events::HttpRequest',
        custom_field: 'value'
      )

      expect(result).to eq(
        'custom_field' => 'value'
      )
    end
  end
end
1351
+ ```
1352
+
1353
+ ---
1354
+
1355
+ ## 11. Trade-offs
1356
+
1357
+ ### 11.1. Key Decisions
1358
+
1359
+ | Decision | Pro | Con | Rationale |
1360
+ |----------|-----|-----|-----------|
1361
+ | **Optional v1.1+** | No breaking changes | Later adoption | Rails 8+ ecosystem first |
1362
+ | **OTel Collector required** | Advanced features | Extra component | Industry standard |
1363
+ | **Logs Signal primary** | Best fit for events | Not traces-first | E11y is event-focused |
1364
+ | **Yabeda for metrics (C03)** ⚠️ | Better Rails integration | Separate from OTLP | Yabeda is superior for Rails |
1365
+ | **HTTP OTLP only** | Simple, universal | No gRPC (v1) | HTTP covers ~95% of use cases |
1366
+
1367
+ ### 11.2. Alternatives Considered
1368
+
1369
+ **A) Direct OTel SDK Integration**
1370
+ - ❌ Rejected: Too complex for v1.0, optional for v1.1+
1371
+
1372
+ **B) Replace All Adapters with OTel**
1373
+ - ❌ Rejected: Breaks existing users; OTel is an enhancement, not a replacement
1374
+
1375
+ **C) Metrics Signal Export**
1376
+ - ❌ Rejected: Yabeda is better for Rails metrics
1377
+
1378
+ **D) gRPC OTLP Support**
1379
+ - ⏳ Deferred: v1.2+ (HTTP is sufficient for v1.1)
1380
+
1381
+ ---
1382
+
1383
+ **Status:** ✅ Draft Complete
1384
+ **Next:** Implementation (v1.1 release)
1385
+ **Estimated Implementation:** 2 weeks