e11y 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +4 -0
  3. data/.rubocop.yml +69 -0
  4. data/CHANGELOG.md +26 -0
  5. data/CODE_OF_CONDUCT.md +64 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +179 -0
  8. data/Rakefile +37 -0
  9. data/benchmarks/run_all.rb +33 -0
  10. data/config/README.md +83 -0
  11. data/config/loki-local-config.yaml +35 -0
  12. data/config/prometheus.yml +15 -0
  13. data/docker-compose.yml +78 -0
  14. data/docs/00-ICP-AND-TIMELINE.md +483 -0
  15. data/docs/01-SCALE-REQUIREMENTS.md +858 -0
  16. data/docs/ADR-001-architecture.md +2617 -0
  17. data/docs/ADR-002-metrics-yabeda.md +1395 -0
  18. data/docs/ADR-003-slo-observability.md +3337 -0
  19. data/docs/ADR-004-adapter-architecture.md +2385 -0
  20. data/docs/ADR-005-tracing-context.md +1372 -0
  21. data/docs/ADR-006-security-compliance.md +4143 -0
  22. data/docs/ADR-007-opentelemetry-integration.md +1385 -0
  23. data/docs/ADR-008-rails-integration.md +1911 -0
  24. data/docs/ADR-009-cost-optimization.md +2993 -0
  25. data/docs/ADR-010-developer-experience.md +2166 -0
  26. data/docs/ADR-011-testing-strategy.md +1836 -0
  27. data/docs/ADR-012-event-evolution.md +958 -0
  28. data/docs/ADR-013-reliability-error-handling.md +2750 -0
  29. data/docs/ADR-014-event-driven-slo.md +1533 -0
  30. data/docs/ADR-015-middleware-order.md +1061 -0
  31. data/docs/ADR-016-self-monitoring-slo.md +1234 -0
  32. data/docs/API-REFERENCE-L28.md +914 -0
  33. data/docs/COMPREHENSIVE-CONFIGURATION.md +2366 -0
  34. data/docs/IMPLEMENTATION_NOTES.md +2804 -0
  35. data/docs/IMPLEMENTATION_PLAN.md +1971 -0
  36. data/docs/IMPLEMENTATION_PLAN_ARCHITECTURE.md +586 -0
  37. data/docs/PLAN.md +148 -0
  38. data/docs/QUICK-START.md +934 -0
  39. data/docs/README.md +296 -0
  40. data/docs/design/00-memory-optimization.md +593 -0
  41. data/docs/guides/MIGRATION-L27-L28.md +692 -0
  42. data/docs/guides/PERFORMANCE-BENCHMARKS.md +434 -0
  43. data/docs/guides/README.md +44 -0
  44. data/docs/prd/01-overview-vision.md +440 -0
  45. data/docs/use_cases/README.md +119 -0
  46. data/docs/use_cases/UC-001-request-scoped-debug-buffering.md +813 -0
  47. data/docs/use_cases/UC-002-business-event-tracking.md +1953 -0
  48. data/docs/use_cases/UC-003-pattern-based-metrics.md +1627 -0
  49. data/docs/use_cases/UC-004-zero-config-slo-tracking.md +728 -0
  50. data/docs/use_cases/UC-005-sentry-integration.md +759 -0
  51. data/docs/use_cases/UC-006-trace-context-management.md +905 -0
  52. data/docs/use_cases/UC-007-pii-filtering.md +2648 -0
  53. data/docs/use_cases/UC-008-opentelemetry-integration.md +1153 -0
  54. data/docs/use_cases/UC-009-multi-service-tracing.md +1043 -0
  55. data/docs/use_cases/UC-010-background-job-tracking.md +1018 -0
  56. data/docs/use_cases/UC-011-rate-limiting.md +1906 -0
  57. data/docs/use_cases/UC-012-audit-trail.md +2301 -0
  58. data/docs/use_cases/UC-013-high-cardinality-protection.md +2127 -0
  59. data/docs/use_cases/UC-014-adaptive-sampling.md +1940 -0
  60. data/docs/use_cases/UC-015-cost-optimization.md +735 -0
  61. data/docs/use_cases/UC-016-rails-logger-migration.md +785 -0
  62. data/docs/use_cases/UC-017-local-development.md +867 -0
  63. data/docs/use_cases/UC-018-testing-events.md +1081 -0
  64. data/docs/use_cases/UC-019-tiered-storage-migration.md +562 -0
  65. data/docs/use_cases/UC-020-event-versioning.md +708 -0
  66. data/docs/use_cases/UC-021-error-handling-retry-dlq.md +956 -0
  67. data/docs/use_cases/UC-022-event-registry.md +648 -0
  68. data/docs/use_cases/backlog.md +226 -0
  69. data/e11y.gemspec +76 -0
  70. data/lib/e11y/adapters/adaptive_batcher.rb +207 -0
  71. data/lib/e11y/adapters/audit_encrypted.rb +239 -0
  72. data/lib/e11y/adapters/base.rb +580 -0
  73. data/lib/e11y/adapters/file.rb +224 -0
  74. data/lib/e11y/adapters/in_memory.rb +216 -0
  75. data/lib/e11y/adapters/loki.rb +333 -0
  76. data/lib/e11y/adapters/otel_logs.rb +203 -0
  77. data/lib/e11y/adapters/registry.rb +141 -0
  78. data/lib/e11y/adapters/sentry.rb +230 -0
  79. data/lib/e11y/adapters/stdout.rb +108 -0
  80. data/lib/e11y/adapters/yabeda.rb +370 -0
  81. data/lib/e11y/buffers/adaptive_buffer.rb +339 -0
  82. data/lib/e11y/buffers/base_buffer.rb +40 -0
  83. data/lib/e11y/buffers/request_scoped_buffer.rb +246 -0
  84. data/lib/e11y/buffers/ring_buffer.rb +267 -0
  85. data/lib/e11y/buffers.rb +14 -0
  86. data/lib/e11y/console.rb +122 -0
  87. data/lib/e11y/current.rb +48 -0
  88. data/lib/e11y/event/base.rb +894 -0
  89. data/lib/e11y/event/value_sampling_config.rb +84 -0
  90. data/lib/e11y/events/base_audit_event.rb +43 -0
  91. data/lib/e11y/events/base_payment_event.rb +33 -0
  92. data/lib/e11y/events/rails/cache/delete.rb +21 -0
  93. data/lib/e11y/events/rails/cache/read.rb +23 -0
  94. data/lib/e11y/events/rails/cache/write.rb +22 -0
  95. data/lib/e11y/events/rails/database/query.rb +45 -0
  96. data/lib/e11y/events/rails/http/redirect.rb +21 -0
  97. data/lib/e11y/events/rails/http/request.rb +26 -0
  98. data/lib/e11y/events/rails/http/send_file.rb +21 -0
  99. data/lib/e11y/events/rails/http/start_processing.rb +26 -0
  100. data/lib/e11y/events/rails/job/completed.rb +22 -0
  101. data/lib/e11y/events/rails/job/enqueued.rb +22 -0
  102. data/lib/e11y/events/rails/job/failed.rb +22 -0
  103. data/lib/e11y/events/rails/job/scheduled.rb +23 -0
  104. data/lib/e11y/events/rails/job/started.rb +22 -0
  105. data/lib/e11y/events/rails/log.rb +56 -0
  106. data/lib/e11y/events/rails/view/render.rb +23 -0
  107. data/lib/e11y/events.rb +18 -0
  108. data/lib/e11y/instruments/active_job.rb +201 -0
  109. data/lib/e11y/instruments/rails_instrumentation.rb +141 -0
  110. data/lib/e11y/instruments/sidekiq.rb +175 -0
  111. data/lib/e11y/logger/bridge.rb +205 -0
  112. data/lib/e11y/metrics/cardinality_protection.rb +172 -0
  113. data/lib/e11y/metrics/cardinality_tracker.rb +134 -0
  114. data/lib/e11y/metrics/registry.rb +234 -0
  115. data/lib/e11y/metrics/relabeling.rb +226 -0
  116. data/lib/e11y/metrics.rb +102 -0
  117. data/lib/e11y/middleware/audit_signing.rb +174 -0
  118. data/lib/e11y/middleware/base.rb +140 -0
  119. data/lib/e11y/middleware/event_slo.rb +167 -0
  120. data/lib/e11y/middleware/pii_filter.rb +266 -0
  121. data/lib/e11y/middleware/pii_filtering.rb +280 -0
  122. data/lib/e11y/middleware/rate_limiting.rb +214 -0
  123. data/lib/e11y/middleware/request.rb +163 -0
  124. data/lib/e11y/middleware/routing.rb +157 -0
  125. data/lib/e11y/middleware/sampling.rb +254 -0
  126. data/lib/e11y/middleware/slo.rb +168 -0
  127. data/lib/e11y/middleware/trace_context.rb +131 -0
  128. data/lib/e11y/middleware/validation.rb +118 -0
  129. data/lib/e11y/middleware/versioning.rb +132 -0
  130. data/lib/e11y/middleware.rb +12 -0
  131. data/lib/e11y/pii/patterns.rb +90 -0
  132. data/lib/e11y/pii.rb +13 -0
  133. data/lib/e11y/pipeline/builder.rb +155 -0
  134. data/lib/e11y/pipeline/zone_validator.rb +110 -0
  135. data/lib/e11y/pipeline.rb +12 -0
  136. data/lib/e11y/presets/audit_event.rb +65 -0
  137. data/lib/e11y/presets/debug_event.rb +34 -0
  138. data/lib/e11y/presets/high_value_event.rb +51 -0
  139. data/lib/e11y/presets.rb +19 -0
  140. data/lib/e11y/railtie.rb +138 -0
  141. data/lib/e11y/reliability/circuit_breaker.rb +216 -0
  142. data/lib/e11y/reliability/dlq/file_storage.rb +277 -0
  143. data/lib/e11y/reliability/dlq/filter.rb +117 -0
  144. data/lib/e11y/reliability/retry_handler.rb +207 -0
  145. data/lib/e11y/reliability/retry_rate_limiter.rb +117 -0
  146. data/lib/e11y/sampling/error_spike_detector.rb +225 -0
  147. data/lib/e11y/sampling/load_monitor.rb +161 -0
  148. data/lib/e11y/sampling/stratified_tracker.rb +92 -0
  149. data/lib/e11y/sampling/value_extractor.rb +82 -0
  150. data/lib/e11y/self_monitoring/buffer_monitor.rb +79 -0
  151. data/lib/e11y/self_monitoring/performance_monitor.rb +97 -0
  152. data/lib/e11y/self_monitoring/reliability_monitor.rb +146 -0
  153. data/lib/e11y/slo/event_driven.rb +150 -0
  154. data/lib/e11y/slo/tracker.rb +119 -0
  155. data/lib/e11y/version.rb +9 -0
  156. data/lib/e11y.rb +283 -0
  157. metadata +452 -0
@@ -0,0 +1,1372 @@
1
+ # ADR-005: Tracing & Context Management
2
+
3
+ **Status:** Draft
4
+ **Date:** January 12, 2026
5
+ **Covers:** UC-006 (Trace Context Management), UC-009 (Multi-Service Tracing)
6
+ **Depends On:** ADR-001 (Core), ADR-008 (Rails Integration)
7
+
8
+ ---
9
+
10
+ ## 📋 Table of Contents
11
+
12
+ 1. [Context & Problem](#1-context--problem)
13
+ 2. [Architecture Overview](#2-architecture-overview)
14
+ 3. [Current (Thread-Local Storage)](#3-current-thread-local-storage)
15
+ 4. [Trace ID Generation](#4-trace-id-generation)
16
+ 5. [W3C Trace Context](#5-w3c-trace-context)
17
+ 6. [Context Propagation](#6-context-propagation)
18
+ 7. [Sampling Decisions](#7-sampling-decisions)
19
+ 8. [Context Inheritance](#8-context-inheritance)
20
+ - 8.3. [Background Job Tracing Strategy (C17 Resolution)](#83-background-job-tracing-strategy-c17-resolution) ⚠️ CRITICAL
21
+ - 8.3.1. The Problem: Unbounded Traces
22
+ - 8.3.2. Decision: Hybrid Model (New Trace + Parent Link)
23
+ - 8.3.3. SidekiqTraceMiddleware Implementation
24
+ - 8.3.4. Configuration
25
+ - 8.3.5. Querying Full Flow (Request → Job)
26
+ - 8.3.6. Schema Changes
27
+ - 8.3.7. Trade-offs (C17 Resolution)
28
+ 9. [Trade-offs](#9-trade-offs)
29
+
30
+ ---
31
+
32
+ ## 1. Context & Problem
33
+
34
+ ### 1.1. Problem Statement
35
+
36
+ **Current Pain Points:**
37
+
38
+ 1. **No Trace Correlation:**
39
+ ```ruby
40
+ # ❌ Can't correlate events across requests
41
+ Events::OrderCreated.track(order_id: 123)
42
+ # Later in Sidekiq:
43
+ Events::EmailSent.track(order_id: 123)
44
+ # → No way to link these two events
45
+ ```
46
+
47
+ 2. **Lost Context in Background Jobs:**
48
+ ```ruby
49
+ # ❌ User context lost when job executes
50
+ def create
51
+ @current_user = User.find(params[:user_id])
52
+ SendEmailJob.perform_later(order_id: 123)
53
+ # Job has no idea about @current_user
54
+ end
55
+ ```
56
+
57
+ 3. **No Cross-Service Tracing:**
58
+ ```ruby
59
+ # ❌ Can't trace requests across microservices
60
+ response = HTTP.get("https://payment-service/charge")
61
+ # Payment service has no idea this is part of the same trace
62
+ ```
63
+
64
+ 4. **Manual Context Passing:**
65
+ ```ruby
66
+ # ❌ Must manually pass context everywhere
67
+ def process_order(order_id, trace_id:, user_id:, request_id:)
68
+ Events::OrderProcessing.track(
69
+ order_id: order_id,
70
+ trace_id: trace_id,
71
+ user_id: user_id,
72
+ request_id: request_id
73
+ )
74
+ end
75
+ ```
76
+
77
+ ### 1.2. Goals
78
+
79
+ **Primary Goals:**
80
+ - ✅ **Automatic trace ID generation** for every request/job
81
+ - ✅ **W3C Trace Context** standard support
82
+ - ✅ **Thread-safe context storage** (Rails `ActiveSupport::CurrentAttributes` pattern)
83
+ - ✅ **Context propagation** to background jobs
84
+ - ✅ **Cross-service tracing** via HTTP headers
85
+ - ✅ **Sampling consistency** across distributed traces
86
+
87
+ **Non-Goals:**
88
+ - ❌ Full OpenTelemetry SDK (see ADR-007)
89
+ - ❌ Automatic span creation (manual spans only)
90
+ - ❌ Distributed transactions
91
+
92
+ ### 1.3. Success Metrics
93
+
94
+ | Metric | Target | Critical? |
95
+ |--------|--------|-----------|
96
+ | **Context lookup overhead** | <100ns p99 | ✅ Yes |
97
+ | **Trace ID collision rate** | <1 in 10^15 | ✅ Yes |
98
+ | **Context propagation rate** | >99.9% | ✅ Yes |
99
+ | **Cross-service trace coverage** | >95% | ✅ Yes |
100
+
101
+ ---
102
+
103
+ ## 2. Architecture Overview
104
+
105
+ ### 2.1. System Context
106
+
107
+ ```mermaid
108
+ C4Context
109
+ title Tracing & Context Management Context
110
+
111
+ Person(user, "User", "Makes HTTP request")
112
+
113
+ System(rails_app, "Rails App", "Web application")
114
+
115
+ System(e11y, "E11y Gem", "Context management")
116
+
117
+ System_Ext(sidekiq, "Sidekiq", "Background jobs")
118
+ System_Ext(payment_service, "Payment Service", "Microservice")
119
+ System_Ext(frontend, "Frontend", "SPA/Mobile")
120
+
121
+ Rel(user, frontend, "Uses")
122
+ Rel(frontend, rails_app, "HTTP request", "traceparent header")
123
+ Rel(rails_app, e11y, "Auto trace context", "E11y::Current")
124
+ Rel(e11y, sidekiq, "Propagate context", "Job metadata")
125
+ Rel(rails_app, payment_service, "HTTP request", "traceparent header")
126
+ Rel(payment_service, rails_app, "Uses E11y", "Same trace_id")
127
+ ```
128
+
129
+ ### 2.2. Component Architecture
130
+
131
+ ```mermaid
132
+ graph TB
133
+ subgraph "Request Entry Points"
134
+ HTTPRequest[HTTP Request] --> Middleware[Rack Middleware]
135
+ JobExecution[Job Execution] --> JobMiddleware[Sidekiq/ActiveJob MW]
136
+ ManualAPI[Manual API Call] --> CurrentAPI[E11y::Current.set]
137
+ end
138
+
139
+ subgraph "Context Storage (Thread-Local)"
140
+ Middleware --> Current[E11y::Current]
141
+ JobMiddleware --> Current
142
+ CurrentAPI --> Current
143
+
144
+ Current --> TraceID[trace_id]
145
+ Current --> SpanID[span_id]
146
+ Current --> ParentSpanID[parent_span_id]
147
+ Current --> UserID[user_id]
148
+ Current --> RequestID[request_id]
149
+ Current --> Sampled[sampled]
150
+ Current --> Baggage[baggage]
151
+ end
152
+
153
+ subgraph "Trace ID Management"
154
+ Current --> TraceGenerator[Trace ID Generator]
155
+ TraceGenerator --> W3CParser[W3C Trace Context Parser]
156
+ TraceGenerator --> UUIDGen[UUID Generator]
157
+ end
158
+
159
+ subgraph "Context Propagation"
160
+ Current --> HTTPPropagator[HTTP Header Propagator]
161
+ Current --> JobPropagator[Job Metadata Propagator]
162
+ Current --> LogPropagator[Structured Log Propagator]
163
+ end
164
+
165
+ subgraph "Event Pipeline"
166
+ Current --> EventTracking[Event Tracking]
167
+ EventTracking --> EnrichWithContext[Auto-enrich with Context]
168
+ end
169
+
170
+ style Current fill:#d1ecf1
171
+ style TraceGenerator fill:#fff3cd
172
+ style HTTPPropagator fill:#d4edda
173
+ ```
174
+
175
+ ### 2.3. Context Lifecycle
176
+
177
+ ```mermaid
178
+ sequenceDiagram
179
+ participant Browser as Browser
180
+ participant Rack as Rack Middleware
181
+ participant Current as E11y::Current
182
+ participant Controller as Rails Controller
183
+ participant Sidekiq as Sidekiq Job
184
+ participant Event as Event Tracking
185
+
186
+ Note over Browser: User makes request
187
+ Browser->>Rack: GET /orders<br/>traceparent: 00-abc123...
188
+
189
+ Rack->>Current: Extract or generate trace_id
190
+ Current->>Current: Store in thread-local
191
+
192
+ Note over Current: trace_id: abc123<br/>span_id: def456<br/>user_id: 42
193
+
194
+ Rack->>Controller: Process request
195
+
196
+ Controller->>Event: Track event
197
+ Event->>Current: Read context
198
+ Current-->>Event: Return trace_id, user_id, etc
199
+ Event->>Event: Enrich event with context
200
+
201
+ Controller->>Sidekiq: Enqueue job
202
+ Sidekiq->>Current: Read trace_id
203
+ Current-->>Sidekiq: abc123
204
+ Sidekiq->>Sidekiq: Store in job metadata
205
+
206
+ Note over Sidekiq: Later... job executes
207
+
208
+ Sidekiq->>Current: Restore trace_id from metadata
209
+ Current->>Current: Store in NEW thread-local
210
+
211
+ Note over Current: Same trace_id: abc123<br/>New span_id: ghi789<br/>parent_span_id: def456
212
+
213
+ Sidekiq->>Event: Track job event
214
+ Event->>Current: Read context
215
+ Current-->>Event: Same trace_id!
216
+
217
+ Rack->>Current: Reset context
218
+ Current->>Current: Clear thread-local
219
+ ```
220
+
221
+ ---
222
+
223
+ ## 3. Current (Thread-Local Storage)
224
+
225
+ ### 3.1. E11y::Current Implementation
226
+
227
+ **Design Decision:** Use `ActiveSupport::CurrentAttributes` for thread-safe storage.
228
+
229
+ ```ruby
230
+ # lib/e11y/current.rb
231
+ module E11y
232
+ class Current < ActiveSupport::CurrentAttributes
233
+ # Core trace attributes
234
+ attribute :trace_id
235
+ attribute :span_id
236
+ attribute :parent_span_id
237
+
238
+ # Request/job attributes
239
+ attribute :request_id
240
+ attribute :job_id
241
+ attribute :job_class
242
+
243
+ # User/tenant attributes
244
+ attribute :user_id
245
+ attribute :tenant_id
246
+ attribute :organization_id
247
+
248
+ # Sampling decision
249
+ attribute :sampled
250
+
251
+ # Custom baggage (key-value pairs)
252
+ attribute :baggage
253
+
254
+ # IP and user agent (for security/audit)
255
+ attribute :ip_address
256
+ attribute :user_agent
257
+
258
+ # Delegation methods for convenience
259
+ class << self
260
+ # Set multiple attributes at once
261
+ def set(attributes = {})
262
+ attributes.each do |key, value|
263
+ public_send(:"#{key}=", value)
264
+ end
265
+ end
266
+
267
+ # Get all current attributes as hash
268
+ def to_h
269
+ {
270
+ trace_id: trace_id,
271
+ span_id: span_id,
272
+ parent_span_id: parent_span_id,
273
+ request_id: request_id,
274
+ job_id: job_id,
275
+ job_class: job_class,
276
+ user_id: user_id,
277
+ tenant_id: tenant_id,
278
+ organization_id: organization_id,
279
+ sampled: sampled,
280
+ baggage: baggage,
281
+ ip_address: ip_address,
282
+ user_agent: user_agent
283
+ }.compact
284
+ end
285
+
286
+ # Check if we're in a traced context
287
+ def traced?
288
+ trace_id.present?
289
+ end
290
+
291
+ # Create a new span (child of current span)
292
+ def create_span(name)
293
+ new_span_id = E11y::TraceContext.generate_span_id
294
+
295
+ span = Span.new(
296
+ trace_id: trace_id || E11y::TraceContext.generate_id,
297
+ span_id: new_span_id,
298
+ parent_span_id: span_id,
299
+ name: name,
300
+ started_at: Time.now
301
+ )
302
+
303
+ # Update current span_id
304
+ self.span_id = new_span_id
305
+
306
+ span
307
+ end
308
+
309
+ # Add baggage (key-value metadata)
310
+ def add_baggage(key, value)
311
+ self.baggage ||= {}
312
+ self.baggage[key.to_s] = value.to_s
313
+ end
314
+
315
+ # Get baggage value
316
+ def get_baggage(key)
317
+ baggage&.[](key.to_s)
318
+ end
319
+ end
320
+
321
+ # Span helper class
322
+ class Span
323
+ attr_reader :trace_id, :span_id, :parent_span_id, :name, :started_at
324
+ attr_accessor :finished_at, :status, :attributes
325
+
326
+ def initialize(trace_id:, span_id:, parent_span_id:, name:, started_at:)
327
+ @trace_id = trace_id
328
+ @span_id = span_id
329
+ @parent_span_id = parent_span_id
330
+ @name = name
331
+ @started_at = started_at
332
+ @attributes = {}
333
+ end
334
+
335
+ def finish(status: :ok)
336
+ @finished_at = Time.now
337
+ @status = status
338
+
339
+ duration = (@finished_at - @started_at) * 1000 # ms
340
+
341
+ # Track span as event
342
+ Events::Span.track(
343
+ span_name: @name,
344
+ trace_id: @trace_id,
345
+ span_id: @span_id,
346
+ parent_span_id: @parent_span_id,
347
+ duration: duration,
348
+ status: status,
349
+ attributes: @attributes
350
+ )
351
+
352
+ self
353
+ end
354
+
355
+ def add_attribute(key, value)
356
+ @attributes[key] = value
357
+ end
358
+ end
359
+ end
360
+ end
361
+ ```
362
+
363
+ ### 3.2. Usage Examples
364
+
365
+ ```ruby
366
+ # Read current context
367
+ E11y::Current.trace_id # => "abc123..."
368
+ E11y::Current.user_id # => 42
369
+
370
+ # Set context manually
371
+ E11y::Current.set(
372
+ trace_id: 'custom-trace-123',
373
+ user_id: 99,
374
+ tenant_id: 'acme-corp'
375
+ )
376
+
377
+ # Check if traced
378
+ if E11y::Current.traced?
379
+ puts "We're in a trace!"
380
+ end
381
+
382
+ # Get all context
383
+ E11y::Current.to_h
384
+ # => { trace_id: "abc...", user_id: 42, ... }
385
+
386
+ # Create a manual span
387
+ span = E11y::Current.create_span('database_query')
388
+ span.add_attribute('table', 'orders')
389
+ # ... do work ...
390
+ span.finish(status: :ok)
391
+
392
+ # Baggage (propagated metadata)
393
+ E11y::Current.add_baggage('experiment_id', 'exp-42')
394
+ E11y::Current.get_baggage('experiment_id') # => "exp-42"
395
+ ```
396
+
397
+ ---
398
+
399
+ ## 4. Trace ID Generation
400
+
401
+ ### 4.1. Trace ID Generator
402
+
403
+ ```ruby
404
+ # lib/e11y/trace_context/id_generator.rb
405
+ module E11y
406
+ module TraceContext
407
+ module IDGenerator
408
+ # W3C Trace Context format:
409
+ # trace-id: 32 hex chars (128 bits)
410
+ # span-id: 16 hex chars (64 bits)
411
+
412
+ # Generate trace ID (128 bits = 16 bytes)
413
+ def self.generate_trace_id
414
+ SecureRandom.hex(16) # 32 hex chars
415
+ end
416
+
417
+ # Generate span ID (64 bits = 8 bytes)
418
+ def self.generate_span_id
419
+ SecureRandom.hex(8) # 16 hex chars
420
+ end
421
+
422
+ # Validate trace ID format
423
+ def self.valid_trace_id?(trace_id)
424
+ trace_id.is_a?(String) &&
425
+ trace_id.match?(/\A[0-9a-f]{32}\z/) &&
426
+ trace_id != '00000000000000000000000000000000' # Not all zeros
427
+ end
428
+
429
+ # Validate span ID format
430
+ def self.valid_span_id?(span_id)
431
+ span_id.is_a?(String) &&
432
+ span_id.match?(/\A[0-9a-f]{16}\z/) &&
433
+ span_id != '0000000000000000' # Not all zeros
434
+ end
435
+
436
+ # Convert UUID to trace ID (for compatibility)
437
+ def self.uuid_to_trace_id(uuid)
438
+ uuid.delete('-').downcase
439
+ end
440
+ end
441
+
442
+ # Convenience methods
443
+ def self.generate_id
444
+ IDGenerator.generate_trace_id
445
+ end
446
+
447
+ def self.generate_span_id
448
+ IDGenerator.generate_span_id
449
+ end
450
+ end
451
+ end
452
+ ```
453
+
454
+ ---
455
+
456
+ ## 5. W3C Trace Context
457
+
458
+ ### 5.1. W3C Trace Context Standard
459
+
460
+ **Format:** `traceparent: 00-{trace-id}-{parent-id}-{trace-flags}`
461
+
462
+ **Example:** `traceparent: 00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01`
463
+
464
+ - `00` = version
465
+ - `0af7651916cd43dd8448eb211c80319c` = trace-id (32 hex chars)
466
+ - `b7ad6b7169203331` = parent-id (16 hex chars)
467
+ - `01` = trace-flags (01 = sampled)
468
+
469
+ ### 5.2. W3C Parser & Generator
470
+
471
+ ```ruby
472
+ # lib/e11y/trace_context/w3c.rb
473
+ module E11y
474
+ module TraceContext
475
+ module W3C
476
+ VERSION = '00'
477
+
478
+ # Parse W3C traceparent header
479
+ def self.parse_traceparent(header)
480
+ return nil unless header.is_a?(String)
481
+
482
+ parts = header.split('-')
483
+ return nil unless parts.size == 4
484
+
485
+ version, trace_id, parent_id, flags = parts
486
+
487
+ # Validate format
488
+ return nil unless version == VERSION
489
+ return nil unless IDGenerator.valid_trace_id?(trace_id)
490
+ return nil unless IDGenerator.valid_span_id?(parent_id)
491
+
492
+ sampled = (flags.to_i(16) & 0x01) == 1
493
+
494
+ {
495
+ version: version,
496
+ trace_id: trace_id,
497
+ parent_span_id: parent_id,
498
+ sampled: sampled,
499
+ flags: flags
500
+ }
501
+ end
502
+
503
+ # Generate W3C traceparent header
504
+ def self.generate_traceparent(trace_id:, span_id:, sampled: true)
505
+ flags = sampled ? '01' : '00'
506
+
507
+ "#{VERSION}-#{trace_id}-#{span_id}-#{flags}"
508
+ end
509
+
510
+ # Parse W3C tracestate header (optional)
511
+ def self.parse_tracestate(header)
512
+ return {} unless header.is_a?(String)
513
+
514
+ header.split(',').each_with_object({}) do |entry, hash|
515
+ key, value = entry.split('=', 2)
516
+ hash[key.strip] = value.strip if key && value
517
+ end
518
+ end
519
+
520
+ # Generate W3C tracestate header
521
+ def self.generate_tracestate(state_hash)
522
+ state_hash.map { |k, v| "#{k}=#{v}" }.join(',')
523
+ end
524
+ end
525
+ end
526
+ end
527
+ ```
528
+
529
+ ### 5.3. HTTP Header Extraction
530
+
531
+ ```ruby
532
+ # lib/e11y/trace_context/http_extractor.rb
533
+ module E11y
534
+ module TraceContext
535
+ class HTTPExtractor
536
+ TRACEPARENT_HEADER = 'HTTP_TRACEPARENT'
537
+ TRACESTATE_HEADER = 'HTTP_TRACESTATE'
538
+
539
+ # Legacy headers (fallback)
540
+ X_REQUEST_ID = 'HTTP_X_REQUEST_ID'
541
+ X_TRACE_ID = 'HTTP_X_TRACE_ID'
542
+ X_CORRELATION_ID = 'HTTP_X_CORRELATION_ID'
543
+
544
+ def self.extract(env)
545
+ # Try W3C Trace Context first
546
+ if env[TRACEPARENT_HEADER]
547
+ extract_w3c(env)
548
+ # Fallback to legacy headers
549
+ elsif env[X_TRACE_ID]
550
+ extract_legacy(env)
551
+ else
552
+ # No trace context → generate new
553
+ generate_new
554
+ end
555
+ end
556
+
557
+ private
558
+
559
+ def self.extract_w3c(env)
560
+ context = W3C.parse_traceparent(env[TRACEPARENT_HEADER])
561
+ return generate_new unless context
562
+
563
+ tracestate = W3C.parse_tracestate(env[TRACESTATE_HEADER])
564
+
565
+ {
566
+ trace_id: context[:trace_id],
567
+ parent_span_id: context[:parent_span_id],
568
+ span_id: IDGenerator.generate_span_id, # New span for this service
569
+ sampled: context[:sampled],
570
+ tracestate: tracestate,
571
+ format: :w3c
572
+ }
573
+ end
574
+
575
+ def self.extract_legacy(env)
576
+ trace_id = env[X_TRACE_ID] ||
577
+ env[X_REQUEST_ID] ||
578
+ env[X_CORRELATION_ID]
579
+
580
+ # Convert to W3C format if needed
581
+ trace_id = normalize_trace_id(trace_id)
582
+
583
+ {
584
+ trace_id: trace_id,
585
+ span_id: IDGenerator.generate_span_id,
586
+ sampled: true, # Assume sampled for legacy
587
+ format: :legacy
588
+ }
589
+ end
590
+
591
+ def self.generate_new
592
+ {
593
+ trace_id: IDGenerator.generate_trace_id,
594
+ span_id: IDGenerator.generate_span_id,
595
+ sampled: true,
596
+ format: :new
597
+ }
598
+ end
599
+
600
+ def self.normalize_trace_id(trace_id)
601
+ # If UUID format, convert to W3C
602
+ if trace_id.include?('-') && trace_id.length == 36
603
+ IDGenerator.uuid_to_trace_id(trace_id)
604
+ # If already 32 hex chars, use as-is
605
+ elsif trace_id.match?(/\A[0-9a-f]{32}\z/i)
606
+ trace_id.downcase
607
+ # Otherwise, hash it to 32 hex chars
608
+ else
609
+ Digest::SHA256.hexdigest(trace_id)[0...32]
610
+ end
611
+ end
612
+ end
613
+ end
614
+ end
615
+ ```
616
+
617
+ ---
618
+
619
+ ## 6. Context Propagation
620
+
621
+ ### 6.1. HTTP Propagator (Outgoing Requests)
622
+
623
+ ```ruby
624
+ # lib/e11y/trace_context/http_propagator.rb
625
+ module E11y
626
+ module TraceContext
627
+ class HTTPPropagator
628
+ # Inject trace context into HTTP headers
629
+ def self.inject(headers = {})
630
+ return headers unless E11y::Current.traced?
631
+
632
+ trace_id = E11y::Current.trace_id
633
+ span_id = E11y::Current.span_id
634
+ sampled = E11y::Current.sampled
635
+
636
+ # W3C Trace Context
637
+ headers['traceparent'] = W3C.generate_traceparent(
638
+ trace_id: trace_id,
639
+ span_id: span_id,
640
+ sampled: sampled
641
+ )
642
+
643
+ # Add tracestate if baggage present
644
+ if E11y::Current.baggage&.any?
645
+ headers['tracestate'] = W3C.generate_tracestate(
646
+ E11y::Current.baggage
647
+ )
648
+ end
649
+
650
+ # Legacy headers (for backwards compatibility)
651
+ headers['X-Request-ID'] = E11y::Current.request_id if E11y::Current.request_id
652
+ headers['X-Trace-ID'] = trace_id
653
+
654
+ headers
655
+ end
656
+
657
+ # Helper for common HTTP clients
658
+ def self.wrap_faraday(conn)
659
+ conn.use :instrumentation do |faraday|
660
+ faraday.request :headers do |req|
661
+ inject(req.headers)
662
+ end
663
+ end
664
+ end
665
+
666
+ def self.wrap_http_rb(http)
667
+ headers = inject
668
+ headers.each { |k, v| http = http.headers(k => v) }
669
+ http
670
+ end
671
+ end
672
+ end
673
+ end
674
+ ```
675
+
676
+ ### 6.2. Job Propagator (Sidekiq/ActiveJob)
677
+
678
+ This is already covered in ADR-008 (Rails Integration); the core logic is repeated here for reference:
679
+
680
+ ```ruby
681
+ # lib/e11y/trace_context/job_propagator.rb
682
+ module E11y
683
+ module TraceContext
684
+ class JobPropagator
685
+ # Inject trace context into job metadata
686
+ def self.inject(job_metadata = {})
687
+ return job_metadata unless E11y::Current.traced?
688
+
689
+ job_metadata['e11y_trace_id'] = E11y::Current.trace_id
690
+ job_metadata['e11y_span_id'] = E11y::Current.span_id
691
+ job_metadata['e11y_sampled'] = E11y::Current.sampled
692
+
693
+ # Propagate baggage
694
+ if E11y::Current.baggage&.any?
695
+ job_metadata['e11y_baggage'] = E11y::Current.baggage
696
+ end
697
+
698
+ # Propagate user/tenant context
699
+ job_metadata['e11y_user_id'] = E11y::Current.user_id if E11y::Current.user_id
700
+ job_metadata['e11y_tenant_id'] = E11y::Current.tenant_id if E11y::Current.tenant_id
701
+
702
+ job_metadata
703
+ end
704
+
705
+ # Extract trace context from job metadata
706
+ def self.extract(job_metadata)
707
+ return {} unless job_metadata['e11y_trace_id']
708
+
709
+ {
710
+ trace_id: job_metadata['e11y_trace_id'],
711
+ parent_span_id: job_metadata['e11y_span_id'],
712
+ span_id: IDGenerator.generate_span_id, # New span for job
713
+ sampled: job_metadata['e11y_sampled'],
714
+ baggage: job_metadata['e11y_baggage'],
715
+ user_id: job_metadata['e11y_user_id'],
716
+ tenant_id: job_metadata['e11y_tenant_id']
717
+ }
718
+ end
719
+ end
720
+ end
721
+ end
722
+ ```
723
+
724
+ ### 6.3. Structured Log Propagator
725
+
726
+ ```ruby
727
+ # lib/e11y/trace_context/log_propagator.rb
728
+ module E11y
729
+ module TraceContext
730
+ class LogPropagator
731
+ # Add trace context to structured log entry
732
+ def self.inject(log_entry = {})
733
+ return log_entry unless E11y::Current.traced?
734
+
735
+ log_entry.merge(
736
+ trace_id: E11y::Current.trace_id,
737
+ span_id: E11y::Current.span_id,
738
+ parent_span_id: E11y::Current.parent_span_id,
739
+ user_id: E11y::Current.user_id,
740
+ tenant_id: E11y::Current.tenant_id
741
+ ).compact
742
+ end
743
+ end
744
+ end
745
+ end
746
+ ```
747
+
748
+ ---
749
+
750
+ ## 7. Sampling Decisions
751
+
752
+ ### 7.1. Trace-Consistent Sampling
753
+
754
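+ **Design Decision:** The sampling decision is made once, at the trace entry point, and then propagated downstream (HTTP headers, job metadata) so that every participant in a trace shares the same decision.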
755
+
756
+ ```ruby
757
+ # lib/e11y/trace_context/sampler.rb
758
+ module E11y
759
+ module TraceContext
760
+ class Sampler
761
+ def initialize(config)
762
+ @default_rate = config.default_sample_rate
763
+ @per_event_rates = config.per_event_sample_rates
764
+ end
765
+
766
+ # Decide if this trace should be sampled
767
+ def should_sample?(context = {})
768
+ # If sampling decision already made (from parent), respect it
769
+ return context[:sampled] if context.key?(:sampled)
770
+
771
+ # Apply sampling rules
772
+ sample_rate = determine_sample_rate(context)
773
+
774
+ # Random sampling
775
+ rand < sample_rate
776
+ end
777
+
778
+ private
779
+
780
+ def determine_sample_rate(context)
781
+ # Priority 1: Always sample errors
782
+ return 1.0 if context[:error]
783
+
784
+ # Priority 2: Per-event sampling
785
+ if context[:event_name]
786
+ rate = @per_event_rates[context[:event_name]]
787
+ return rate if rate
788
+ end
789
+
790
+ # Priority 3: Per-user sampling (for debugging)
791
+ if context[:user_id] && debug_user?(context[:user_id])
792
+ return 1.0
793
+ end
794
+
795
+ # Default sampling rate
796
+ @default_rate
797
+ end
798
+
799
+ def debug_user?(user_id)
800
+ # Check if user is in debug mode (e.g., via feature flag)
801
+ E11y.config.debug_users.include?(user_id)
802
+ end
803
+ end
804
+ end
805
+ end
806
+ ```
807
+
808
+ ### 7.2. Sampling Configuration
809
+
810
+ ```ruby
811
+ # config/initializers/e11y.rb
812
+ E11y.configure do |config|
813
+ config.tracing do
814
+ # Default sample rate (10% of traces)
815
+ default_sample_rate 0.1
816
+
817
+ # Per-event sampling
818
+ per_event_sample_rates do
819
+ event 'payment.processed', sample_rate: 1.0 # Always sample
820
+ event 'order.created', sample_rate: 0.5 # 50%
821
+ event 'health_check', sample_rate: 0.01 # 1%
822
+ end
823
+
824
+ # Always sample for debug users
825
+ debug_users [123, 456] # User IDs
826
+
827
+ # Respect parent sampling decision
828
+ respect_parent_sampling true # Default: true
829
+ end
830
+ end
831
+ ```
832
+
833
+ ---
834
+
835
+ ## 8. Context Inheritance
836
+
837
+ ### 8.1. Context Inheritance Patterns
838
+
839
+ ```ruby
840
+ # lib/e11y/trace_context/inheritance.rb
841
+ module E11y
842
+ module TraceContext
843
+ module Inheritance
844
+ # Execute block with inherited context
845
+ def self.with_inherited_context(parent_context, &block)
846
+ previous_context = E11y::Current.attributes
847
+
848
+ begin
849
+ # Inherit from parent, but create new span
850
+ E11y::Current.set(
851
+ trace_id: parent_context[:trace_id],
852
+ parent_span_id: parent_context[:span_id],
853
+ span_id: IDGenerator.generate_span_id,
854
+ sampled: parent_context[:sampled],
855
+ baggage: parent_context[:baggage]&.dup,
856
+ user_id: parent_context[:user_id],
857
+ tenant_id: parent_context[:tenant_id]
858
+ )
859
+
860
+ yield
861
+ ensure
862
+ E11y::Current.set(previous_context)
863
+ end
864
+ end
865
+
866
+ # Fork context for parallel execution: runs the block in a new Thread with inherited context
867
+ def self.fork_context(&block)
868
+ parent_context = E11y::Current.to_h
869
+
870
+ Thread.new do
871
+ with_inherited_context(parent_context, &block)
872
+ end
873
+ end
874
+ end
875
+ end
876
+ end
877
+ ```
878
+
879
+ ### 8.2. Usage Examples
880
+
881
+ ```ruby
882
+ # Execute with inherited context
883
+ parent_context = E11y::Current.to_h
884
+
885
+ E11y::TraceContext::Inheritance.with_inherited_context(parent_context) do
886
+ # This block runs with parent's trace_id but new span_id
887
+ Events::ChildTask.track(task_id: 42)
888
+ end
889
+
890
+ # Fork context for parallel execution
891
+ threads = 5.times.map do |i|
892
+ E11y::TraceContext::Inheritance.fork_context do
893
+ # Each thread gets its own span but shares trace_id
894
+ Events::ParallelTask.track(index: i)
895
+ end
896
+ end
897
+
898
+ threads.each(&:join)
899
+ ```
900
+
901
+ ### 8.3. Background Job Tracing Strategy (C17 Resolution)
902
+
903
+ > **⚠️ CRITICAL: C17 Conflict Resolution - Background Job Tracing Strategy**
904
+ > **See:** [CONFLICT-ANALYSIS.md C17](researches/CONFLICT-ANALYSIS.md#c17-sidekiq-job-trace-context--parent-request-trace-uc-010--uc-009) for detailed analysis
905
+ > **Problem:** Should Sidekiq jobs inherit parent trace_id or start new trace?
906
+ > **Solution:** Hybrid model - jobs start NEW trace but LINK to parent
907
+
908
+ #### 8.3.1. The Problem: Unbounded Traces
909
+
910
+ **When a web request enqueues a background job, two competing models exist:**
911
+
912
+ ```ruby
913
+ # Scenario:
914
+ # Web request (trace_id: abc-123) enqueues Sidekiq job
915
+
916
+ # Model A: Job INHERITS parent trace_id (same trace_id)
917
+ # Result: ONE continuous trace (request → job)
918
+ # Problem: Trace duration UNBOUNDED (job may run hours later!)
919
+ # Problem: SLO metrics SKEWED (trace includes async work)
920
+
921
+ # Model B: Job STARTS new trace_id (new trace)
922
+ # Result: TWO separate traces (request trace + job trace)
923
+ # Problem: Can't see full end-to-end flow in single trace
924
+ # Problem: Lost context (job doesn't know parent)
925
+ ```
926
+
927
+ **Architectural Trade-off:**
928
+ - ✅ **Model A (inherit):** Complete trace, easy debugging
929
+ - ❌ **Model A (inherit):** Unbounded duration, skewed SLOs
930
+ - ✅ **Model B (new trace):** Bounded traces, accurate SLOs
931
+ - ❌ **Model B (new trace):** Lost parent context, complex querying
932
+
933
+ #### 8.3.2. Decision: Hybrid Model (New Trace + Parent Link)
934
+
935
+ **Approved Solution:**
936
+ Jobs start a **NEW trace** (fresh `trace_id`) but **LINK to the parent** trace via the `parent_trace_id` field.
937
+
938
+ ```ruby
939
+ # lib/e11y/trace_context/job_strategy.rb
940
+ module E11y
941
+ module TraceContext
942
+ class JobStrategy
943
+ # Trace strategies for background jobs
944
+ STRATEGIES = {
945
+ # Job starts NEW trace, stores link to parent (RECOMMENDED)
946
+ start_new_with_link: -> (parent_context) {
947
+ {
948
+ trace_id: IDGenerator.generate_trace_id, # ← NEW trace!
949
+ span_id: IDGenerator.generate_span_id,
950
+ parent_trace_id: parent_context[:trace_id], # ← Link to parent
951
+ parent_span_id: parent_context[:span_id],
952
+ sampled: parent_context[:sampled], # Inherit sampling
953
+ baggage: parent_context[:baggage],
954
+ user_id: parent_context[:user_id],
955
+ tenant_id: parent_context[:tenant_id]
956
+ }
957
+ },
958
+
959
+ # Job INHERITS parent trace_id (same trace)
960
+ inherit_parent: -> (parent_context) {
961
+ {
962
+ trace_id: parent_context[:trace_id], # ← SAME trace
963
+ parent_span_id: parent_context[:span_id],
964
+ span_id: IDGenerator.generate_span_id, # New span
965
+ sampled: parent_context[:sampled],
966
+ baggage: parent_context[:baggage],
967
+ user_id: parent_context[:user_id],
968
+ tenant_id: parent_context[:tenant_id]
969
+ }
970
+ },
971
+
972
+ # Job starts NEW trace, NO link (isolated)
973
+ start_new_isolated: -> (parent_context) {
974
+ {
975
+ trace_id: IDGenerator.generate_trace_id, # ← NEW trace
976
+ span_id: IDGenerator.generate_span_id,
977
+ parent_trace_id: nil, # ← NO link
978
+ sampled: parent_context[:sampled], # Still inherit sampling
979
+ baggage: parent_context[:baggage],
980
+ user_id: parent_context[:user_id],
981
+ tenant_id: parent_context[:tenant_id]
982
+ }
983
+ }
984
+ }.freeze
985
+
986
+ # Apply strategy to create job trace context
987
+ def self.apply(strategy, parent_context)
988
+ strategy_fn = STRATEGIES.fetch(strategy) do
989
+ raise ArgumentError, "Unknown strategy: #{strategy}"
990
+ end
991
+
992
+ strategy_fn.call(parent_context)
993
+ end
994
+ end
995
+ end
996
+ end
997
+ ```
998
+
999
+ #### 8.3.3. SidekiqTraceMiddleware Implementation
1000
+
1001
+ **Sidekiq server middleware (job execution):**
1002
+
1003
+ ```ruby
1004
+ # lib/e11y/middleware/sidekiq_trace_middleware.rb
1005
+ module E11y
1006
+ module Middleware
1007
+ class SidekiqTraceMiddleware
1008
+ def call(worker, job, queue)
1009
+ # Extract parent context from job metadata
1010
+ parent_context = extract_parent_context(job)
1011
+
1012
+ # Determine trace strategy (default: start_new_with_link)
1013
+ strategy = worker.class.e11y_trace_strategy || :start_new_with_link
1014
+
1015
+ # Apply strategy to create job trace context
1016
+ job_context = E11y::TraceContext::JobStrategy.apply(
1017
+ strategy,
1018
+ parent_context
1019
+ )
1020
+
1021
+ # Set trace context for job execution
1022
+ E11y::Current.set(job_context)
1023
+
1024
+ # Track job execution start
1025
+ Events::JobStarted.track(
1026
+ job_class: worker.class.name,
1027
+ job_id: job['jid'],
1028
+ queue: queue,
1029
+ parent_trace_id: job_context[:parent_trace_id] # ← Link!
1030
+ )
1031
+
1032
+ yield
1033
+
1034
+ # Track job success
1035
+ Events::JobCompleted.track(
1036
+ job_class: worker.class.name,
1037
+ job_id: job['jid'],
1038
+ queue: queue
1039
+ )
1040
+ rescue => e
1041
+ # Track job failure
1042
+ Events::JobFailed.track(
1043
+ job_class: worker.class.name,
1044
+ job_id: job['jid'],
1045
+ queue: queue,
1046
+ error_class: e.class.name,
1047
+ error_message: e.message
1048
+ )
1049
+ raise
1050
+ ensure
1051
+ E11y::Current.reset
1052
+ end
1053
+
1054
+ private
1055
+
1056
+ def extract_parent_context(job)
1057
+ {
1058
+ trace_id: job['e11y_trace_id'],
1059
+ span_id: job['e11y_span_id'],
1060
+ sampled: job['e11y_sampled'],
1061
+ baggage: job['e11y_baggage'],
1062
+ user_id: job['e11y_user_id'],
1063
+ tenant_id: job['e11y_tenant_id']
1064
+ }.compact
1065
+ end
1066
+ end
1067
+ end
1068
+ end
1069
+
1070
+ # Configure Sidekiq server
1071
+ Sidekiq.configure_server do |config|
1072
+ config.server_middleware do |chain|
1073
+ chain.add E11y::Middleware::SidekiqTraceMiddleware
1074
+ end
1075
+ end
1076
+ ```
1077
+
1078
+ **Sidekiq client middleware (job enqueue):**
1079
+
1080
+ ```ruby
1081
+ # lib/e11y/middleware/sidekiq_client_middleware.rb
1082
+ module E11y
1083
+ module Middleware
1084
+ class SidekiqClientMiddleware
1085
+ def call(worker_class, job, queue, redis_pool)
1086
+ # Inject current trace context into job metadata
1087
+ if E11y::Current.traced?
1088
+ job['e11y_trace_id'] = E11y::Current.trace_id
1089
+ job['e11y_span_id'] = E11y::Current.span_id
1090
+ job['e11y_sampled'] = E11y::Current.sampled
1091
+ job['e11y_baggage'] = E11y::Current.baggage if E11y::Current.baggage&.any?
1092
+ job['e11y_user_id'] = E11y::Current.user_id if E11y::Current.user_id
1093
+ job['e11y_tenant_id'] = E11y::Current.tenant_id if E11y::Current.tenant_id
1094
+ end
1095
+
1096
+ yield
1097
+ end
1098
+ end
1099
+ end
1100
+ end
1101
+
1102
+ # Configure Sidekiq client
1103
+ Sidekiq.configure_client do |config|
1104
+ config.client_middleware do |chain|
1105
+ chain.add E11y::Middleware::SidekiqClientMiddleware
1106
+ end
1107
+ end
1108
+ ```
1109
+
1110
+ #### 8.3.4. Configuration
1111
+
1112
+ **Global default strategy:**
1113
+
1114
+ ```ruby
1115
+ # config/initializers/e11y.rb
1116
+ E11y.configure do |config|
1117
+ config.tracing do |tracing|
1118
+ # Default strategy for ALL jobs
1119
+ tracing.background_jobs.default_strategy = :start_new_with_link
1120
+
1121
+ # Alternative strategies:
1122
+ # - :inherit_parent (job uses same trace_id as parent)
1123
+ # - :start_new_isolated (job gets new trace, no link)
1124
+ end
1125
+ end
1126
+ ```
1127
+
1128
+ **Per-job strategy override:**
1129
+
1130
+ ```ruby
1131
+ # app/jobs/urgent_email_job.rb
1132
+ class UrgentEmailJob < ApplicationJob
1133
+ include Sidekiq::Job
1134
+
1135
+ # Override: Fast jobs (< 1 sec) can inherit parent trace
1136
+ e11y_trace_strategy :inherit_parent
1137
+
1138
+ def perform(order_id)
1139
+ # This job runs in SAME trace as parent request
1140
+ Events::EmailSent.track(order_id: order_id)
1141
+ end
1142
+ end
1143
+
1144
+ # app/jobs/batch_report_job.rb
1145
+ class BatchReportJob < ApplicationJob
1146
+ include Sidekiq::Job
1147
+
1148
+ # Explicit default: slow jobs (may run hours later) should start a new trace
1149
+ e11y_trace_strategy :start_new_with_link # (default)
1150
+
1151
+ def perform(report_id)
1152
+ # This job runs in NEW trace, linked to parent
1153
+ Events::ReportGenerated.track(report_id: report_id)
1154
+ end
1155
+ end
1156
+ ```
1157
+
1158
+ #### 8.3.5. Querying Full Flow (Request → Job)
1159
+
1160
+ **How to reconstruct full end-to-end flow:**
1161
+
1162
+ ```ruby
1163
+ # Find parent request trace
1164
+ parent_trace = Trace.find_by(trace_id: 'abc-123')
1165
+
1166
+ # Find all child job traces (via parent_trace_id link)
1167
+ child_traces = Trace.where(parent_trace_id: 'abc-123')
1168
+
1169
+ # Result:
1170
+ # Parent trace: abc-123 (request)
1171
+ # → Child trace: xyz-789 (SendOrderEmailJob)
1172
+ # → Child trace: def-456 (ProcessPaymentJob)
1173
+
1174
+ # Equivalent raw SQL query for the full flow (SQL, not Ruby):
1175
+ SELECT * FROM events
1176
+ WHERE trace_id = 'abc-123' -- Parent request events
1177
+ OR parent_trace_id = 'abc-123' -- Child job events
1178
+ ORDER BY created_at;
1179
+ ```
1180
+
1181
+ **Example flow with hybrid model:**
1182
+
1183
+ ```ruby
1184
+ # 1. Web request (trace_id: abc-123)
1185
+ POST /orders
1186
+ → Events::OrderCreated (trace_id: abc-123, span_id: span-001)
1187
+ → Enqueue SendOrderEmailJob (metadata: {e11y_trace_id: 'abc-123'})
1188
+
1189
+ # 2. Sidekiq job execution (NEW trace_id: xyz-789)
1190
+ SendOrderEmailJob#perform
1191
+ → SidekiqTraceMiddleware applies :start_new_with_link strategy
1192
+ → NEW trace_id: xyz-789, parent_trace_id: abc-123
1193
+ → Events::JobStarted (trace_id: xyz-789, parent_trace_id: abc-123)
1194
+ → Events::EmailSent (trace_id: xyz-789, span_id: span-001)
1195
+ → Events::JobCompleted (trace_id: xyz-789)
1196
+
1197
+ # Result: TWO traces with LINK
1198
+ # Trace abc-123: OrderCreated (request)
1199
+ # Trace xyz-789: JobStarted, EmailSent, JobCompleted (linked via parent_trace_id)
1200
+ ```
1201
+
1202
+ #### 8.3.6. Schema Changes
1203
+
1204
+ **Add `parent_trace_id` field to events table:**
1205
+
1206
+ ```ruby
1207
+ # db/migrate/XXXXXX_add_parent_trace_id_to_events.rb
1208
+ class AddParentTraceIdToEvents < ActiveRecord::Migration[8.0]
1209
+ def change
1210
+ add_column :events, :parent_trace_id, :string, limit: 32, null: true
1211
+ add_index :events, :parent_trace_id
1212
+
1213
+ # For querying full flow: WHERE trace_id = X OR parent_trace_id = X
1214
+ add_index :events, [:trace_id, :parent_trace_id]
1215
+ end
1216
+ end
1217
+ ```
1218
+
1219
+ **Update Event base class:**
1220
+
1221
+ ```ruby
1222
+ # lib/e11y/event.rb
1223
+ module E11y
1224
+ class Event
1225
+ attribute :parent_trace_id, :string # ← NEW field
1226
+
1227
+ # Auto-populate from E11y::Current
1228
+ def initialize(attributes = {})
1229
+ super
1230
+
1231
+ self.trace_id ||= E11y::Current.trace_id
1232
+ self.span_id ||= E11y::Current.span_id
1233
+ self.parent_trace_id ||= E11y::Current.parent_trace_id # ← NEW!
1234
+ self.user_id ||= E11y::Current.user_id
1235
+ self.tenant_id ||= E11y::Current.tenant_id
1236
+ end
1237
+ end
1238
+ end
1239
+ ```
1240
+
1241
+ #### 8.3.7. Trade-offs (C17 Resolution)
1242
+
1243
+ | Aspect | Hybrid Model (start_new_with_link) | Inherit Parent | Start New Isolated |
1244
+ |--------|-------------------------------------|----------------|--------------------|
1245
+ | **Trace Boundaries** | ✅ Clear (request vs job) | ❌ Unbounded (spans hours) | ✅ Clear (no link) |
1246
+ | **SLO Accuracy** | ✅ Accurate (separate latencies) | ❌ Skewed (includes job time) | ✅ Accurate |
1247
+ | **End-to-End Visibility** | ✅ Can reconstruct (via link) | ✅ Single trace view | ❌ Lost (no link) |
1248
+ | **Querying Complexity** | ⚠️ Must follow links (extra `parent_trace_id` lookup) | ✅ Simple (single trace_id) | ✅ Simple (isolated) |
1249
+ | **Storage Cost** | ⚠️ Two trace IDs to store | ✅ Single trace_id | ✅ Single trace_id |
1250
+ | **Use Case** | ✅ **RECOMMENDED (default)** | ⚠️ Fast jobs only (< 1s) | ⚠️ Isolated jobs only |
1251
+
1252
+ **Why Hybrid Model is Default:**
1253
+ 1. ✅ **Clear trace boundaries** - Request SLO ≠ Job SLO
1254
+ 2. ✅ **Accurate metrics** - Can measure request latency separately from job latency
1255
+ 3. ✅ **Bounded traces** - Traces have clear start/end (not hours long)
1256
+ 4. ✅ **Still linked** - Can reconstruct full flow via `parent_trace_id`
1257
+ 5. ✅ **Flexible** - Can override per-job if needed
1258
+
1259
+ **Related Conflicts and Use Cases:**
1260
+ - **C05:** Trace-aware sampling (see ADR-009 §3.6)
1261
+ - **C11:** Stratified sampling (see ADR-009 §3.7)
1262
+ - **UC-010:** Background Job Tracking
1263
+ - **UC-009:** Multi-Service Tracing
1264
+
1265
+ ---
1266
+
1267
+ ## 9. Trade-offs
1268
+
1269
+ ### 9.1. Key Decisions
1270
+
1271
+ | Decision | Pro | Con | Rationale |
1272
+ |----------|-----|-----|-----------|
1273
+ | **ActiveSupport::CurrentAttributes** | Rails-native, thread-safe | Rails dependency | Perfect fit for Rails 8+ |
1274
+ | **W3C Trace Context** | Industry standard | More complex than UUID | Future-proof, interop |
1275
+ | **128-bit trace ID** | No collisions | Longer strings | W3C requirement |
1276
+ | **Trace-consistent sampling** | Distributed traces work | Complex propagation | Critical for multi-service |
1277
+ | **Auto-enrich events** | Zero boilerplate | Implicit behavior | DX > explicit |
1278
+ | **Baggage propagation** | Flexible metadata | Size overhead | Limited use, opt-in |
1279
+ | **Manual spans** | Simple | Less automation | v1.0 scope |
1280
+ | **Hybrid job tracing (C17)** ⚠️ | Clear boundaries, accurate SLOs | More complex queries | Prevents unbounded traces |
1281
+
1282
+ ### 9.2. Alternatives Considered
1283
+
1284
+ **A) Global variables for context**
1285
+ - ❌ Rejected: Not thread-safe
1286
+
1287
+ **B) OpenTelemetry SDK**
1288
+ - ❌ Rejected for v1.0: Too heavy, see ADR-007
1289
+
1290
+ **C) UUID v4 for trace ID**
1291
+ - ❌ Rejected: Not W3C compliant
1292
+
1293
+ **D) Automatic span creation**
1294
+ - ❌ Rejected for v1.0: Complexity, performance
1295
+
1296
+ **E) Context stored in Database**
1297
+ - ❌ Rejected: Too slow, high overhead
1298
+
1299
+ ---
1300
+
1301
+ ## 10. Configuration Reference
1302
+
1303
+ ```ruby
1304
+ # config/initializers/e11y.rb
1305
+ E11y.configure do |config|
1306
+ config.tracing do
1307
+ # Enable tracing
1308
+ enabled true
1309
+
1310
+ # Trace ID generation
1311
+ id_generator :secure_random # or :uuid, :custom
1312
+
1313
+ # W3C Trace Context
1314
+ w3c_trace_context do
1315
+ enabled true
1316
+ version '00'
1317
+
1318
+ # Legacy header support (fallback)
1319
+ support_legacy_headers true
1320
+ legacy_headers ['X-Request-ID', 'X-Trace-ID', 'X-Correlation-ID']
1321
+ end
1322
+
1323
+ # Sampling
1324
+ sampling do
1325
+ default_sample_rate 0.1
1326
+
1327
+ per_event_sample_rates do
1328
+ event 'payment.*', sample_rate: 1.0
1329
+ event 'health_check', sample_rate: 0.01
1330
+ end
1331
+
1332
+ debug_users [123, 456]
1333
+ respect_parent_sampling true
1334
+ end
1335
+
1336
+ # Context propagation
1337
+ propagation do
1338
+ # HTTP outgoing requests
1339
+ http do
1340
+ enabled true
1341
+ inject_headers ['traceparent', 'tracestate', 'X-Request-ID']
1342
+ end
1343
+
1344
+ # Background jobs
1345
+ jobs do
1346
+ enabled true
1347
+ propagate_baggage true
1348
+ propagate_user_context true
1349
+ end
1350
+
1351
+ # Structured logs
1352
+ logs do
1353
+ enabled true
1354
+ fields [:trace_id, :span_id, :user_id]
1355
+ end
1356
+ end
1357
+
1358
+ # Baggage (optional metadata)
1359
+ baggage do
1360
+ enabled true
1361
+ max_size 1024 # bytes
1362
+ max_entries 10
1363
+ end
1364
+ end
1365
+ end
1366
+ ```
1367
+
1368
+ ---
1369
+
1370
+ **Status:** ✅ Draft Complete
1371
+ **Next:** ADR-011 (Testing Strategy) or ADR-012 (Event Evolution)
1372
+ **Estimated Implementation:** 2 weeks