e11y 0.2.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (230) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +130 -10
  3. data/CHANGELOG.md +56 -1
  4. data/CLAUDE.md +168 -0
  5. data/CONTRIBUTING.md +640 -0
  6. data/README.md +134 -702
  7. data/RELEASE.md +18 -3
  8. data/Rakefile +108 -29
  9. data/config/README.md +1 -1
  10. data/config/loki-local-config.yaml +12 -0
  11. data/config/otel-collector-config.yaml +44 -0
  12. data/cucumber.yml +1 -0
  13. data/docker-compose.yml +18 -2
  14. data/docs/ADAPTERS.md +76 -0
  15. data/docs/ADAPTIVE_SAMPLING.md +59 -0
  16. data/docs/COMPARISON.md +104 -0
  17. data/docs/CONFIGURATION.md +52 -0
  18. data/docs/DISTRIBUTED_TRACING.md +44 -0
  19. data/docs/LIMITATIONS.md +13 -0
  20. data/docs/METRICS_DSL.md +84 -0
  21. data/docs/PERFORMANCE.md +60 -0
  22. data/docs/PII_FILTERING.md +40 -0
  23. data/docs/PRESETS.md +65 -0
  24. data/docs/QUICK-START.md +546 -587
  25. data/docs/RAILS_INTEGRATION.md +29 -0
  26. data/docs/SCHEMA_VALIDATION.md +63 -0
  27. data/docs/SLO-PROMQL-ALERTS.md +161 -0
  28. data/docs/TESTING.md +69 -0
  29. data/docs/{ADR-001-architecture.md → architecture/ADR-001-architecture.md} +35 -64
  30. data/docs/{ADR-002-metrics-yabeda.md → architecture/ADR-002-metrics-yabeda.md} +62 -236
  31. data/docs/{ADR-003-slo-observability.md → architecture/ADR-003-slo-observability.md} +27 -466
  32. data/docs/{ADR-004-adapter-architecture.md → architecture/ADR-004-adapter-architecture.md} +163 -146
  33. data/docs/{ADR-005-tracing-context.md → architecture/ADR-005-tracing-context.md} +10 -9
  34. data/docs/{ADR-006-security-compliance.md → architecture/ADR-006-security-compliance.md} +184 -191
  35. data/docs/{ADR-007-opentelemetry-integration.md → architecture/ADR-007-opentelemetry-integration.md} +3 -21
  36. data/docs/{ADR-008-rails-integration.md → architecture/ADR-008-rails-integration.md} +209 -339
  37. data/docs/{ADR-009-cost-optimization.md → architecture/ADR-009-cost-optimization.md} +45 -54
  38. data/docs/architecture/ADR-010-developer-experience.md +522 -0
  39. data/docs/{ADR-011-testing-strategy.md → architecture/ADR-011-testing-strategy.md} +41 -83
  40. data/docs/{ADR-013-reliability-error-handling.md → architecture/ADR-013-reliability-error-handling.md} +37 -12
  41. data/docs/{ADR-014-event-driven-slo.md → architecture/ADR-014-event-driven-slo.md} +12 -24
  42. data/docs/{ADR-015-middleware-order.md → architecture/ADR-015-middleware-order.md} +23 -41
  43. data/docs/{ADR-016-self-monitoring-slo.md → architecture/ADR-016-self-monitoring-slo.md} +52 -349
  44. data/docs/{ADR-017-multi-rails-compatibility.md → architecture/ADR-017-multi-rails-compatibility.md} +4 -11
  45. data/docs/architecture/ADR-018-memory-optimization.md +366 -0
  46. data/docs/{ADR-INDEX.md → architecture/ADR-INDEX.md} +11 -6
  47. data/docs/{00-ICP-AND-TIMELINE.md → prd/00-ICP-AND-TIMELINE.md} +6 -6
  48. data/docs/{01-SCALE-REQUIREMENTS.md → prd/01-SCALE-REQUIREMENTS.md} +6 -6
  49. data/docs/prd/01-overview-vision.md +19 -14
  50. data/docs/use_cases/README.md +22 -23
  51. data/docs/use_cases/UC-001-request-scoped-debug-buffering.md +50 -44
  52. data/docs/use_cases/UC-002-business-event-tracking.md +26 -95
  53. data/docs/use_cases/UC-003-event-metrics.md +66 -0
  54. data/docs/use_cases/UC-004-zero-config-slo-tracking.md +42 -101
  55. data/docs/use_cases/UC-005-sentry-integration.md +13 -15
  56. data/docs/use_cases/UC-006-trace-context-management.md +30 -28
  57. data/docs/use_cases/UC-007-pii-filtering.md +35 -87
  58. data/docs/use_cases/UC-008-opentelemetry-integration.md +51 -89
  59. data/docs/use_cases/UC-009-multi-service-tracing.md +4 -4
  60. data/docs/use_cases/UC-010-background-job-tracking.md +5 -5
  61. data/docs/use_cases/UC-011-rate-limiting.md +95 -168
  62. data/docs/use_cases/UC-012-audit-trail.md +21 -46
  63. data/docs/use_cases/UC-013-high-cardinality-protection.md +29 -167
  64. data/docs/use_cases/UC-014-adaptive-sampling.md +2 -2
  65. data/docs/use_cases/UC-015-cost-optimization.md +46 -99
  66. data/docs/use_cases/UC-016-rails-logger-migration.md +39 -213
  67. data/docs/use_cases/UC-017-local-development.md +203 -777
  68. data/docs/use_cases/UC-018-testing-events.md +3 -3
  69. data/docs/use_cases/UC-019-retention-based-routing.md +53 -106
  70. data/docs/use_cases/UC-020-event-versioning.md +8 -9
  71. data/docs/use_cases/UC-021-error-handling-retry-dlq.md +18 -22
  72. data/docs/use_cases/UC-022-event-registry.md +15 -21
  73. data/docs/use_cases/backlog.md +119 -87
  74. data/e11y.gemspec +2 -2
  75. data/gems/e11y-devtools/README.md +136 -0
  76. data/gems/e11y-devtools/config/routes.rb +8 -0
  77. data/gems/e11y-devtools/e11y-devtools.gemspec +25 -0
  78. data/gems/e11y-devtools/exe/e11y +34 -0
  79. data/gems/e11y-devtools/lib/e11y/devtools/mcp/server.rb +96 -0
  80. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tool_base.rb +25 -0
  81. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/clear.rb +31 -0
  82. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/errors.rb +35 -0
  83. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/event_detail.rb +33 -0
  84. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/events_by_trace.rb +33 -0
  85. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/interactions.rb +40 -0
  86. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/recent_events.rb +34 -0
  87. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/search.rb +34 -0
  88. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/stats.rb +30 -0
  89. data/gems/e11y-devtools/lib/e11y/devtools/overlay/assets/overlay.js +115 -0
  90. data/gems/e11y-devtools/lib/e11y/devtools/overlay/controller.rb +54 -0
  91. data/gems/e11y-devtools/lib/e11y/devtools/overlay/engine.rb +26 -0
  92. data/gems/e11y-devtools/lib/e11y/devtools/overlay/middleware.rb +80 -0
  93. data/gems/e11y-devtools/lib/e11y/devtools/overlay/rails_controller.rb +42 -0
  94. data/gems/e11y-devtools/lib/e11y/devtools/tui/app.rb +262 -0
  95. data/gems/e11y-devtools/lib/e11y/devtools/tui/grouping.rb +66 -0
  96. data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/event_detail.rb +62 -0
  97. data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/event_list.rb +70 -0
  98. data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/interaction_list.rb +47 -0
  99. data/gems/e11y-devtools/lib/e11y/devtools/version.rb +8 -0
  100. data/gems/e11y-devtools/lib/e11y/devtools.rb +13 -0
  101. data/gems/e11y-devtools/spec/e11y/devtools/mcp/tools_spec.rb +107 -0
  102. data/gems/e11y-devtools/spec/e11y/devtools/overlay/controller_spec.rb +58 -0
  103. data/gems/e11y-devtools/spec/e11y/devtools/overlay/middleware_spec.rb +46 -0
  104. data/gems/e11y-devtools/spec/e11y/devtools/tui/app_spec.rb +85 -0
  105. data/gems/e11y-devtools/spec/e11y/devtools/tui/grouping_spec.rb +64 -0
  106. data/gems/e11y-devtools/spec/spec_helper.rb +5 -0
  107. data/gems/e11y-devtools/spec/tui/widgets/event_list_spec.rb +44 -0
  108. data/gems/e11y-devtools/spec/tui/widgets/interaction_list_spec.rb +62 -0
  109. data/lib/e11y/adapters/audit_encrypted.rb +53 -11
  110. data/lib/e11y/adapters/base.rb +33 -34
  111. data/lib/e11y/adapters/dev_log/file_store.rb +143 -0
  112. data/lib/e11y/adapters/dev_log/query.rb +219 -0
  113. data/lib/e11y/adapters/dev_log.rb +118 -0
  114. data/lib/e11y/adapters/file.rb +3 -6
  115. data/lib/e11y/adapters/in_memory.rb +52 -5
  116. data/lib/e11y/adapters/in_memory_test.rb +29 -0
  117. data/lib/e11y/adapters/loki.rb +58 -23
  118. data/lib/e11y/adapters/null.rb +82 -0
  119. data/lib/e11y/adapters/opentelemetry_collector.rb +183 -0
  120. data/lib/e11y/adapters/otel_logs.rb +136 -23
  121. data/lib/e11y/adapters/sentry.rb +4 -7
  122. data/lib/e11y/adapters/stdout.rb +73 -7
  123. data/lib/e11y/adapters/yabeda.rb +153 -29
  124. data/lib/e11y/buffers/adaptive_buffer.rb +3 -17
  125. data/lib/e11y/buffers/{request_scoped_buffer.rb → ephemeral_buffer.rb} +72 -58
  126. data/lib/e11y/buffers/ring_buffer.rb +3 -16
  127. data/lib/e11y/configuration.rb +272 -0
  128. data/lib/e11y/console.rb +10 -17
  129. data/lib/e11y/current.rb +53 -1
  130. data/lib/e11y/debug/pipeline_inspector.rb +96 -0
  131. data/lib/e11y/documentation/generator.rb +48 -0
  132. data/lib/e11y/event/base.rb +176 -82
  133. data/lib/e11y/event/value_sampling_config.rb +1 -5
  134. data/lib/e11y/events/rails/database/query.rb +1 -4
  135. data/lib/e11y/events/rails/job/failed.rb +2 -0
  136. data/lib/e11y/instruments/active_job.rb +46 -12
  137. data/lib/e11y/instruments/rails_instrumentation.rb +49 -24
  138. data/lib/e11y/instruments/sidekiq.rb +137 -31
  139. data/lib/e11y/linters/base.rb +11 -0
  140. data/lib/e11y/linters/pii/pii_declaration_linter.rb +120 -0
  141. data/lib/e11y/linters/slo/config_consistency_linter.rb +76 -0
  142. data/lib/e11y/linters/slo/explicit_declaration_linter.rb +36 -0
  143. data/lib/e11y/linters/slo/slo_status_from_linter.rb +41 -0
  144. data/lib/e11y/logger/bridge.rb +26 -7
  145. data/lib/e11y/metrics/cardinality_protection.rb +10 -15
  146. data/lib/e11y/metrics/cardinality_tracker.rb +16 -6
  147. data/lib/e11y/metrics/registry.rb +3 -5
  148. data/lib/e11y/metrics/test_backend.rb +62 -0
  149. data/lib/e11y/metrics.rb +56 -10
  150. data/lib/e11y/middleware/adapter_resolver.rb +40 -0
  151. data/lib/e11y/middleware/audit_signing.rb +43 -6
  152. data/lib/e11y/middleware/baggage_protection.rb +75 -0
  153. data/lib/e11y/middleware/dev_log_source.rb +24 -0
  154. data/lib/e11y/middleware/event_slo.rb +23 -9
  155. data/lib/e11y/middleware/otel_span.rb +23 -0
  156. data/lib/e11y/middleware/pii_filter.rb +104 -75
  157. data/lib/e11y/middleware/rate_limiting.rb +54 -27
  158. data/lib/e11y/middleware/request.rb +70 -23
  159. data/lib/e11y/middleware/routing.rb +78 -21
  160. data/lib/e11y/middleware/sampling.rb +66 -17
  161. data/lib/e11y/middleware/self_monitoring_emit.rb +39 -0
  162. data/lib/e11y/middleware/trace_context.rb +45 -10
  163. data/lib/e11y/middleware/track_latency.rb +34 -0
  164. data/lib/e11y/middleware/validation.rb +7 -16
  165. data/lib/e11y/middleware/versioning.rb +26 -22
  166. data/lib/e11y/opentelemetry/semantic_conventions.rb +109 -0
  167. data/lib/e11y/opentelemetry/span_creator.rb +142 -0
  168. data/lib/e11y/pii/patterns.rb +12 -1
  169. data/lib/e11y/pipeline/builder.rb +1 -1
  170. data/lib/e11y/presets/audit_event.rb +13 -2
  171. data/lib/e11y/railtie.rb +52 -15
  172. data/lib/e11y/registry.rb +306 -0
  173. data/lib/e11y/reliability/circuit_breaker.rb +19 -21
  174. data/lib/e11y/reliability/dlq/base.rb +71 -0
  175. data/lib/e11y/reliability/dlq/file_adapter.rb +301 -0
  176. data/lib/e11y/reliability/dlq/file_storage.rb +63 -34
  177. data/lib/e11y/reliability/dlq/filter.rb +37 -54
  178. data/lib/e11y/reliability/retry_handler.rb +26 -29
  179. data/lib/e11y/reliability/retry_rate_limiter.rb +3 -11
  180. data/lib/e11y/sampling/error_spike_detector.rb +0 -2
  181. data/lib/e11y/sampling/load_monitor.rb +5 -9
  182. data/lib/e11y/sampling/stratified_tracker.rb +18 -0
  183. data/lib/e11y/self_monitoring/buffer_monitor.rb +2 -0
  184. data/lib/e11y/self_monitoring/performance_monitor.rb +19 -61
  185. data/lib/e11y/self_monitoring/reliability_monitor.rb +4 -74
  186. data/lib/e11y/slo/config_loader.rb +40 -0
  187. data/lib/e11y/slo/config_validator.rb +58 -0
  188. data/lib/e11y/slo/dashboard_generator.rb +122 -0
  189. data/lib/e11y/slo/event_driven.rb +8 -0
  190. data/lib/e11y/slo/tracker.rb +31 -4
  191. data/lib/e11y/testing/have_tracked_event_matcher.rb +190 -0
  192. data/lib/e11y/testing/rspec_matchers.rb +21 -0
  193. data/lib/e11y/testing/snapshot_matcher.rb +86 -0
  194. data/lib/e11y/trace_context/sampler.rb +35 -0
  195. data/lib/e11y/tracing/faraday_middleware.rb +31 -0
  196. data/lib/e11y/tracing/net_http_patch.rb +33 -0
  197. data/lib/e11y/tracing/propagator.rb +116 -0
  198. data/lib/e11y/tracing.rb +47 -0
  199. data/lib/e11y/version.rb +1 -1
  200. data/lib/e11y/versioning/version_extractor.rb +32 -0
  201. data/lib/e11y.rb +141 -265
  202. data/lib/generators/e11y/event/event_generator.rb +22 -0
  203. data/lib/generators/e11y/event/templates/event.rb.tt +16 -0
  204. data/lib/generators/e11y/grafana_dashboard/grafana_dashboard_generator.rb +30 -0
  205. data/lib/generators/e11y/grafana_dashboard/templates/e11y_dashboard.json +81 -0
  206. data/lib/generators/e11y/install/install_generator.rb +34 -0
  207. data/lib/generators/e11y/install/templates/e11y.rb +239 -0
  208. data/lib/generators/e11y/prometheus_alerts/prometheus_alerts_generator.rb +29 -0
  209. data/lib/generators/e11y/prometheus_alerts/templates/e11y_alerts.yml +28 -0
  210. data/lib/tasks/e11y_docs.rake +30 -0
  211. data/lib/tasks/e11y_events.rake +71 -0
  212. data/lib/tasks/e11y_lint.rake +91 -0
  213. data/lib/tasks/e11y_slo.rake +29 -0
  214. metadata +129 -39
  215. data/docs/ADR-010-developer-experience.md +0 -2166
  216. data/docs/API-REFERENCE-L28.md +0 -914
  217. data/docs/COMPREHENSIVE-CONFIGURATION.md +0 -2366
  218. data/docs/CONTRIBUTING.md +0 -312
  219. data/docs/IMPLEMENTATION_NOTES.md +0 -2804
  220. data/docs/IMPLEMENTATION_PLAN.md +0 -1971
  221. data/docs/IMPLEMENTATION_PLAN_ARCHITECTURE.md +0 -586
  222. data/docs/PLAN.md +0 -148
  223. data/docs/README.md +0 -296
  224. data/docs/design/00-memory-optimization.md +0 -593
  225. data/docs/guides/MIGRATION-L27-L28.md +0 -692
  226. data/docs/guides/PERFORMANCE-BENCHMARKS.md +0 -434
  227. data/docs/guides/README.md +0 -44
  228. data/docs/use_cases/UC-003-pattern-based-metrics.md +0 -1627
  229. data/lib/e11y/adapters/registry.rb +0 -141
  230. /data/docs/{ADR-012-event-evolution.md → architecture/ADR-012-event-evolution.md} +0 -0
@@ -29,13 +29,18 @@ module E11y
29
29
  # Store current trace as parent (job will create NEW trace)
30
30
  job.e11y_parent_trace_id = E11y::Current.trace_id if E11y::Current.trace_id
31
31
  job.e11y_parent_span_id = E11y::Current.span_id if E11y::Current.span_id
32
+ job.e11y_sampled = E11y::Current.sampled if E11y::Current.respond_to?(:sampled) && !E11y::Current.sampled.nil?
33
+ if E11y::Current.respond_to?(:baggage) && E11y::Current.baggage&.any?
34
+ filtered = E11y::Tracing::Propagator.filter_baggage_for_propagation(E11y::Current.baggage)
35
+ job.e11y_baggage = filtered if filtered.any?
36
+ end
32
37
  end
33
38
 
34
39
  # Set up job-scoped context around job execution (C17 Hybrid Tracing + C18 Non-Failing)
35
40
  around_perform do |job, block|
36
41
  # C18: Disable fail_on_error for jobs (observability should not block business logic)
37
- original_fail_on_error = E11y.config.error_handling.fail_on_error
38
- E11y.config.error_handling.fail_on_error = false
42
+ original_fail_on_error = E11y.config.error_handling_fail_on_error
43
+ E11y.config.error_handling_fail_on_error = false
39
44
 
40
45
  setup_job_context_active_job(job)
41
46
  setup_job_buffer_active_job
@@ -59,7 +64,7 @@ module E11y
59
64
  cleanup_job_context_active_job
60
65
 
61
66
  # Restore original setting
62
- E11y.config.error_handling.fail_on_error = original_fail_on_error
67
+ E11y.config.error_handling_fail_on_error = original_fail_on_error
63
68
  end
64
69
  end
65
70
 
@@ -79,13 +84,28 @@ module E11y
79
84
  E11y::Current.span_id = span_id
80
85
  E11y::Current.parent_trace_id = parent_trace_id
81
86
  E11y::Current.request_id = job.job_id
87
+ E11y::Current.baggage = job.e11y_baggage if job.respond_to?(:e11y_baggage) && job.e11y_baggage.is_a?(Hash)
88
+
89
+ # Restore or compute sampling decision (ADR-005 §7)
90
+ if job.respond_to?(:e11y_sampled) && !job.e11y_sampled.nil?
91
+ E11y::Current.sampled = job.e11y_sampled
92
+ else
93
+ require "e11y/trace_context/sampler"
94
+ ctx = E11y::Current.to_context.merge(
95
+ job_class: job.class.name,
96
+ queue: job.queue_name
97
+ ).compact
98
+ E11y::Current.sampled = E11y::TraceContext::Sampler.should_sample?(ctx)
99
+ end
82
100
  end
83
101
 
84
102
  # Setup job-scoped buffer
85
103
  def setup_job_buffer_active_job
86
- return unless E11y.config.request_buffer&.enabled
104
+ return unless E11y.config.ephemeral_buffer_enabled
87
105
 
88
- E11y::Buffers::RequestScopedBuffer.initialize!
106
+ limit = E11y.config.ephemeral_buffer_job_buffer_limit ||
107
+ E11y::Buffers::EphemeralBuffer::DEFAULT_BUFFER_LIMIT
108
+ E11y::Buffers::EphemeralBuffer.initialize!(buffer_limit: limit)
89
109
  rescue StandardError => e
90
110
  # C18: Don't fail job if buffer setup fails
91
111
  warn "[E11y] Failed to start job buffer: #{e.message}"
@@ -93,9 +113,9 @@ module E11y
93
113
 
94
114
  # Handle job error (C18: Non-Failing Event Tracking)
95
115
  def handle_job_error_active_job(_error)
96
- return unless E11y.config.request_buffer&.enabled
116
+ return unless E11y.config.ephemeral_buffer_enabled
97
117
 
98
- E11y::Buffers::RequestScopedBuffer.flush_on_error
118
+ E11y::Buffers::EphemeralBuffer.flush_on_error
99
119
  rescue StandardError => e
100
120
  # C18: Don't fail job if buffer flush fails
101
121
  warn "[E11y] Failed to flush job buffer on error: #{e.message}"
@@ -104,9 +124,9 @@ module E11y
104
124
  # Cleanup job-scoped context
105
125
  def cleanup_job_context_active_job
106
126
  # Flush buffer on success (not on error, already flushed in rescue)
107
- if !$ERROR_INFO && E11y.config.request_buffer&.enabled
127
+ if !$ERROR_INFO && E11y.config.ephemeral_buffer_enabled
108
128
  begin
109
- E11y::Buffers::RequestScopedBuffer.discard
129
+ E11y::Buffers::EphemeralBuffer.discard
110
130
  rescue StandardError => e
111
131
  # C18: Don't fail job if buffer flush fails
112
132
  warn "[E11y] Failed to flush job buffer: #{e.message}"
@@ -139,10 +159,9 @@ module E11y
139
159
  # @param start_time [Time] Job start time
140
160
  # @return [void]
141
161
  # @api private
142
- # rubocop:disable Metrics/AbcSize
143
162
  # SLO tracking requires config check, duration calculation, method call, and error handling
144
163
  def track_job_slo_active_job(job, status, start_time)
145
- return unless E11y.config.slo_tracking&.enabled
164
+ return unless E11y.config.slo_tracking_enabled
146
165
 
147
166
  duration_ms = ((Time.now - start_time) * 1000).round(2)
148
167
 
@@ -157,7 +176,6 @@ module E11y
157
176
  # C18: Don't fail if SLO tracking fails
158
177
  E11y.logger.warn("[E11y] SLO tracking error: #{e.message}", error: e.class.name)
159
178
  end
160
- # rubocop:enable Metrics/AbcSize
161
179
  end
162
180
 
163
181
  # Custom attribute accessors for trace context (C17 Hybrid Tracing)
@@ -195,6 +213,22 @@ module E11y
195
213
  def e11y_span_id=(value)
196
214
  @e11y_span_id = value
197
215
  end
216
+
217
+ def e11y_sampled
218
+ @e11y_sampled
219
+ end
220
+
221
+ def e11y_sampled=(value)
222
+ @e11y_sampled = value
223
+ end
224
+
225
+ def e11y_baggage
226
+ @e11y_baggage
227
+ end
228
+
229
+ def e11y_baggage=(value)
230
+ @e11y_baggage = value
231
+ end
198
232
  end
199
233
  end
200
234
  end
@@ -10,15 +10,14 @@ module E11y
10
10
  # **Unidirectional Flow:** ASN → E11y
11
11
  #
12
12
  # @example Basic usage
13
- # # Automatically enabled by E11y::Railtie if config.rails_instrumentation.enabled = true
13
+ # # Automatically enabled by E11y::Railtie if config.rails_instrumentation_enabled = true
14
14
  # E11y::Instruments::RailsInstrumentation.setup!
15
15
  #
16
16
  # @example Custom event mapping
17
17
  # E11y.configure do |config|
18
- # config.rails_instrumentation do
19
- # event_class_for 'sql.active_record', MyApp::CustomQueryEvent
20
- # ignore_event 'cache_read.active_support'
21
- # end
18
+ # config.rails_instrumentation_enabled = true
19
+ # config.rails_instrumentation_custom_mappings['sql.active_record'] = MyApp::CustomQueryEvent
20
+ # config.rails_instrumentation_ignore_events << 'cache_read.active_support'
22
21
  # end
23
22
  #
24
23
  # @see ADR-008 §4.1 (Unidirectional Flow ASN → E11y)
@@ -41,6 +40,7 @@ module E11y
41
40
  "enqueue.active_job" => "E11y::Events::Rails::Job::Enqueued",
42
41
  "enqueue_at.active_job" => "E11y::Events::Rails::Job::Scheduled",
43
42
  "perform_start.active_job" => "E11y::Events::Rails::Job::Started",
43
+ # perform.active_job: Completed on success, Failed on exception (routed in track_rails_event)
44
44
  "perform.active_job" => "E11y::Events::Rails::Job::Completed"
45
45
  }.freeze
46
46
 
@@ -50,7 +50,7 @@ module E11y
50
50
  #
51
51
  # @return [void]
52
52
  def self.setup!
53
- return unless E11y.config.rails_instrumentation&.enabled
53
+ return unless E11y.config.rails_instrumentation_enabled
54
54
 
55
55
  # Subscribe to each configured event pattern
56
56
  event_mapping.each do |asn_pattern, e11y_event_class_name|
@@ -81,25 +81,50 @@ module E11y
81
81
  # # Result: { controller: "Users", action: "index" } - password filtered by schema
82
82
  def self.subscribe_to_event(asn_pattern, e11y_event_class_name)
83
83
  ActiveSupport::Notifications.subscribe(asn_pattern) do |name, start, finish, _id, payload|
84
- # Convert ASN event → E11y event
85
- duration = (finish - start) * 1000 # Convert to milliseconds
84
+ track_rails_event(name, start, finish, payload, e11y_event_class_name)
85
+ rescue StandardError => e
86
+ warn "[E11y] Failed to track Rails event #{name}: #{e.message}"
87
+ end
88
+ end
86
89
 
87
- # Resolve event class (string → constant)
90
+ def self.track_rails_event(name, start, finish, payload, e11y_event_class_name)
91
+ duration = (finish - start) * 1000
92
+ extracted_payload = extract_job_info_from_object(payload)
93
+
94
+ # perform.active_job: route to Failed when job raised exception
95
+ if name == "perform.active_job" && job_failed?(payload)
96
+ e11y_event_class = resolve_event_class("E11y::Events::Rails::Job::Failed")
97
+ extracted_payload = extracted_payload.merge(extract_job_exception_info(payload))
98
+ else
88
99
  e11y_event_class = resolve_event_class(e11y_event_class_name)
89
- next unless e11y_event_class
100
+ extracted_payload = extracted_payload.merge(severity: :error) if process_action_error?(name, payload)
101
+ end
90
102
 
91
- # Extract job info from job object if present (ActiveJob events)
92
- extracted_payload = extract_job_info_from_object(payload)
103
+ return unless e11y_event_class
93
104
 
94
- # Track E11y event - schema will filter relevant fields
95
- e11y_event_class.track(
96
- event_name: name,
97
- duration: duration,
98
- **extracted_payload # Pass all payload, schema filters
99
- )
100
- rescue StandardError => e
101
- # Don't crash the app if event tracking fails
102
- warn "[E11y] Failed to track Rails event #{name}: #{e.message}"
105
+ e11y_event_class.track(event_name: name, duration: duration, **extracted_payload)
106
+ end
107
+
108
+ def self.process_action_error?(name, payload)
109
+ name == "process_action.action_controller" && (payload[:exception] || payload["exception"])
110
+ end
111
+
112
+ def self.job_failed?(payload)
113
+ payload[:exception].present? || payload["exception"].present?
114
+ end
115
+
116
+ # Extract error_class and error_message from ActiveJob exception payload.
117
+ # Rails passes exception as ["ErrorClass", "message"] or exception_object.
118
+ def self.extract_job_exception_info(payload)
119
+ ex = payload[:exception] || payload["exception"]
120
+ return {} unless ex
121
+
122
+ if ex.is_a?(Array) && ex.size >= 2
123
+ { error_class: ex[0].to_s, error_message: ex[1].to_s }
124
+ elsif ex.respond_to?(:class) && ex.respond_to?(:message)
125
+ { error_class: ex.class.name, error_message: ex.message.to_s }
126
+ else
127
+ {}
103
128
  end
104
129
  end
105
130
 
@@ -110,9 +135,9 @@ module E11y
110
135
  mapping = DEFAULT_RAILS_EVENT_MAPPING.dup
111
136
 
112
137
  # Apply custom mappings from config (Devise-style overrides)
113
- custom_mappings = E11y.config.rails_instrumentation&.custom_mappings || {}
138
+ custom_mappings = E11y.config.rails_instrumentation_custom_mappings || {}
114
139
  custom_mappings.each do |pattern, event_class|
115
- mapping[pattern] = event_class.name
140
+ mapping[pattern] = event_class.respond_to?(:name) ? event_class.name : event_class.to_s
116
141
  end
117
142
 
118
143
  mapping
@@ -123,7 +148,7 @@ module E11y
123
148
  # @param pattern [String] ASN event pattern
124
149
  # @return [Boolean] true if should be ignored
125
150
  def self.ignored?(pattern)
126
- ignore_list = E11y.config.rails_instrumentation&.ignore_events || []
151
+ ignore_list = E11y.config.rails_instrumentation_ignore_events || []
127
152
  ignore_list.include?(pattern)
128
153
  end
129
154
 
@@ -23,62 +23,153 @@ module E11y
23
23
  #
24
24
  # @see ADR-008 §9 (Sidekiq Integration)
25
25
  module Sidekiq
26
+ # Shared helper: detect raw Sidekiq jobs (not ActiveJob-wrapped)
27
+ module RawSidekiqJob
28
+ def raw_sidekiq_job?(job)
29
+ job_class = job["class"].to_s
30
+ return false if job_class.include?("ActiveJob::QueueAdapters::SidekiqAdapter")
31
+ return false if job["wrapped"].present?
32
+
33
+ true
34
+ end
35
+ end
36
+
37
+ # Emits job lifecycle events (Started, Completed, Failed) for ServerMiddleware
38
+ module JobEventEmitter
39
+ def emit_job_started(job, queue)
40
+ Events::Rails::Job::Started.track(
41
+ event_name: "sidekiq.perform_start",
42
+ duration: 0,
43
+ job_class: job["class"],
44
+ job_id: job["jid"],
45
+ queue: queue
46
+ )
47
+ rescue StandardError => e
48
+ warn "[E11y] Failed to emit job Started: #{e.message}"
49
+ end
50
+
51
+ def emit_job_completed(job, queue, start_time)
52
+ duration_ms = ((Time.now - start_time) * 1000).round(2)
53
+ Events::Rails::Job::Completed.track(
54
+ event_name: "sidekiq.perform",
55
+ duration: duration_ms,
56
+ job_class: job["class"],
57
+ job_id: job["jid"],
58
+ queue: queue
59
+ )
60
+ rescue StandardError => e
61
+ warn "[E11y] Failed to emit job Completed: #{e.message}"
62
+ end
63
+
64
+ def emit_job_failed(job, queue, start_time, error)
65
+ duration_ms = ((Time.now - start_time) * 1000).round(2)
66
+ Events::Rails::Job::Failed.track(
67
+ event_name: "sidekiq.perform",
68
+ duration: duration_ms,
69
+ job_class: job["class"],
70
+ job_id: job["jid"],
71
+ queue: queue,
72
+ error_class: error.class.name,
73
+ error_message: error.message
74
+ )
75
+ rescue StandardError => e
76
+ warn "[E11y] Failed to emit job Failed: #{e.message}"
77
+ end
78
+ end
79
+
26
80
  # Client-side middleware: Inject trace context when enqueueing job
27
81
  #
28
82
  # **C17 Hybrid Tracing**: Propagates parent_trace_id to job metadata.
29
83
  # Job will create NEW trace_id but keep link to parent.
84
+ #
85
+ # **Job lifecycle events**: Emits Events::Rails::Job::Enqueued for raw Sidekiq jobs only.
86
+ # ActiveJob jobs are handled by RailsInstrumentation (ASN).
30
87
  class ClientMiddleware
31
- def call(_worker_class, job, _queue, _redis_pool)
88
+ include RawSidekiqJob
89
+
90
+ def call(worker_class, job, queue, _redis_pool)
32
91
  # Inject current trace context into job metadata as parent trace
33
92
  # Job will generate NEW trace_id but keep parent link (C17)
34
93
  job["e11y_parent_trace_id"] = E11y::Current.trace_id if E11y::Current.trace_id
35
94
  job["e11y_parent_span_id"] = E11y::Current.span_id if E11y::Current.span_id
95
+ job["e11y_sampled"] = E11y::Current.sampled if E11y::Current.respond_to?(:sampled) && !E11y::Current.sampled.nil?
96
+ if E11y::Current.respond_to?(:baggage) && E11y::Current.baggage&.any?
97
+ filtered = E11y::Tracing::Propagator.filter_baggage_for_propagation(E11y::Current.baggage)
98
+ job["e11y_baggage"] = filtered if filtered.any?
99
+ end
100
+
101
+ # Emit Enqueued for raw Sidekiq jobs only (ActiveJob emits via ASN)
102
+ emit_job_enqueued(worker_class, job, queue) if raw_sidekiq_job?(job)
36
103
 
37
104
  yield
38
105
  end
106
+
107
+ private
108
+
109
+ def emit_job_enqueued(worker_class, job, queue)
110
+ Events::Rails::Job::Enqueued.track(
111
+ event_name: "sidekiq.enqueue",
112
+ duration: 0,
113
+ job_class: worker_class.to_s,
114
+ job_id: job["jid"],
115
+ queue: queue
116
+ )
117
+ rescue StandardError => e
118
+ warn "[E11y] Failed to emit job Enqueued: #{e.message}"
119
+ end
39
120
  end
40
121
 
41
122
  # Server-side middleware: Set up job-scoped context when executing job
42
123
  #
43
124
  # **C17 Hybrid Tracing**: Creates NEW trace_id for job, but preserves parent link.
44
125
  # **C18 Non-Failing**: E11y errors don't fail jobs (observability is secondary to business logic).
126
+ #
127
+ # **Job lifecycle events**: Emits Events::Rails::Job::Started/Completed/Failed for raw Sidekiq jobs only.
128
+ # ActiveJob jobs (when Sidekiq is the queue adapter) are handled by RailsInstrumentation (ASN).
45
129
  class ServerMiddleware
46
- # rubocop:disable Metrics/AbcSize
47
- def call(_worker, job, queue)
48
- # C18: Disable fail_on_error for jobs (observability should not block business logic)
49
- original_fail_on_error = E11y.config.error_handling.fail_on_error
50
- E11y.config.error_handling.fail_on_error = false
51
-
52
- setup_job_context(job)
53
- setup_job_buffer
130
+ include RawSidekiqJob
131
+ include JobEventEmitter
54
132
 
55
- # Track job start time for SLO
133
+ def call(_worker, job, queue)
134
+ original_fail_on_error = disable_fail_on_error
56
135
  start_time = Time.now
57
136
  job_status = :success
58
137
 
59
- # Execute job (business logic)
138
+ setup_job_context(job, queue)
139
+ setup_job_buffer
140
+
141
+ emit_job_started(job, queue) if raw_sidekiq_job?(job)
60
142
  yield
61
143
  rescue StandardError => e
62
144
  job_status = :failed
63
- # Check if this is E11y error (circuit breaker, retry exhausted, etc.)
64
- handle_job_error(e)
65
-
66
- raise # Always re-raise original exception
145
+ on_job_exception(job, queue, start_time, e)
146
+ raise
67
147
  ensure
68
- # Track SLO metrics
69
- track_job_slo(job, queue, job_status, start_time)
148
+ finalize_job(job, queue, start_time, job_status, original_fail_on_error)
149
+ end
70
150
 
71
- cleanup_job_context
151
+ private
72
152
 
73
- # Restore original setting
74
- E11y.config.error_handling.fail_on_error = original_fail_on_error
153
+ def disable_fail_on_error
154
+ original = E11y.config.error_handling_fail_on_error
155
+ E11y.config.error_handling_fail_on_error = false
156
+ original
75
157
  end
76
- # rubocop:enable Metrics/AbcSize
77
158
 
78
- private
159
+ def on_job_exception(job, queue, start_time, error)
160
+ emit_job_failed(job, queue, start_time, error) if raw_sidekiq_job?(job)
161
+ handle_job_error(error)
162
+ end
163
+
164
+ def finalize_job(job, queue, start_time, job_status, original_fail_on_error)
165
+ emit_job_completed(job, queue, start_time) if raw_sidekiq_job?(job) && job_status == :success
166
+ track_job_slo(job, queue, job_status, start_time)
167
+ cleanup_job_context
168
+ E11y.config.error_handling_fail_on_error = original_fail_on_error
169
+ end
79
170
 
80
171
  # Setup job-scoped context (C17 Hybrid Tracing)
81
- def setup_job_context(job)
172
+ def setup_job_context(job, queue = nil)
82
173
  # Extract parent trace context from job metadata
83
174
  parent_trace_id = job["e11y_parent_trace_id"]
84
175
 
@@ -91,13 +182,28 @@ module E11y
91
182
  E11y::Current.span_id = span_id
92
183
  E11y::Current.parent_trace_id = parent_trace_id
93
184
  E11y::Current.request_id = job["jid"]
185
+ E11y::Current.baggage = job["e11y_baggage"] if job.key?("e11y_baggage") && job["e11y_baggage"].is_a?(Hash)
186
+
187
+ # Restore or compute sampling decision (ADR-005 §7)
188
+ if job.key?("e11y_sampled")
189
+ E11y::Current.sampled = job["e11y_sampled"]
190
+ else
191
+ require "e11y/trace_context/sampler"
192
+ ctx = E11y::Current.to_context.merge(
193
+ job_class: job["class"],
194
+ queue: queue
195
+ ).compact
196
+ E11y::Current.sampled = E11y::TraceContext::Sampler.should_sample?(ctx)
197
+ end
94
198
  end
95
199
 
96
- # Setup job-scoped buffer
200
+ # Setup request-scoped buffer (same as HTTP; optional job_buffer_limit)
97
201
  def setup_job_buffer
98
- return unless E11y.config.request_buffer&.enabled
202
+ return unless E11y.config.ephemeral_buffer_enabled
99
203
 
100
- E11y::Buffers::RequestScopedBuffer.initialize!
204
+ limit = E11y.config.ephemeral_buffer_job_buffer_limit ||
205
+ E11y::Buffers::EphemeralBuffer::DEFAULT_BUFFER_LIMIT
206
+ E11y::Buffers::EphemeralBuffer.initialize!(buffer_limit: limit)
101
207
  rescue StandardError => e
102
208
  # C18: Don't fail job if buffer setup fails
103
209
  warn "[E11y] Failed to start job buffer: #{e.message}"
@@ -106,9 +212,9 @@ module E11y
106
212
  # Handle job error (C18: Non-Failing Event Tracking)
107
213
  def handle_job_error(_error)
108
214
  # Flush buffer on error (includes debug events)
109
- return unless E11y.config.request_buffer&.enabled
215
+ return unless E11y.config.ephemeral_buffer_enabled
110
216
 
111
- E11y::Buffers::RequestScopedBuffer.flush_on_error
217
+ E11y::Buffers::EphemeralBuffer.flush_on_error
112
218
  rescue StandardError => e
113
219
  # C18: Don't fail job if buffer flush fails
114
220
  warn "[E11y] Failed to flush job buffer on error: #{e.message}"
@@ -117,9 +223,9 @@ module E11y
117
223
  # Cleanup job-scoped context
118
224
  def cleanup_job_context
119
225
  # Discard buffer on success (not on error, already flushed in rescue)
120
- if !$ERROR_INFO && E11y.config.request_buffer&.enabled
226
+ if !$ERROR_INFO && E11y.config.ephemeral_buffer_enabled
121
227
  begin
122
- E11y::Buffers::RequestScopedBuffer.discard
228
+ E11y::Buffers::EphemeralBuffer.discard
123
229
  rescue StandardError => e
124
230
  # C18: Don't fail job if buffer flush fails
125
231
  warn "[E11y] Failed to flush job buffer: #{e.message}"
@@ -154,7 +260,7 @@ module E11y
154
260
  # @return [void]
155
261
  # @api private
156
262
  def track_job_slo(job, queue, status, start_time)
157
- return unless E11y.config.slo_tracking&.enabled
263
+ return unless E11y.config.respond_to?(:slo_tracking_enabled) && E11y.config.slo_tracking_enabled
158
264
 
159
265
  duration_ms = ((Time.now - start_time) * 1000).round(2)
160
266
 
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module E11y
4
+ module Linters
5
+ # Namespace for linter base infrastructure (satisfies Zeitwerk for base.rb).
6
+ module Base
7
+ end
8
+
9
+ class LinterError < StandardError; end
10
+ end
11
+ end
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "e11y/linters/base"
4
+ require "e11y/registry"
5
+
6
+ module E11y
7
+ module Linters
8
+ module PII
9
+ # Linter for explicit PII declaration on Event classes.
10
+ #
11
+ # When an event declares `contains_pii true`, every schema field must have
12
+ # an explicit PII strategy in the pii_filtering block.
13
+ #
14
+ # @see ADR-006 §3.0.5 PII Declaration Linter
15
+ # @see UC-007 PII Filtering
16
+ class PiiDeclarationLinter
17
+ VALID_STRATEGIES = %i[allow skip mask hash redact partial truncate encrypt].freeze
18
+
19
+ class << self
20
+ # Validate all registered event classes.
21
+ #
22
+ # @raise [E11y::Linters::PiiDeclarationError] when any event with contains_pii true has missing/invalid declarations
23
+ def validate_all!
24
+ errors = []
25
+
26
+ E11y::Registry.event_classes.each do |event_class|
27
+ validate!(event_class)
28
+ rescue PiiDeclarationError => e
29
+ errors << e.message
30
+ end
31
+
32
+ raise PiiDeclarationError, errors.join("\n\n") if errors.any?
33
+ end
34
+
35
+ # Validate a single event class.
36
+ #
37
+ # @param event_class [Class] Event class to validate
38
+ # @raise [E11y::Linters::PiiDeclarationError] when validation fails
39
+ def validate!(event_class)
40
+ return unless event_class.contains_pii == true
41
+
42
+ schema_fields = extract_schema_keys(event_class)
43
+ return if schema_fields.nil? || schema_fields.empty?
44
+
45
+ pii_config = event_class.pii_filtering_config
46
+ declared_fields = pii_config&.dig(:fields)&.keys&.map(&:to_s) || []
47
+
48
+ missing = schema_fields.map(&:to_s) - declared_fields
49
+ raise PiiDeclarationError, build_missing_message(event_class, missing) if missing.any?
50
+
51
+ # Validate each declared field has valid strategy
52
+ pii_config[:fields].each do |field, config|
53
+ validate_field_config!(event_class, field, config)
54
+ end
55
+ end
56
+
57
+ private
58
+
59
+ def extract_schema_keys(klass)
60
+ return nil unless klass.respond_to?(:compiled_schema)
61
+
62
+ schema = klass.compiled_schema
63
+ return nil unless schema.respond_to?(:key_map)
64
+
65
+ schema.key_map.keys.map(&:name)
66
+ rescue StandardError
67
+ nil
68
+ end
69
+
70
+ def build_missing_message(event_class, missing_fields)
71
+ fields_snippet = missing_fields.map do |f|
72
+ " field :#{f} do\n strategy :mask # or :hash, :allow, :redact\n end"
73
+ end.join("\n ")
74
+
75
+ <<~ERROR
76
+ PII Declaration Error: #{event_class.name}
77
+
78
+ Event declared `contains_pii true` but missing field declarations:
79
+
80
+ Missing fields: #{missing_fields.map { |x| ":#{x}" }.join(', ')}
81
+
82
+ Fix: Add explicit PII strategy for each field in pii_filtering block:
83
+
84
+ class #{event_class.name} < E11y::Event::Base
85
+ contains_pii true
86
+
87
+ pii_filtering do
88
+ #{fields_snippet}
89
+ end
90
+ end
91
+
92
+ Available strategies: #{VALID_STRATEGIES.map { |s| ":#{s}" }.join(', ')}
93
+ ERROR
94
+ end
95
+
96
+ def validate_field_config!(event_class, field, config)
97
+ strategy = config[:strategy]
98
+ unless VALID_STRATEGIES.include?(strategy)
99
+ raise PiiDeclarationError, <<~ERROR
100
+ Invalid PII strategy for #{event_class.name}##{field}
101
+
102
+ Strategy: #{strategy.inspect}
103
+ Valid strategies: #{VALID_STRATEGIES.map { |s| ":#{s}" }.join(', ')}
104
+ ERROR
105
+ end
106
+
107
+ return unless config.key?(:exclude_adapters)
108
+
109
+ return if config[:exclude_adapters].is_a?(Array)
110
+
111
+ raise PiiDeclarationError, "exclude_adapters must be an Array for #{event_class.name}##{field}"
112
+ end
113
+ end
114
+ end
115
+
116
+ # Raised when PII declaration validation fails.
117
+ class PiiDeclarationError < LinterError; end
118
+ end
119
+ end
120
+ end