e11y 0.2.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (230)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +130 -10
  3. data/CHANGELOG.md +56 -1
  4. data/CLAUDE.md +168 -0
  5. data/CONTRIBUTING.md +640 -0
  6. data/README.md +134 -702
  7. data/RELEASE.md +18 -3
  8. data/Rakefile +108 -29
  9. data/config/README.md +1 -1
  10. data/config/loki-local-config.yaml +12 -0
  11. data/config/otel-collector-config.yaml +44 -0
  12. data/cucumber.yml +1 -0
  13. data/docker-compose.yml +18 -2
  14. data/docs/ADAPTERS.md +76 -0
  15. data/docs/ADAPTIVE_SAMPLING.md +59 -0
  16. data/docs/COMPARISON.md +104 -0
  17. data/docs/CONFIGURATION.md +52 -0
  18. data/docs/DISTRIBUTED_TRACING.md +44 -0
  19. data/docs/LIMITATIONS.md +13 -0
  20. data/docs/METRICS_DSL.md +84 -0
  21. data/docs/PERFORMANCE.md +60 -0
  22. data/docs/PII_FILTERING.md +40 -0
  23. data/docs/PRESETS.md +65 -0
  24. data/docs/QUICK-START.md +546 -587
  25. data/docs/RAILS_INTEGRATION.md +29 -0
  26. data/docs/SCHEMA_VALIDATION.md +63 -0
  27. data/docs/SLO-PROMQL-ALERTS.md +161 -0
  28. data/docs/TESTING.md +69 -0
  29. data/docs/{ADR-001-architecture.md → architecture/ADR-001-architecture.md} +35 -64
  30. data/docs/{ADR-002-metrics-yabeda.md → architecture/ADR-002-metrics-yabeda.md} +62 -236
  31. data/docs/{ADR-003-slo-observability.md → architecture/ADR-003-slo-observability.md} +27 -466
  32. data/docs/{ADR-004-adapter-architecture.md → architecture/ADR-004-adapter-architecture.md} +163 -146
  33. data/docs/{ADR-005-tracing-context.md → architecture/ADR-005-tracing-context.md} +10 -9
  34. data/docs/{ADR-006-security-compliance.md → architecture/ADR-006-security-compliance.md} +184 -191
  35. data/docs/{ADR-007-opentelemetry-integration.md → architecture/ADR-007-opentelemetry-integration.md} +3 -21
  36. data/docs/{ADR-008-rails-integration.md → architecture/ADR-008-rails-integration.md} +209 -339
  37. data/docs/{ADR-009-cost-optimization.md → architecture/ADR-009-cost-optimization.md} +45 -54
  38. data/docs/architecture/ADR-010-developer-experience.md +522 -0
  39. data/docs/{ADR-011-testing-strategy.md → architecture/ADR-011-testing-strategy.md} +41 -83
  40. data/docs/{ADR-013-reliability-error-handling.md → architecture/ADR-013-reliability-error-handling.md} +37 -12
  41. data/docs/{ADR-014-event-driven-slo.md → architecture/ADR-014-event-driven-slo.md} +12 -24
  42. data/docs/{ADR-015-middleware-order.md → architecture/ADR-015-middleware-order.md} +23 -41
  43. data/docs/{ADR-016-self-monitoring-slo.md → architecture/ADR-016-self-monitoring-slo.md} +52 -349
  44. data/docs/{ADR-017-multi-rails-compatibility.md → architecture/ADR-017-multi-rails-compatibility.md} +4 -11
  45. data/docs/architecture/ADR-018-memory-optimization.md +366 -0
  46. data/docs/{ADR-INDEX.md → architecture/ADR-INDEX.md} +11 -6
  47. data/docs/{00-ICP-AND-TIMELINE.md → prd/00-ICP-AND-TIMELINE.md} +6 -6
  48. data/docs/{01-SCALE-REQUIREMENTS.md → prd/01-SCALE-REQUIREMENTS.md} +6 -6
  49. data/docs/prd/01-overview-vision.md +19 -14
  50. data/docs/use_cases/README.md +22 -23
  51. data/docs/use_cases/UC-001-request-scoped-debug-buffering.md +50 -44
  52. data/docs/use_cases/UC-002-business-event-tracking.md +26 -95
  53. data/docs/use_cases/UC-003-event-metrics.md +66 -0
  54. data/docs/use_cases/UC-004-zero-config-slo-tracking.md +42 -101
  55. data/docs/use_cases/UC-005-sentry-integration.md +13 -15
  56. data/docs/use_cases/UC-006-trace-context-management.md +30 -28
  57. data/docs/use_cases/UC-007-pii-filtering.md +35 -87
  58. data/docs/use_cases/UC-008-opentelemetry-integration.md +51 -89
  59. data/docs/use_cases/UC-009-multi-service-tracing.md +4 -4
  60. data/docs/use_cases/UC-010-background-job-tracking.md +5 -5
  61. data/docs/use_cases/UC-011-rate-limiting.md +95 -168
  62. data/docs/use_cases/UC-012-audit-trail.md +21 -46
  63. data/docs/use_cases/UC-013-high-cardinality-protection.md +29 -167
  64. data/docs/use_cases/UC-014-adaptive-sampling.md +2 -2
  65. data/docs/use_cases/UC-015-cost-optimization.md +46 -99
  66. data/docs/use_cases/UC-016-rails-logger-migration.md +39 -213
  67. data/docs/use_cases/UC-017-local-development.md +203 -777
  68. data/docs/use_cases/UC-018-testing-events.md +3 -3
  69. data/docs/use_cases/UC-019-retention-based-routing.md +53 -106
  70. data/docs/use_cases/UC-020-event-versioning.md +8 -9
  71. data/docs/use_cases/UC-021-error-handling-retry-dlq.md +18 -22
  72. data/docs/use_cases/UC-022-event-registry.md +15 -21
  73. data/docs/use_cases/backlog.md +119 -87
  74. data/e11y.gemspec +2 -2
  75. data/gems/e11y-devtools/README.md +136 -0
  76. data/gems/e11y-devtools/config/routes.rb +8 -0
  77. data/gems/e11y-devtools/e11y-devtools.gemspec +25 -0
  78. data/gems/e11y-devtools/exe/e11y +34 -0
  79. data/gems/e11y-devtools/lib/e11y/devtools/mcp/server.rb +96 -0
  80. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tool_base.rb +25 -0
  81. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/clear.rb +31 -0
  82. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/errors.rb +35 -0
  83. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/event_detail.rb +33 -0
  84. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/events_by_trace.rb +33 -0
  85. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/interactions.rb +40 -0
  86. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/recent_events.rb +34 -0
  87. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/search.rb +34 -0
  88. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/stats.rb +30 -0
  89. data/gems/e11y-devtools/lib/e11y/devtools/overlay/assets/overlay.js +115 -0
  90. data/gems/e11y-devtools/lib/e11y/devtools/overlay/controller.rb +54 -0
  91. data/gems/e11y-devtools/lib/e11y/devtools/overlay/engine.rb +26 -0
  92. data/gems/e11y-devtools/lib/e11y/devtools/overlay/middleware.rb +80 -0
  93. data/gems/e11y-devtools/lib/e11y/devtools/overlay/rails_controller.rb +42 -0
  94. data/gems/e11y-devtools/lib/e11y/devtools/tui/app.rb +262 -0
  95. data/gems/e11y-devtools/lib/e11y/devtools/tui/grouping.rb +66 -0
  96. data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/event_detail.rb +62 -0
  97. data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/event_list.rb +70 -0
  98. data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/interaction_list.rb +47 -0
  99. data/gems/e11y-devtools/lib/e11y/devtools/version.rb +8 -0
  100. data/gems/e11y-devtools/lib/e11y/devtools.rb +13 -0
  101. data/gems/e11y-devtools/spec/e11y/devtools/mcp/tools_spec.rb +107 -0
  102. data/gems/e11y-devtools/spec/e11y/devtools/overlay/controller_spec.rb +58 -0
  103. data/gems/e11y-devtools/spec/e11y/devtools/overlay/middleware_spec.rb +46 -0
  104. data/gems/e11y-devtools/spec/e11y/devtools/tui/app_spec.rb +85 -0
  105. data/gems/e11y-devtools/spec/e11y/devtools/tui/grouping_spec.rb +64 -0
  106. data/gems/e11y-devtools/spec/spec_helper.rb +5 -0
  107. data/gems/e11y-devtools/spec/tui/widgets/event_list_spec.rb +44 -0
  108. data/gems/e11y-devtools/spec/tui/widgets/interaction_list_spec.rb +62 -0
  109. data/lib/e11y/adapters/audit_encrypted.rb +53 -11
  110. data/lib/e11y/adapters/base.rb +33 -34
  111. data/lib/e11y/adapters/dev_log/file_store.rb +143 -0
  112. data/lib/e11y/adapters/dev_log/query.rb +219 -0
  113. data/lib/e11y/adapters/dev_log.rb +118 -0
  114. data/lib/e11y/adapters/file.rb +3 -6
  115. data/lib/e11y/adapters/in_memory.rb +52 -5
  116. data/lib/e11y/adapters/in_memory_test.rb +29 -0
  117. data/lib/e11y/adapters/loki.rb +58 -23
  118. data/lib/e11y/adapters/null.rb +82 -0
  119. data/lib/e11y/adapters/opentelemetry_collector.rb +183 -0
  120. data/lib/e11y/adapters/otel_logs.rb +136 -23
  121. data/lib/e11y/adapters/sentry.rb +4 -7
  122. data/lib/e11y/adapters/stdout.rb +73 -7
  123. data/lib/e11y/adapters/yabeda.rb +153 -29
  124. data/lib/e11y/buffers/adaptive_buffer.rb +3 -17
  125. data/lib/e11y/buffers/{request_scoped_buffer.rb → ephemeral_buffer.rb} +72 -58
  126. data/lib/e11y/buffers/ring_buffer.rb +3 -16
  127. data/lib/e11y/configuration.rb +272 -0
  128. data/lib/e11y/console.rb +10 -17
  129. data/lib/e11y/current.rb +53 -1
  130. data/lib/e11y/debug/pipeline_inspector.rb +96 -0
  131. data/lib/e11y/documentation/generator.rb +48 -0
  132. data/lib/e11y/event/base.rb +176 -82
  133. data/lib/e11y/event/value_sampling_config.rb +1 -5
  134. data/lib/e11y/events/rails/database/query.rb +1 -4
  135. data/lib/e11y/events/rails/job/failed.rb +2 -0
  136. data/lib/e11y/instruments/active_job.rb +46 -12
  137. data/lib/e11y/instruments/rails_instrumentation.rb +49 -24
  138. data/lib/e11y/instruments/sidekiq.rb +137 -31
  139. data/lib/e11y/linters/base.rb +11 -0
  140. data/lib/e11y/linters/pii/pii_declaration_linter.rb +120 -0
  141. data/lib/e11y/linters/slo/config_consistency_linter.rb +76 -0
  142. data/lib/e11y/linters/slo/explicit_declaration_linter.rb +36 -0
  143. data/lib/e11y/linters/slo/slo_status_from_linter.rb +41 -0
  144. data/lib/e11y/logger/bridge.rb +26 -7
  145. data/lib/e11y/metrics/cardinality_protection.rb +10 -15
  146. data/lib/e11y/metrics/cardinality_tracker.rb +16 -6
  147. data/lib/e11y/metrics/registry.rb +3 -5
  148. data/lib/e11y/metrics/test_backend.rb +62 -0
  149. data/lib/e11y/metrics.rb +56 -10
  150. data/lib/e11y/middleware/adapter_resolver.rb +40 -0
  151. data/lib/e11y/middleware/audit_signing.rb +43 -6
  152. data/lib/e11y/middleware/baggage_protection.rb +75 -0
  153. data/lib/e11y/middleware/dev_log_source.rb +24 -0
  154. data/lib/e11y/middleware/event_slo.rb +23 -9
  155. data/lib/e11y/middleware/otel_span.rb +23 -0
  156. data/lib/e11y/middleware/pii_filter.rb +104 -75
  157. data/lib/e11y/middleware/rate_limiting.rb +54 -27
  158. data/lib/e11y/middleware/request.rb +70 -23
  159. data/lib/e11y/middleware/routing.rb +78 -21
  160. data/lib/e11y/middleware/sampling.rb +66 -17
  161. data/lib/e11y/middleware/self_monitoring_emit.rb +39 -0
  162. data/lib/e11y/middleware/trace_context.rb +45 -10
  163. data/lib/e11y/middleware/track_latency.rb +34 -0
  164. data/lib/e11y/middleware/validation.rb +7 -16
  165. data/lib/e11y/middleware/versioning.rb +26 -22
  166. data/lib/e11y/opentelemetry/semantic_conventions.rb +109 -0
  167. data/lib/e11y/opentelemetry/span_creator.rb +142 -0
  168. data/lib/e11y/pii/patterns.rb +12 -1
  169. data/lib/e11y/pipeline/builder.rb +1 -1
  170. data/lib/e11y/presets/audit_event.rb +13 -2
  171. data/lib/e11y/railtie.rb +52 -15
  172. data/lib/e11y/registry.rb +306 -0
  173. data/lib/e11y/reliability/circuit_breaker.rb +19 -21
  174. data/lib/e11y/reliability/dlq/base.rb +71 -0
  175. data/lib/e11y/reliability/dlq/file_adapter.rb +301 -0
  176. data/lib/e11y/reliability/dlq/file_storage.rb +63 -34
  177. data/lib/e11y/reliability/dlq/filter.rb +37 -54
  178. data/lib/e11y/reliability/retry_handler.rb +26 -29
  179. data/lib/e11y/reliability/retry_rate_limiter.rb +3 -11
  180. data/lib/e11y/sampling/error_spike_detector.rb +0 -2
  181. data/lib/e11y/sampling/load_monitor.rb +5 -9
  182. data/lib/e11y/sampling/stratified_tracker.rb +18 -0
  183. data/lib/e11y/self_monitoring/buffer_monitor.rb +2 -0
  184. data/lib/e11y/self_monitoring/performance_monitor.rb +19 -61
  185. data/lib/e11y/self_monitoring/reliability_monitor.rb +4 -74
  186. data/lib/e11y/slo/config_loader.rb +40 -0
  187. data/lib/e11y/slo/config_validator.rb +58 -0
  188. data/lib/e11y/slo/dashboard_generator.rb +122 -0
  189. data/lib/e11y/slo/event_driven.rb +8 -0
  190. data/lib/e11y/slo/tracker.rb +31 -4
  191. data/lib/e11y/testing/have_tracked_event_matcher.rb +190 -0
  192. data/lib/e11y/testing/rspec_matchers.rb +21 -0
  193. data/lib/e11y/testing/snapshot_matcher.rb +86 -0
  194. data/lib/e11y/trace_context/sampler.rb +35 -0
  195. data/lib/e11y/tracing/faraday_middleware.rb +31 -0
  196. data/lib/e11y/tracing/net_http_patch.rb +33 -0
  197. data/lib/e11y/tracing/propagator.rb +116 -0
  198. data/lib/e11y/tracing.rb +47 -0
  199. data/lib/e11y/version.rb +1 -1
  200. data/lib/e11y/versioning/version_extractor.rb +32 -0
  201. data/lib/e11y.rb +141 -265
  202. data/lib/generators/e11y/event/event_generator.rb +22 -0
  203. data/lib/generators/e11y/event/templates/event.rb.tt +16 -0
  204. data/lib/generators/e11y/grafana_dashboard/grafana_dashboard_generator.rb +30 -0
  205. data/lib/generators/e11y/grafana_dashboard/templates/e11y_dashboard.json +81 -0
  206. data/lib/generators/e11y/install/install_generator.rb +34 -0
  207. data/lib/generators/e11y/install/templates/e11y.rb +239 -0
  208. data/lib/generators/e11y/prometheus_alerts/prometheus_alerts_generator.rb +29 -0
  209. data/lib/generators/e11y/prometheus_alerts/templates/e11y_alerts.yml +28 -0
  210. data/lib/tasks/e11y_docs.rake +30 -0
  211. data/lib/tasks/e11y_events.rake +71 -0
  212. data/lib/tasks/e11y_lint.rake +91 -0
  213. data/lib/tasks/e11y_slo.rake +29 -0
  214. metadata +129 -39
  215. data/docs/ADR-010-developer-experience.md +0 -2166
  216. data/docs/API-REFERENCE-L28.md +0 -914
  217. data/docs/COMPREHENSIVE-CONFIGURATION.md +0 -2366
  218. data/docs/CONTRIBUTING.md +0 -312
  219. data/docs/IMPLEMENTATION_NOTES.md +0 -2804
  220. data/docs/IMPLEMENTATION_PLAN.md +0 -1971
  221. data/docs/IMPLEMENTATION_PLAN_ARCHITECTURE.md +0 -586
  222. data/docs/PLAN.md +0 -148
  223. data/docs/README.md +0 -296
  224. data/docs/design/00-memory-optimization.md +0 -593
  225. data/docs/guides/MIGRATION-L27-L28.md +0 -692
  226. data/docs/guides/PERFORMANCE-BENCHMARKS.md +0 -434
  227. data/docs/guides/README.md +0 -44
  228. data/docs/use_cases/UC-003-pattern-based-metrics.md +0 -1627
  229. data/lib/e11y/adapters/registry.rb +0 -141
  230. data/docs/{ADR-012-event-evolution.md → architecture/ADR-012-event-evolution.md} +0 -0
@@ -33,11 +33,8 @@ module E11y
33
33
  # severity_threshold: :warn
34
34
  # )
35
35
  #
36
- # @example With Registry
37
- # E11y::Adapters::Registry.register(
38
- # :error_tracker,
39
- # E11y::Adapters::Sentry.new(dsn: ENV["SENTRY_DSN"])
40
- # )
36
+ # @example Configuration
37
+ # config.adapters[:sentry] = E11y::Adapters::Sentry.new(dsn: ENV["SENTRY_DSN"])
41
38
  #
42
39
  # @see https://docs.sentry.io/platforms/ruby/
43
40
  # rubocop:disable Metrics/ClassLength
@@ -155,7 +152,7 @@ module E11y
155
152
  # Send error to Sentry
156
153
  #
157
154
  # @param event_data [Hash] Event data
158
- # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
155
+ # rubocop:disable Metrics/AbcSize
159
156
  # Sentry scope configuration requires multiple context enrichment steps
160
157
  def send_error_to_sentry(event_data)
161
158
  ::Sentry.with_scope do |scope|
@@ -187,7 +184,7 @@ module E11y
187
184
  end
188
185
  end
189
186
  end
190
- # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
187
+ # rubocop:enable Metrics/AbcSize
191
188
 
192
189
  # Send breadcrumb to Sentry
193
190
  #
@@ -41,11 +41,12 @@ module E11y
41
41
  #
42
42
  # @param config [Hash] Configuration options
43
43
  # @option config [Boolean] :colorize (true) Enable colored output
44
- # @option config [Boolean] :pretty_print (true) Enable pretty-printed JSON
44
+ # @option config [Boolean] :pretty_print (true) Enable pretty-printed JSON (when format: :json)
45
+ # @option config [Symbol] :format (:json) Output format: :json (JSON), :compact (single-line JSON), :rich (ADR-010 §3 structured)
45
46
  def initialize(config = {})
46
47
  @colorize = config.fetch(:colorize, true)
47
- @pretty_print = config.fetch(:pretty_print, true)
48
-
48
+ @format = config.fetch(:format, :json)
49
+ @pretty_print = resolve_pretty_print(config)
49
50
  super
50
51
  end
51
52
 
@@ -82,15 +83,29 @@ module E11y
82
83
 
83
84
  private
84
85
 
86
+ # Resolve pretty_print from format or pretty_print keys
87
+ #
88
+ # @param config [Hash] Adapter config
89
+ # @return [Boolean]
90
+ def resolve_pretty_print(config)
91
+ return config[:pretty_print] if config.key?(:pretty_print)
92
+
93
+ case config[:format]
94
+ when :compact then false
95
+ when :pretty then true
96
+ else config.fetch(:pretty_print, true)
97
+ end
98
+ end
99
+
85
100
  # Format event for console output
86
101
  #
87
102
  # @param event_data [Hash] Event data
88
103
  # @return [String] Formatted output
89
104
  def format_event(event_data)
90
- if @pretty_print
91
- JSON.pretty_generate(event_data)
92
- else
93
- event_data.to_json
105
+ case @format
106
+ when :rich then format_event_rich(event_data)
107
+ when :compact then event_data.to_json
108
+ else @pretty_print ? JSON.pretty_generate(event_data) : event_data.to_json
94
109
  end
95
110
  end
96
111
 
@@ -103,6 +118,57 @@ module E11y
103
118
  color_code = SEVERITY_COLORS[severity] || ""
104
119
  "#{color_code}#{output}#{COLOR_RESET}"
105
120
  end
121
+
122
+ # Rich format: ADR-010 §3 — structured output with header, event name, payload, metadata
123
+ def format_event_rich(event_data)
124
+ lines = []
125
+ lines << format_header(event_data)
126
+ lines << format_event_name_line(event_data)
127
+ lines << format_payload_section(event_data[:payload]) if event_data[:payload]&.any?
128
+ lines << format_metadata_section(event_data) if event_data[:trace_id] || event_data[:span_id]
129
+ lines << ("─" * 80)
130
+ lines.join("\n")
131
+ end
132
+
133
+ def format_header(event_data)
134
+ ts = event_data[:timestamp]
135
+ ts = Time.parse(ts) if ts.is_a?(String)
136
+ time_str = ts&.strftime("%H:%M:%S.%L") || "??:??:??.???"
137
+ sev = event_data[:severity].to_s.upcase.ljust(8)
138
+ "#{time_str} #{sev}"
139
+ end
140
+
141
+ def format_event_name_line(event_data)
142
+ name = event_data[:event_name].to_s
143
+ " → #{name}"
144
+ end
145
+
146
+ def format_payload_section(payload)
147
+ lines = [" Payload:"]
148
+ payload.each do |k, v|
149
+ lines << " #{k}: #{format_value_rich(v)}"
150
+ end
151
+ lines.join("\n")
152
+ end
153
+
154
+ def format_metadata_section(event_data)
155
+ meta = { trace_id: event_data[:trace_id], span_id: event_data[:span_id] }.compact
156
+ return "" if meta.empty?
157
+
158
+ meta.map { |k, v| " #{k}: #{v}" }.unshift(" Metadata:").join("\n")
159
+ end
160
+
161
+ def format_value_rich(value)
162
+ case value
163
+ when String then "\"#{value.length > 50 ? "#{value[0...50]}..." : value}\""
164
+ when Array then "[#{value.size} items]"
165
+ when Hash then "{#{value.size} keys}"
166
+ else value.inspect
167
+ end
168
+ end
106
169
  end
170
+
171
+ # Alias for ADR-010 §3 (Console Output) — Console and Stdout are the same adapter
172
+ Console = Stdout
107
173
  end
108
174
  end
@@ -43,7 +43,7 @@ module E11y
43
43
  # # Events automatically update metrics via middleware
44
44
  #
45
45
  # @see ADR-002 Metrics & Yabeda Integration
46
- # @see UC-003 Pattern-Based Metrics
46
+ # @see UC-003 Event Metrics
47
47
  # rubocop:disable Metrics/ClassLength
48
48
  # Yabeda adapter contains metrics registration and update logic as cohesive unit
49
49
  class Yabeda < Base
@@ -67,6 +67,8 @@ module E11y
67
67
  return unless config.fetch(:auto_register, true)
68
68
 
69
69
  register_metrics_from_registry!
70
+ register_middleware_metrics!
71
+ register_self_monitoring_metrics!
70
72
 
71
73
  # Apply configuration in non-Rails environments (Rails does this automatically)
72
74
  # In tests, Yabeda.configure! should be called explicitly in before blocks
@@ -108,9 +110,10 @@ module E11y
108
110
 
109
111
  # Check if adapter is healthy
110
112
  #
111
- # @return [Boolean] true if Yabeda is available and configured
113
+ # @return [Boolean] true if Yabeda is available, configured, and e11y group exists
112
114
  def healthy?
113
115
  return false unless defined?(::Yabeda)
116
+ return false unless ::Yabeda.respond_to?(:e11y)
114
117
 
115
118
  ::Yabeda.configured?
116
119
  rescue StandardError
@@ -151,8 +154,11 @@ module E11y
151
154
  # Register metric if not exists
152
155
  register_metric_if_needed(name, :counter, safe_labels.keys)
153
156
 
154
- # Update Yabeda metric
155
- ::Yabeda.e11y.send(name).increment(safe_labels, by: value)
157
+ # Update Yabeda metric (guard against nil when metric wasn't registered, e.g. after configure!)
158
+ metric = ::Yabeda.e11y.send(name)
159
+ return unless metric
160
+
161
+ metric.increment(safe_labels, by: value)
156
162
  rescue StandardError => e
157
163
  E11y.logger.warn("Failed to increment Yabeda metric #{name}: #{e.message}")
158
164
  end
@@ -173,8 +179,11 @@ module E11y
173
179
  # Register metric if not exists
174
180
  register_metric_if_needed(name, :histogram, safe_labels.keys, buckets: buckets)
175
181
 
176
- # Update Yabeda metric
177
- ::Yabeda.e11y.send(name).measure(safe_labels, value)
182
+ # Update Yabeda metric (guard against nil when metric wasn't registered)
183
+ metric = ::Yabeda.e11y.send(name)
184
+ return unless metric
185
+
186
+ metric.measure(safe_labels, value)
178
187
  rescue StandardError => e
179
188
  E11y.logger.warn("Failed to observe Yabeda histogram #{name}: #{e.message}")
180
189
  end
@@ -194,8 +203,11 @@ module E11y
194
203
  # Register metric if not exists
195
204
  register_metric_if_needed(name, :gauge, safe_labels.keys)
196
205
 
197
- # Update Yabeda metric
198
- ::Yabeda.e11y.send(name).set(safe_labels, value)
206
+ # Update Yabeda metric (guard against nil when metric wasn't registered)
207
+ metric = ::Yabeda.e11y.send(name)
208
+ return unless metric
209
+
210
+ metric.set(safe_labels, value)
199
211
  rescue StandardError => e
200
212
  E11y.logger.warn("Failed to set Yabeda gauge #{name}: #{e.message}")
201
213
  end
@@ -208,9 +220,7 @@ module E11y
208
220
  super
209
221
 
210
222
  # Validate cardinality_limit
211
- if @config[:cardinality_limit] && !@config[:cardinality_limit].is_a?(Integer)
212
- raise ArgumentError, "cardinality_limit must be an Integer"
213
- end
223
+ raise ArgumentError, "cardinality_limit must be an Integer" if @config[:cardinality_limit] && !@config[:cardinality_limit].is_a?(Integer)
214
224
 
215
225
  # Validate forbidden_labels
216
226
  return unless @config[:forbidden_labels] && !@config[:forbidden_labels].is_a?(Array)
@@ -278,20 +288,112 @@ module E11y
278
288
  end
279
289
  end
280
290
 
291
+ # Pre-register middleware self-monitoring metrics.
292
+ #
293
+ # These metrics are used by TraceContext, Validation, and Routing middleware.
294
+ # Must be registered before Yabeda.configure! is called (e.g. in app initializers).
295
+ # Called during adapter initialization so they're available when events flow.
296
+ # Names use underscores (Prometheus requires /[a-zA-Z_:][a-zA-Z0-9_:]*/, no dots).
297
+ #
298
+ # @return [void]
299
+ def register_middleware_metrics!
300
+ return unless defined?(::Yabeda)
301
+
302
+ middleware_metrics = [
303
+ { name: :e11y_middleware_trace_context_processed, tags: [] },
304
+ { name: :e11y_middleware_validation_total, tags: [:result] },
305
+ { name: :e11y_middleware_routing_routed, tags: %i[adapters_count routing_type] }
306
+ ]
307
+
308
+ cardinality_metrics = [
309
+ { name: :e11y_cardinality_overflow_total, tags: %i[metric action strategy] },
310
+ { name: :e11y_cardinality_current, type: :gauge, tags: [:metric] }
311
+ ]
312
+
313
+ (middleware_metrics + cardinality_metrics).each do |m|
314
+ type = m[:type] || :counter
315
+ register_metric_if_needed(m[:name], type, m[:tags])
316
+ end
317
+ rescue StandardError => e
318
+ E11y.logger.debug("Could not register middleware metrics: #{e.message}")
319
+ end
320
+
321
+ # Pre-register self-monitoring metrics (request buffer, retry, circuit breaker, DLQ, etc.).
322
+ # Must be registered before Yabeda.configure! so they exist when reliability layer runs.
323
+ #
324
+ # @return [void] # -- metric list is inherently long
325
+ def register_self_monitoring_metrics!
326
+ return unless defined?(::Yabeda)
327
+
328
+ metrics = [
329
+ # Request buffer (consolidated)
330
+ { name: :e11y_ephemeral_buffer_total, tags: [:event] },
331
+ # Retry handler
332
+ { name: :e11y_retry_success, tags: %i[adapter attempts] },
333
+ { name: :e11y_retry_recovered, tags: %i[adapter attempts] },
334
+ { name: :e11y_retry_permanent_failure, tags: %i[adapter error attempt] },
335
+ { name: :e11y_retry_exhausted, tags: %i[adapter error attempts] },
336
+ { name: :e11y_retry_attempt, tags: %i[adapter error attempt] },
337
+ # Circuit breaker (consolidated: transitions counter + state gauge)
338
+ { name: :e11y_circuit_breaker_transitions_total, tags: %i[adapter event] },
339
+ { name: :e11y_circuit_breaker_state, type: :gauge, tags: [:adapter] },
340
+ # Adapter performance & reliability
341
+ { name: :e11y_adapter_send_duration_seconds, type: :histogram, tags: [:adapter], buckets: [0.001, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0] },
342
+ { name: :e11y_adapter_writes_total, tags: %i[adapter status error_class] },
343
+ # DLQ
344
+ { name: :e11y_dlq_size, type: :gauge, tags: [] },
345
+ { name: :e11y_dlq_filter_decisions_total, tags: %i[action reason] },
346
+ { name: :e11y_dlq_saved_total, tags: [:event_name] },
347
+ { name: :e11y_dlq_parse_error_total, tags: [:error] },
348
+ { name: :e11y_dlq_replayed_total, tags: [:event_name] },
349
+ { name: :e11y_dlq_replay_failed_total, tags: [:error] },
350
+ # Retry rate limiter (consolidated)
351
+ { name: :e11y_retry_rate_limiter_total, tags: %i[adapter event delay_sec] },
352
+ # Buffer (ring, adaptive) — consolidated
353
+ { name: :e11y_buffer_overflow_total, tags: [:event] },
354
+ # Rate limiting / sampling
355
+ { name: :e11y_events_dropped_total, tags: %i[reason event_type] },
356
+ # SLO tracking (Request middleware triggers on every HTTP request when enabled)
357
+ { name: :slo_http_requests_total, tags: %i[controller action status] },
358
+ { name: :slo_http_request_duration_seconds, type: :histogram, tags: %i[controller action],
359
+ buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0] },
360
+ { name: :slo_background_jobs_total, tags: %i[job_class status queue] },
361
+ { name: :slo_background_job_duration_seconds, type: :histogram, tags: %i[job_class queue],
362
+ buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0] },
363
+ # E11y self-monitoring (events tracked at pipeline end)
364
+ { name: :e11y_events_tracked_total, tags: %i[result event_name] },
365
+ # Track latency (TrackLatency middleware)
366
+ { name: :e11y_track_duration_seconds, type: :histogram, tags: %i[event_class severity result],
367
+ buckets: [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1] }
368
+ ]
369
+
370
+ metrics.each do |m|
371
+ type = m[:type] || :counter
372
+ buckets = m[:buckets]
373
+ register_metric_if_needed(m[:name], type, m[:tags], buckets: buckets)
374
+ end
375
+ rescue StandardError => e
376
+ E11y.logger.debug("Could not register self-monitoring metrics: #{e.message}")
377
+ end
378
+
281
379
  # Register a single metric in Yabeda
282
380
  #
283
381
  # @param metric_config [Hash] Metric configuration from Registry
284
382
  # @return [void]
285
- # rubocop:disable Metrics/MethodLength
286
383
  # Metric registration requires case/when for different metric types
287
384
  def register_yabeda_metric(metric_config)
288
385
  metric_name = metric_config[:name]
289
386
  metric_type = metric_config[:type]
290
387
  tags = metric_config[:tags] || []
291
388
 
389
+ # Skip if metric already exists (prevents re-registration errors)
390
+ return if ::Yabeda.metrics.key?("e11y_#{metric_name}")
391
+
292
392
  # Define metric in Yabeda group
293
- ::Yabeda.configure do
294
- group :e11y do
393
+ ::Yabeda.configure do |config = nil|
394
+ next unless config.respond_to?(:group)
395
+
396
+ config.group :e11y do
295
397
  case metric_type
296
398
  when :counter
297
399
  counter metric_name, tags: tags, comment: "E11y metric: #{metric_name}"
@@ -309,7 +411,6 @@ module E11y
309
411
  # Metric might already be registered - that's OK
310
412
  warn "E11y Yabeda: Could not register metric #{metric_name}: #{e.message}"
311
413
  end
312
- # rubocop:enable Metrics/MethodLength
313
414
 
314
415
  # Register a metric if it doesn't exist yet (for direct metric calls).
315
416
  #
@@ -319,14 +420,15 @@ module E11y
319
420
  # @param buckets [Array<Numeric>, nil] Optional histogram buckets
320
421
  # @return [void]
321
422
  # @api private
322
- # rubocop:disable Metrics/MethodLength
323
423
  # Metric registration requires case/when for different metric types
324
424
  def register_metric_if_needed(name, type, tags, buckets: nil)
325
- # Check if metric already exists
326
- return if ::Yabeda.metrics.key?(:"e11y_#{name}")
425
+ # Check if metric already exists (Yabeda stores metric keys as strings)
426
+ return if ::Yabeda.metrics.key?("e11y_#{name}")
327
427
 
328
- ::Yabeda.configure do
329
- group :e11y do
428
+ ::Yabeda.configure do |config = nil|
429
+ next unless config.respond_to?(:group)
430
+
431
+ config.group :e11y do
330
432
  case type
331
433
  when :counter
332
434
  counter name, tags: tags, comment: "E11y self-monitoring: #{name}"
@@ -347,38 +449,60 @@ module E11y
347
449
  # Metric might already be registered - that's OK
348
450
  E11y.logger.warn("Could not register Yabeda metric #{name}: #{e.message}")
349
451
  end
350
- # rubocop:enable Metrics/MethodLength
351
452
 
352
453
  # Update a single metric based on event data
353
454
  #
354
455
  # @param metric_config [Hash] Metric configuration
355
456
  # @param event_data [Hash] Event data
356
457
  # @return [void]
357
- # rubocop:disable Metrics/AbcSize
358
458
  # Metric update requires multiple steps for label extraction and value handling
359
459
  def update_metric(metric_config, event_data)
360
460
  metric_name = metric_config[:name]
361
461
  labels = extract_labels(metric_config, event_data)
362
462
 
363
- # Apply cardinality protection
364
- safe_labels = @cardinality_protection.filter(labels, metric_name)
463
+ # Apply cardinality protection (normalize metric_name to string for consistent tracking)
464
+ safe_labels = @cardinality_protection.filter(labels, metric_name.to_s)
365
465
 
366
466
  # Extract value for histogram/gauge
367
467
  value = extract_value(metric_config, event_data) if %i[histogram gauge].include?(metric_config[:type])
368
468
 
369
- # Update Yabeda metric
469
+ # Get original tags from metric config - these are the tags the metric was registered with
470
+ original_tags = metric_config.fetch(:tags, [])
471
+
472
+ # Lazy registration: register metric if it doesn't exist in Yabeda yet
473
+ # CRITICAL: Use ORIGINAL tags from metric config, not filtered safe_labels.keys
474
+ # Prometheus requires all tags declared at registration time
475
+ register_metric_if_needed(
476
+ metric_name,
477
+ metric_config[:type],
478
+ original_tags,
479
+ buckets: metric_config[:buckets]
480
+ )
481
+
482
+ # Ensure all required tags are present in safe_labels
483
+ # If cardinality protection dropped a tag, add placeholder value
484
+ # Prometheus requires all tags declared at registration to be present in every update
485
+ final_labels = original_tags.to_h do |tag|
486
+ [tag, safe_labels.key?(tag) ? safe_labels[tag] : "[DROPPED]"]
487
+ end
488
+
489
+ # Update Yabeda metric (skip if e11y group not registered, e.g. Yabeda not configured)
490
+ return unless ::Yabeda.respond_to?(:e11y)
491
+
492
+ metric = ::Yabeda.e11y.send(metric_name)
493
+ return unless metric
494
+
370
495
  case metric_config[:type]
371
496
  when :counter
372
- ::Yabeda.e11y.send(metric_name).increment(safe_labels)
497
+ metric.increment(final_labels)
373
498
  when :histogram
374
- ::Yabeda.e11y.send(metric_name).measure(safe_labels, value)
499
+ metric.measure(final_labels, value)
375
500
  when :gauge
376
- ::Yabeda.e11y.send(metric_name).set(safe_labels, value)
501
+ metric.set(final_labels, value)
377
502
  end
378
503
  rescue StandardError => e
379
504
  warn "E11y Yabeda: Error updating metric #{metric_name}: #{e.message}"
380
505
  end
381
- # rubocop:enable Metrics/AbcSize
382
506
 
383
507
  # Extract labels from event data
384
508
  #
@@ -247,7 +247,6 @@ module E11y
247
247
  #
248
248
  # @param obj [Object] Payload object
249
249
  # @return [Integer] Size in bytes
250
- # rubocop:disable Metrics/CyclomaticComplexity, Metrics/MethodLength
251
250
  def calculate_payload_size(obj)
252
251
  case obj
253
252
  when String
@@ -266,14 +265,12 @@ module E11y
266
265
  rescue StandardError
267
266
  500 # Fallback for errors
268
267
  end
269
- # rubocop:enable Metrics/CyclomaticComplexity, Metrics/MethodLength
270
268
 
271
269
  # Handle memory exhaustion according to strategy
272
270
  #
273
271
  # @param event_data [Hash] Event that caused exhaustion
274
272
  # @param event_size [Integer] Size of event
275
273
  # @return [Boolean] true if event was eventually added, false if dropped
276
- # rubocop:disable Metrics/MethodLength
277
274
  def handle_memory_exhaustion(event_data, event_size)
278
275
  case @backpressure_strategy
279
276
  when :block
@@ -288,7 +285,7 @@ module E11y
288
285
  # Check timeout
289
286
  if Time.now - wait_start > @max_block_time
290
287
  # Timeout exceeded - drop event
291
- increment_metric("e11y.buffer.memory_exhaustion.dropped")
288
+ E11y::Metrics.increment(:e11y_buffer_overflow_total, event: "memory_exhaustion_dropped")
292
289
  return false
293
290
  end
294
291
 
@@ -297,16 +294,15 @@ module E11y
297
294
  end
298
295
 
299
296
  # Space available - retry add
300
- increment_metric("e11y.buffer.memory_exhaustion.blocked")
297
+ E11y::Metrics.increment(:e11y_buffer_overflow_total, event: "memory_exhaustion_blocked")
301
298
  add_event(event_data)
302
299
 
303
300
  when :drop
304
301
  # Drop new event
305
- increment_metric("e11y.buffer.memory_exhaustion.dropped")
302
+ E11y::Metrics.increment(:e11y_buffer_overflow_total, event: "memory_exhaustion_dropped")
306
303
  false
307
304
  end
308
305
  end
309
- # rubocop:enable Metrics/MethodLength
310
306
 
311
307
  # Trigger early flush (80% threshold reached)
312
308
  #
@@ -323,16 +319,6 @@ module E11y
323
319
  warn "E11y: Early flush callback failed: #{e.message}"
324
320
  end
325
321
 
326
- # Increment metric (placeholder for Phase 3: Metrics)
327
- #
328
- # TODO Phase 3: Replace with actual Yabeda metrics
329
- #
330
- # @param metric_name [String] Metric to increment
331
- # @return [void]
332
- def increment_metric(metric_name)
333
- # Placeholder - will be implemented in Phase 3
334
- # Yabeda.e11y.buffer_memory_exhaustion.increment(strategy: @backpressure_strategy)
335
- end
336
322
  # rubocop:enable Metrics/ClassLength
337
323
  end
338
324
  end