e11y 0.2.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +130 -10
- data/CHANGELOG.md +56 -1
- data/CLAUDE.md +168 -0
- data/CONTRIBUTING.md +640 -0
- data/README.md +134 -702
- data/RELEASE.md +18 -3
- data/Rakefile +108 -29
- data/config/README.md +1 -1
- data/config/loki-local-config.yaml +12 -0
- data/config/otel-collector-config.yaml +44 -0
- data/cucumber.yml +1 -0
- data/docker-compose.yml +18 -2
- data/docs/ADAPTERS.md +76 -0
- data/docs/ADAPTIVE_SAMPLING.md +59 -0
- data/docs/COMPARISON.md +104 -0
- data/docs/CONFIGURATION.md +52 -0
- data/docs/DISTRIBUTED_TRACING.md +44 -0
- data/docs/LIMITATIONS.md +13 -0
- data/docs/METRICS_DSL.md +84 -0
- data/docs/PERFORMANCE.md +60 -0
- data/docs/PII_FILTERING.md +40 -0
- data/docs/PRESETS.md +65 -0
- data/docs/QUICK-START.md +546 -587
- data/docs/RAILS_INTEGRATION.md +29 -0
- data/docs/SCHEMA_VALIDATION.md +63 -0
- data/docs/SLO-PROMQL-ALERTS.md +161 -0
- data/docs/TESTING.md +69 -0
- data/docs/{ADR-001-architecture.md → architecture/ADR-001-architecture.md} +35 -64
- data/docs/{ADR-002-metrics-yabeda.md → architecture/ADR-002-metrics-yabeda.md} +62 -236
- data/docs/{ADR-003-slo-observability.md → architecture/ADR-003-slo-observability.md} +27 -466
- data/docs/{ADR-004-adapter-architecture.md → architecture/ADR-004-adapter-architecture.md} +163 -146
- data/docs/{ADR-005-tracing-context.md → architecture/ADR-005-tracing-context.md} +10 -9
- data/docs/{ADR-006-security-compliance.md → architecture/ADR-006-security-compliance.md} +184 -191
- data/docs/{ADR-007-opentelemetry-integration.md → architecture/ADR-007-opentelemetry-integration.md} +3 -21
- data/docs/{ADR-008-rails-integration.md → architecture/ADR-008-rails-integration.md} +209 -339
- data/docs/{ADR-009-cost-optimization.md → architecture/ADR-009-cost-optimization.md} +45 -54
- data/docs/architecture/ADR-010-developer-experience.md +522 -0
- data/docs/{ADR-011-testing-strategy.md → architecture/ADR-011-testing-strategy.md} +41 -83
- data/docs/{ADR-013-reliability-error-handling.md → architecture/ADR-013-reliability-error-handling.md} +37 -12
- data/docs/{ADR-014-event-driven-slo.md → architecture/ADR-014-event-driven-slo.md} +12 -24
- data/docs/{ADR-015-middleware-order.md → architecture/ADR-015-middleware-order.md} +23 -41
- data/docs/{ADR-016-self-monitoring-slo.md → architecture/ADR-016-self-monitoring-slo.md} +52 -349
- data/docs/{ADR-017-multi-rails-compatibility.md → architecture/ADR-017-multi-rails-compatibility.md} +4 -11
- data/docs/architecture/ADR-018-memory-optimization.md +366 -0
- data/docs/{ADR-INDEX.md → architecture/ADR-INDEX.md} +11 -6
- data/docs/{00-ICP-AND-TIMELINE.md → prd/00-ICP-AND-TIMELINE.md} +6 -6
- data/docs/{01-SCALE-REQUIREMENTS.md → prd/01-SCALE-REQUIREMENTS.md} +6 -6
- data/docs/prd/01-overview-vision.md +19 -14
- data/docs/use_cases/README.md +22 -23
- data/docs/use_cases/UC-001-request-scoped-debug-buffering.md +50 -44
- data/docs/use_cases/UC-002-business-event-tracking.md +26 -95
- data/docs/use_cases/UC-003-event-metrics.md +66 -0
- data/docs/use_cases/UC-004-zero-config-slo-tracking.md +42 -101
- data/docs/use_cases/UC-005-sentry-integration.md +13 -15
- data/docs/use_cases/UC-006-trace-context-management.md +30 -28
- data/docs/use_cases/UC-007-pii-filtering.md +35 -87
- data/docs/use_cases/UC-008-opentelemetry-integration.md +51 -89
- data/docs/use_cases/UC-009-multi-service-tracing.md +4 -4
- data/docs/use_cases/UC-010-background-job-tracking.md +5 -5
- data/docs/use_cases/UC-011-rate-limiting.md +95 -168
- data/docs/use_cases/UC-012-audit-trail.md +21 -46
- data/docs/use_cases/UC-013-high-cardinality-protection.md +29 -167
- data/docs/use_cases/UC-014-adaptive-sampling.md +2 -2
- data/docs/use_cases/UC-015-cost-optimization.md +46 -99
- data/docs/use_cases/UC-016-rails-logger-migration.md +39 -213
- data/docs/use_cases/UC-017-local-development.md +203 -777
- data/docs/use_cases/UC-018-testing-events.md +3 -3
- data/docs/use_cases/UC-019-retention-based-routing.md +53 -106
- data/docs/use_cases/UC-020-event-versioning.md +8 -9
- data/docs/use_cases/UC-021-error-handling-retry-dlq.md +18 -22
- data/docs/use_cases/UC-022-event-registry.md +15 -21
- data/docs/use_cases/backlog.md +119 -87
- data/e11y.gemspec +2 -2
- data/gems/e11y-devtools/README.md +136 -0
- data/gems/e11y-devtools/config/routes.rb +8 -0
- data/gems/e11y-devtools/e11y-devtools.gemspec +25 -0
- data/gems/e11y-devtools/exe/e11y +34 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/server.rb +96 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tool_base.rb +25 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/clear.rb +31 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/errors.rb +35 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/event_detail.rb +33 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/events_by_trace.rb +33 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/interactions.rb +40 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/recent_events.rb +34 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/search.rb +34 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/stats.rb +30 -0
- data/gems/e11y-devtools/lib/e11y/devtools/overlay/assets/overlay.js +115 -0
- data/gems/e11y-devtools/lib/e11y/devtools/overlay/controller.rb +54 -0
- data/gems/e11y-devtools/lib/e11y/devtools/overlay/engine.rb +26 -0
- data/gems/e11y-devtools/lib/e11y/devtools/overlay/middleware.rb +80 -0
- data/gems/e11y-devtools/lib/e11y/devtools/overlay/rails_controller.rb +42 -0
- data/gems/e11y-devtools/lib/e11y/devtools/tui/app.rb +262 -0
- data/gems/e11y-devtools/lib/e11y/devtools/tui/grouping.rb +66 -0
- data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/event_detail.rb +62 -0
- data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/event_list.rb +70 -0
- data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/interaction_list.rb +47 -0
- data/gems/e11y-devtools/lib/e11y/devtools/version.rb +8 -0
- data/gems/e11y-devtools/lib/e11y/devtools.rb +13 -0
- data/gems/e11y-devtools/spec/e11y/devtools/mcp/tools_spec.rb +107 -0
- data/gems/e11y-devtools/spec/e11y/devtools/overlay/controller_spec.rb +58 -0
- data/gems/e11y-devtools/spec/e11y/devtools/overlay/middleware_spec.rb +46 -0
- data/gems/e11y-devtools/spec/e11y/devtools/tui/app_spec.rb +85 -0
- data/gems/e11y-devtools/spec/e11y/devtools/tui/grouping_spec.rb +64 -0
- data/gems/e11y-devtools/spec/spec_helper.rb +5 -0
- data/gems/e11y-devtools/spec/tui/widgets/event_list_spec.rb +44 -0
- data/gems/e11y-devtools/spec/tui/widgets/interaction_list_spec.rb +62 -0
- data/lib/e11y/adapters/audit_encrypted.rb +53 -11
- data/lib/e11y/adapters/base.rb +33 -34
- data/lib/e11y/adapters/dev_log/file_store.rb +143 -0
- data/lib/e11y/adapters/dev_log/query.rb +219 -0
- data/lib/e11y/adapters/dev_log.rb +118 -0
- data/lib/e11y/adapters/file.rb +3 -6
- data/lib/e11y/adapters/in_memory.rb +52 -5
- data/lib/e11y/adapters/in_memory_test.rb +29 -0
- data/lib/e11y/adapters/loki.rb +58 -23
- data/lib/e11y/adapters/null.rb +82 -0
- data/lib/e11y/adapters/opentelemetry_collector.rb +183 -0
- data/lib/e11y/adapters/otel_logs.rb +136 -23
- data/lib/e11y/adapters/sentry.rb +4 -7
- data/lib/e11y/adapters/stdout.rb +73 -7
- data/lib/e11y/adapters/yabeda.rb +153 -29
- data/lib/e11y/buffers/adaptive_buffer.rb +3 -17
- data/lib/e11y/buffers/{request_scoped_buffer.rb → ephemeral_buffer.rb} +72 -58
- data/lib/e11y/buffers/ring_buffer.rb +3 -16
- data/lib/e11y/configuration.rb +272 -0
- data/lib/e11y/console.rb +10 -17
- data/lib/e11y/current.rb +53 -1
- data/lib/e11y/debug/pipeline_inspector.rb +96 -0
- data/lib/e11y/documentation/generator.rb +48 -0
- data/lib/e11y/event/base.rb +176 -82
- data/lib/e11y/event/value_sampling_config.rb +1 -5
- data/lib/e11y/events/rails/database/query.rb +1 -4
- data/lib/e11y/events/rails/job/failed.rb +2 -0
- data/lib/e11y/instruments/active_job.rb +46 -12
- data/lib/e11y/instruments/rails_instrumentation.rb +49 -24
- data/lib/e11y/instruments/sidekiq.rb +137 -31
- data/lib/e11y/linters/base.rb +11 -0
- data/lib/e11y/linters/pii/pii_declaration_linter.rb +120 -0
- data/lib/e11y/linters/slo/config_consistency_linter.rb +76 -0
- data/lib/e11y/linters/slo/explicit_declaration_linter.rb +36 -0
- data/lib/e11y/linters/slo/slo_status_from_linter.rb +41 -0
- data/lib/e11y/logger/bridge.rb +26 -7
- data/lib/e11y/metrics/cardinality_protection.rb +10 -15
- data/lib/e11y/metrics/cardinality_tracker.rb +16 -6
- data/lib/e11y/metrics/registry.rb +3 -5
- data/lib/e11y/metrics/test_backend.rb +62 -0
- data/lib/e11y/metrics.rb +56 -10
- data/lib/e11y/middleware/adapter_resolver.rb +40 -0
- data/lib/e11y/middleware/audit_signing.rb +43 -6
- data/lib/e11y/middleware/baggage_protection.rb +75 -0
- data/lib/e11y/middleware/dev_log_source.rb +24 -0
- data/lib/e11y/middleware/event_slo.rb +23 -9
- data/lib/e11y/middleware/otel_span.rb +23 -0
- data/lib/e11y/middleware/pii_filter.rb +104 -75
- data/lib/e11y/middleware/rate_limiting.rb +54 -27
- data/lib/e11y/middleware/request.rb +70 -23
- data/lib/e11y/middleware/routing.rb +78 -21
- data/lib/e11y/middleware/sampling.rb +66 -17
- data/lib/e11y/middleware/self_monitoring_emit.rb +39 -0
- data/lib/e11y/middleware/trace_context.rb +45 -10
- data/lib/e11y/middleware/track_latency.rb +34 -0
- data/lib/e11y/middleware/validation.rb +7 -16
- data/lib/e11y/middleware/versioning.rb +26 -22
- data/lib/e11y/opentelemetry/semantic_conventions.rb +109 -0
- data/lib/e11y/opentelemetry/span_creator.rb +142 -0
- data/lib/e11y/pii/patterns.rb +12 -1
- data/lib/e11y/pipeline/builder.rb +1 -1
- data/lib/e11y/presets/audit_event.rb +13 -2
- data/lib/e11y/railtie.rb +52 -15
- data/lib/e11y/registry.rb +306 -0
- data/lib/e11y/reliability/circuit_breaker.rb +19 -21
- data/lib/e11y/reliability/dlq/base.rb +71 -0
- data/lib/e11y/reliability/dlq/file_adapter.rb +301 -0
- data/lib/e11y/reliability/dlq/file_storage.rb +63 -34
- data/lib/e11y/reliability/dlq/filter.rb +37 -54
- data/lib/e11y/reliability/retry_handler.rb +26 -29
- data/lib/e11y/reliability/retry_rate_limiter.rb +3 -11
- data/lib/e11y/sampling/error_spike_detector.rb +0 -2
- data/lib/e11y/sampling/load_monitor.rb +5 -9
- data/lib/e11y/sampling/stratified_tracker.rb +18 -0
- data/lib/e11y/self_monitoring/buffer_monitor.rb +2 -0
- data/lib/e11y/self_monitoring/performance_monitor.rb +19 -61
- data/lib/e11y/self_monitoring/reliability_monitor.rb +4 -74
- data/lib/e11y/slo/config_loader.rb +40 -0
- data/lib/e11y/slo/config_validator.rb +58 -0
- data/lib/e11y/slo/dashboard_generator.rb +122 -0
- data/lib/e11y/slo/event_driven.rb +8 -0
- data/lib/e11y/slo/tracker.rb +31 -4
- data/lib/e11y/testing/have_tracked_event_matcher.rb +190 -0
- data/lib/e11y/testing/rspec_matchers.rb +21 -0
- data/lib/e11y/testing/snapshot_matcher.rb +86 -0
- data/lib/e11y/trace_context/sampler.rb +35 -0
- data/lib/e11y/tracing/faraday_middleware.rb +31 -0
- data/lib/e11y/tracing/net_http_patch.rb +33 -0
- data/lib/e11y/tracing/propagator.rb +116 -0
- data/lib/e11y/tracing.rb +47 -0
- data/lib/e11y/version.rb +1 -1
- data/lib/e11y/versioning/version_extractor.rb +32 -0
- data/lib/e11y.rb +141 -265
- data/lib/generators/e11y/event/event_generator.rb +22 -0
- data/lib/generators/e11y/event/templates/event.rb.tt +16 -0
- data/lib/generators/e11y/grafana_dashboard/grafana_dashboard_generator.rb +30 -0
- data/lib/generators/e11y/grafana_dashboard/templates/e11y_dashboard.json +81 -0
- data/lib/generators/e11y/install/install_generator.rb +34 -0
- data/lib/generators/e11y/install/templates/e11y.rb +239 -0
- data/lib/generators/e11y/prometheus_alerts/prometheus_alerts_generator.rb +29 -0
- data/lib/generators/e11y/prometheus_alerts/templates/e11y_alerts.yml +28 -0
- data/lib/tasks/e11y_docs.rake +30 -0
- data/lib/tasks/e11y_events.rake +71 -0
- data/lib/tasks/e11y_lint.rake +91 -0
- data/lib/tasks/e11y_slo.rake +29 -0
- metadata +129 -39
- data/docs/ADR-010-developer-experience.md +0 -2166
- data/docs/API-REFERENCE-L28.md +0 -914
- data/docs/COMPREHENSIVE-CONFIGURATION.md +0 -2366
- data/docs/CONTRIBUTING.md +0 -312
- data/docs/IMPLEMENTATION_NOTES.md +0 -2804
- data/docs/IMPLEMENTATION_PLAN.md +0 -1971
- data/docs/IMPLEMENTATION_PLAN_ARCHITECTURE.md +0 -586
- data/docs/PLAN.md +0 -148
- data/docs/README.md +0 -296
- data/docs/design/00-memory-optimization.md +0 -593
- data/docs/guides/MIGRATION-L27-L28.md +0 -692
- data/docs/guides/PERFORMANCE-BENCHMARKS.md +0 -434
- data/docs/guides/README.md +0 -44
- data/docs/use_cases/UC-003-pattern-based-metrics.md +0 -1627
- data/lib/e11y/adapters/registry.rb +0 -141
- /data/docs/{ADR-012-event-evolution.md → architecture/ADR-012-event-evolution.md} +0 -0
data/lib/e11y/adapters/sentry.rb
CHANGED
|
@@ -33,11 +33,8 @@ module E11y
|
|
|
33
33
|
# severity_threshold: :warn
|
|
34
34
|
# )
|
|
35
35
|
#
|
|
36
|
-
# @example
|
|
37
|
-
# E11y::Adapters::
|
|
38
|
-
# :error_tracker,
|
|
39
|
-
# E11y::Adapters::Sentry.new(dsn: ENV["SENTRY_DSN"])
|
|
40
|
-
# )
|
|
36
|
+
# @example Configuration
|
|
37
|
+
# config.adapters[:sentry] = E11y::Adapters::Sentry.new(dsn: ENV["SENTRY_DSN"])
|
|
41
38
|
#
|
|
42
39
|
# @see https://docs.sentry.io/platforms/ruby/
|
|
43
40
|
# rubocop:disable Metrics/ClassLength
|
|
@@ -155,7 +152,7 @@ module E11y
|
|
|
155
152
|
# Send error to Sentry
|
|
156
153
|
#
|
|
157
154
|
# @param event_data [Hash] Event data
|
|
158
|
-
# rubocop:disable Metrics/AbcSize
|
|
155
|
+
# rubocop:disable Metrics/AbcSize
|
|
159
156
|
# Sentry scope configuration requires multiple context enrichment steps
|
|
160
157
|
def send_error_to_sentry(event_data)
|
|
161
158
|
::Sentry.with_scope do |scope|
|
|
@@ -187,7 +184,7 @@ module E11y
|
|
|
187
184
|
end
|
|
188
185
|
end
|
|
189
186
|
end
|
|
190
|
-
# rubocop:enable Metrics/AbcSize
|
|
187
|
+
# rubocop:enable Metrics/AbcSize
|
|
191
188
|
|
|
192
189
|
# Send breadcrumb to Sentry
|
|
193
190
|
#
|
data/lib/e11y/adapters/stdout.rb
CHANGED
|
@@ -41,11 +41,12 @@ module E11y
|
|
|
41
41
|
#
|
|
42
42
|
# @param config [Hash] Configuration options
|
|
43
43
|
# @option config [Boolean] :colorize (true) Enable colored output
|
|
44
|
-
# @option config [Boolean] :pretty_print (true) Enable pretty-printed JSON
|
|
44
|
+
# @option config [Boolean] :pretty_print (true) Enable pretty-printed JSON (when format: :json)
|
|
45
|
+
# @option config [Symbol] :format (:json) Output format: :json (JSON), :compact (single-line JSON), :rich (ADR-010 §3 structured)
|
|
45
46
|
def initialize(config = {})
|
|
46
47
|
@colorize = config.fetch(:colorize, true)
|
|
47
|
-
@
|
|
48
|
-
|
|
48
|
+
@format = config.fetch(:format, :json)
|
|
49
|
+
@pretty_print = resolve_pretty_print(config)
|
|
49
50
|
super
|
|
50
51
|
end
|
|
51
52
|
|
|
@@ -82,15 +83,29 @@ module E11y
|
|
|
82
83
|
|
|
83
84
|
private
|
|
84
85
|
|
|
86
|
+
# Resolve pretty_print from format or pretty_print keys
|
|
87
|
+
#
|
|
88
|
+
# @param config [Hash] Adapter config
|
|
89
|
+
# @return [Boolean]
|
|
90
|
+
def resolve_pretty_print(config)
|
|
91
|
+
return config[:pretty_print] if config.key?(:pretty_print)
|
|
92
|
+
|
|
93
|
+
case config[:format]
|
|
94
|
+
when :compact then false
|
|
95
|
+
when :pretty then true
|
|
96
|
+
else config.fetch(:pretty_print, true)
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
85
100
|
# Format event for console output
|
|
86
101
|
#
|
|
87
102
|
# @param event_data [Hash] Event data
|
|
88
103
|
# @return [String] Formatted output
|
|
89
104
|
def format_event(event_data)
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
105
|
+
case @format
|
|
106
|
+
when :rich then format_event_rich(event_data)
|
|
107
|
+
when :compact then event_data.to_json
|
|
108
|
+
else @pretty_print ? JSON.pretty_generate(event_data) : event_data.to_json
|
|
94
109
|
end
|
|
95
110
|
end
|
|
96
111
|
|
|
@@ -103,6 +118,57 @@ module E11y
|
|
|
103
118
|
color_code = SEVERITY_COLORS[severity] || ""
|
|
104
119
|
"#{color_code}#{output}#{COLOR_RESET}"
|
|
105
120
|
end
|
|
121
|
+
|
|
122
|
+
# Rich format: ADR-010 §3 — structured output with header, event name, payload, metadata
|
|
123
|
+
def format_event_rich(event_data)
|
|
124
|
+
lines = []
|
|
125
|
+
lines << format_header(event_data)
|
|
126
|
+
lines << format_event_name_line(event_data)
|
|
127
|
+
lines << format_payload_section(event_data[:payload]) if event_data[:payload]&.any?
|
|
128
|
+
lines << format_metadata_section(event_data) if event_data[:trace_id] || event_data[:span_id]
|
|
129
|
+
lines << ("─" * 80)
|
|
130
|
+
lines.join("\n")
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
def format_header(event_data)
|
|
134
|
+
ts = event_data[:timestamp]
|
|
135
|
+
ts = Time.parse(ts) if ts.is_a?(String)
|
|
136
|
+
time_str = ts&.strftime("%H:%M:%S.%L") || "??:??:??.???"
|
|
137
|
+
sev = event_data[:severity].to_s.upcase.ljust(8)
|
|
138
|
+
"#{time_str} #{sev}"
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
def format_event_name_line(event_data)
|
|
142
|
+
name = event_data[:event_name].to_s
|
|
143
|
+
" → #{name}"
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
def format_payload_section(payload)
|
|
147
|
+
lines = [" Payload:"]
|
|
148
|
+
payload.each do |k, v|
|
|
149
|
+
lines << " #{k}: #{format_value_rich(v)}"
|
|
150
|
+
end
|
|
151
|
+
lines.join("\n")
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
def format_metadata_section(event_data)
|
|
155
|
+
meta = { trace_id: event_data[:trace_id], span_id: event_data[:span_id] }.compact
|
|
156
|
+
return "" if meta.empty?
|
|
157
|
+
|
|
158
|
+
meta.map { |k, v| " #{k}: #{v}" }.unshift(" Metadata:").join("\n")
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
def format_value_rich(value)
|
|
162
|
+
case value
|
|
163
|
+
when String then "\"#{value.length > 50 ? "#{value[0...50]}..." : value}\""
|
|
164
|
+
when Array then "[#{value.size} items]"
|
|
165
|
+
when Hash then "{#{value.size} keys}"
|
|
166
|
+
else value.inspect
|
|
167
|
+
end
|
|
168
|
+
end
|
|
106
169
|
end
|
|
170
|
+
|
|
171
|
+
# Alias for ADR-010 §3 (Console Output) — Console and Stdout are the same adapter
|
|
172
|
+
Console = Stdout
|
|
107
173
|
end
|
|
108
174
|
end
|
data/lib/e11y/adapters/yabeda.rb
CHANGED
|
@@ -43,7 +43,7 @@ module E11y
|
|
|
43
43
|
# # Events automatically update metrics via middleware
|
|
44
44
|
#
|
|
45
45
|
# @see ADR-002 Metrics & Yabeda Integration
|
|
46
|
-
# @see UC-003
|
|
46
|
+
# @see UC-003 Event Metrics
|
|
47
47
|
# rubocop:disable Metrics/ClassLength
|
|
48
48
|
# Yabeda adapter contains metrics registration and update logic as cohesive unit
|
|
49
49
|
class Yabeda < Base
|
|
@@ -67,6 +67,8 @@ module E11y
|
|
|
67
67
|
return unless config.fetch(:auto_register, true)
|
|
68
68
|
|
|
69
69
|
register_metrics_from_registry!
|
|
70
|
+
register_middleware_metrics!
|
|
71
|
+
register_self_monitoring_metrics!
|
|
70
72
|
|
|
71
73
|
# Apply configuration in non-Rails environments (Rails does this automatically)
|
|
72
74
|
# In tests, Yabeda.configure! should be called explicitly in before blocks
|
|
@@ -108,9 +110,10 @@ module E11y
|
|
|
108
110
|
|
|
109
111
|
# Check if adapter is healthy
|
|
110
112
|
#
|
|
111
|
-
# @return [Boolean] true if Yabeda is available and
|
|
113
|
+
# @return [Boolean] true if Yabeda is available, configured, and e11y group exists
|
|
112
114
|
def healthy?
|
|
113
115
|
return false unless defined?(::Yabeda)
|
|
116
|
+
return false unless ::Yabeda.respond_to?(:e11y)
|
|
114
117
|
|
|
115
118
|
::Yabeda.configured?
|
|
116
119
|
rescue StandardError
|
|
@@ -151,8 +154,11 @@ module E11y
|
|
|
151
154
|
# Register metric if not exists
|
|
152
155
|
register_metric_if_needed(name, :counter, safe_labels.keys)
|
|
153
156
|
|
|
154
|
-
# Update Yabeda metric
|
|
155
|
-
::Yabeda.e11y.send(name)
|
|
157
|
+
# Update Yabeda metric (guard against nil when metric wasn't registered, e.g. after configure!)
|
|
158
|
+
metric = ::Yabeda.e11y.send(name)
|
|
159
|
+
return unless metric
|
|
160
|
+
|
|
161
|
+
metric.increment(safe_labels, by: value)
|
|
156
162
|
rescue StandardError => e
|
|
157
163
|
E11y.logger.warn("Failed to increment Yabeda metric #{name}: #{e.message}")
|
|
158
164
|
end
|
|
@@ -173,8 +179,11 @@ module E11y
|
|
|
173
179
|
# Register metric if not exists
|
|
174
180
|
register_metric_if_needed(name, :histogram, safe_labels.keys, buckets: buckets)
|
|
175
181
|
|
|
176
|
-
# Update Yabeda metric
|
|
177
|
-
::Yabeda.e11y.send(name)
|
|
182
|
+
# Update Yabeda metric (guard against nil when metric wasn't registered)
|
|
183
|
+
metric = ::Yabeda.e11y.send(name)
|
|
184
|
+
return unless metric
|
|
185
|
+
|
|
186
|
+
metric.measure(safe_labels, value)
|
|
178
187
|
rescue StandardError => e
|
|
179
188
|
E11y.logger.warn("Failed to observe Yabeda histogram #{name}: #{e.message}")
|
|
180
189
|
end
|
|
@@ -194,8 +203,11 @@ module E11y
|
|
|
194
203
|
# Register metric if not exists
|
|
195
204
|
register_metric_if_needed(name, :gauge, safe_labels.keys)
|
|
196
205
|
|
|
197
|
-
# Update Yabeda metric
|
|
198
|
-
::Yabeda.e11y.send(name)
|
|
206
|
+
# Update Yabeda metric (guard against nil when metric wasn't registered)
|
|
207
|
+
metric = ::Yabeda.e11y.send(name)
|
|
208
|
+
return unless metric
|
|
209
|
+
|
|
210
|
+
metric.set(safe_labels, value)
|
|
199
211
|
rescue StandardError => e
|
|
200
212
|
E11y.logger.warn("Failed to set Yabeda gauge #{name}: #{e.message}")
|
|
201
213
|
end
|
|
@@ -208,9 +220,7 @@ module E11y
|
|
|
208
220
|
super
|
|
209
221
|
|
|
210
222
|
# Validate cardinality_limit
|
|
211
|
-
if @config[:cardinality_limit] && !@config[:cardinality_limit].is_a?(Integer)
|
|
212
|
-
raise ArgumentError, "cardinality_limit must be an Integer"
|
|
213
|
-
end
|
|
223
|
+
raise ArgumentError, "cardinality_limit must be an Integer" if @config[:cardinality_limit] && !@config[:cardinality_limit].is_a?(Integer)
|
|
214
224
|
|
|
215
225
|
# Validate forbidden_labels
|
|
216
226
|
return unless @config[:forbidden_labels] && !@config[:forbidden_labels].is_a?(Array)
|
|
@@ -278,20 +288,112 @@ module E11y
|
|
|
278
288
|
end
|
|
279
289
|
end
|
|
280
290
|
|
|
291
|
+
# Pre-register middleware self-monitoring metrics.
|
|
292
|
+
#
|
|
293
|
+
# These metrics are used by TraceContext, Validation, and Routing middleware.
|
|
294
|
+
# Must be registered before Yabeda.configure! is called (e.g. in app initializers).
|
|
295
|
+
# Called during adapter initialization so they're available when events flow.
|
|
296
|
+
# Names use underscores (Prometheus requires /[a-zA-Z_:][a-zA-Z0-9_:]*/, no dots).
|
|
297
|
+
#
|
|
298
|
+
# @return [void]
|
|
299
|
+
def register_middleware_metrics!
|
|
300
|
+
return unless defined?(::Yabeda)
|
|
301
|
+
|
|
302
|
+
middleware_metrics = [
|
|
303
|
+
{ name: :e11y_middleware_trace_context_processed, tags: [] },
|
|
304
|
+
{ name: :e11y_middleware_validation_total, tags: [:result] },
|
|
305
|
+
{ name: :e11y_middleware_routing_routed, tags: %i[adapters_count routing_type] }
|
|
306
|
+
]
|
|
307
|
+
|
|
308
|
+
cardinality_metrics = [
|
|
309
|
+
{ name: :e11y_cardinality_overflow_total, tags: %i[metric action strategy] },
|
|
310
|
+
{ name: :e11y_cardinality_current, type: :gauge, tags: [:metric] }
|
|
311
|
+
]
|
|
312
|
+
|
|
313
|
+
(middleware_metrics + cardinality_metrics).each do |m|
|
|
314
|
+
type = m[:type] || :counter
|
|
315
|
+
register_metric_if_needed(m[:name], type, m[:tags])
|
|
316
|
+
end
|
|
317
|
+
rescue StandardError => e
|
|
318
|
+
E11y.logger.debug("Could not register middleware metrics: #{e.message}")
|
|
319
|
+
end
|
|
320
|
+
|
|
321
|
+
# Pre-register self-monitoring metrics (request buffer, retry, circuit breaker, DLQ, etc.).
|
|
322
|
+
# Must be registered before Yabeda.configure! so they exist when reliability layer runs.
|
|
323
|
+
#
|
|
324
|
+
# @return [void] # -- metric list is inherently long
|
|
325
|
+
def register_self_monitoring_metrics!
|
|
326
|
+
return unless defined?(::Yabeda)
|
|
327
|
+
|
|
328
|
+
metrics = [
|
|
329
|
+
# Request buffer (consolidated)
|
|
330
|
+
{ name: :e11y_ephemeral_buffer_total, tags: [:event] },
|
|
331
|
+
# Retry handler
|
|
332
|
+
{ name: :e11y_retry_success, tags: %i[adapter attempts] },
|
|
333
|
+
{ name: :e11y_retry_recovered, tags: %i[adapter attempts] },
|
|
334
|
+
{ name: :e11y_retry_permanent_failure, tags: %i[adapter error attempt] },
|
|
335
|
+
{ name: :e11y_retry_exhausted, tags: %i[adapter error attempts] },
|
|
336
|
+
{ name: :e11y_retry_attempt, tags: %i[adapter error attempt] },
|
|
337
|
+
# Circuit breaker (consolidated: transitions counter + state gauge)
|
|
338
|
+
{ name: :e11y_circuit_breaker_transitions_total, tags: %i[adapter event] },
|
|
339
|
+
{ name: :e11y_circuit_breaker_state, type: :gauge, tags: [:adapter] },
|
|
340
|
+
# Adapter performance & reliability
|
|
341
|
+
{ name: :e11y_adapter_send_duration_seconds, type: :histogram, tags: [:adapter], buckets: [0.001, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0] },
|
|
342
|
+
{ name: :e11y_adapter_writes_total, tags: %i[adapter status error_class] },
|
|
343
|
+
# DLQ
|
|
344
|
+
{ name: :e11y_dlq_size, type: :gauge, tags: [] },
|
|
345
|
+
{ name: :e11y_dlq_filter_decisions_total, tags: %i[action reason] },
|
|
346
|
+
{ name: :e11y_dlq_saved_total, tags: [:event_name] },
|
|
347
|
+
{ name: :e11y_dlq_parse_error_total, tags: [:error] },
|
|
348
|
+
{ name: :e11y_dlq_replayed_total, tags: [:event_name] },
|
|
349
|
+
{ name: :e11y_dlq_replay_failed_total, tags: [:error] },
|
|
350
|
+
# Retry rate limiter (consolidated)
|
|
351
|
+
{ name: :e11y_retry_rate_limiter_total, tags: %i[adapter event delay_sec] },
|
|
352
|
+
# Buffer (ring, adaptive) — consolidated
|
|
353
|
+
{ name: :e11y_buffer_overflow_total, tags: [:event] },
|
|
354
|
+
# Rate limiting / sampling
|
|
355
|
+
{ name: :e11y_events_dropped_total, tags: %i[reason event_type] },
|
|
356
|
+
# SLO tracking (Request middleware triggers on every HTTP request when enabled)
|
|
357
|
+
{ name: :slo_http_requests_total, tags: %i[controller action status] },
|
|
358
|
+
{ name: :slo_http_request_duration_seconds, type: :histogram, tags: %i[controller action],
|
|
359
|
+
buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0] },
|
|
360
|
+
{ name: :slo_background_jobs_total, tags: %i[job_class status queue] },
|
|
361
|
+
{ name: :slo_background_job_duration_seconds, type: :histogram, tags: %i[job_class queue],
|
|
362
|
+
buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0] },
|
|
363
|
+
# E11y self-monitoring (events tracked at pipeline end)
|
|
364
|
+
{ name: :e11y_events_tracked_total, tags: %i[result event_name] },
|
|
365
|
+
# Track latency (TrackLatency middleware)
|
|
366
|
+
{ name: :e11y_track_duration_seconds, type: :histogram, tags: %i[event_class severity result],
|
|
367
|
+
buckets: [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1] }
|
|
368
|
+
]
|
|
369
|
+
|
|
370
|
+
metrics.each do |m|
|
|
371
|
+
type = m[:type] || :counter
|
|
372
|
+
buckets = m[:buckets]
|
|
373
|
+
register_metric_if_needed(m[:name], type, m[:tags], buckets: buckets)
|
|
374
|
+
end
|
|
375
|
+
rescue StandardError => e
|
|
376
|
+
E11y.logger.debug("Could not register self-monitoring metrics: #{e.message}")
|
|
377
|
+
end
|
|
378
|
+
|
|
281
379
|
# Register a single metric in Yabeda
|
|
282
380
|
#
|
|
283
381
|
# @param metric_config [Hash] Metric configuration from Registry
|
|
284
382
|
# @return [void]
|
|
285
|
-
# rubocop:disable Metrics/MethodLength
|
|
286
383
|
# Metric registration requires case/when for different metric types
|
|
287
384
|
def register_yabeda_metric(metric_config)
|
|
288
385
|
metric_name = metric_config[:name]
|
|
289
386
|
metric_type = metric_config[:type]
|
|
290
387
|
tags = metric_config[:tags] || []
|
|
291
388
|
|
|
389
|
+
# Skip if metric already exists (prevents re-registration errors)
|
|
390
|
+
return if ::Yabeda.metrics.key?("e11y_#{metric_name}")
|
|
391
|
+
|
|
292
392
|
# Define metric in Yabeda group
|
|
293
|
-
::Yabeda.configure do
|
|
294
|
-
|
|
393
|
+
::Yabeda.configure do |config = nil|
|
|
394
|
+
next unless config.respond_to?(:group)
|
|
395
|
+
|
|
396
|
+
config.group :e11y do
|
|
295
397
|
case metric_type
|
|
296
398
|
when :counter
|
|
297
399
|
counter metric_name, tags: tags, comment: "E11y metric: #{metric_name}"
|
|
@@ -309,7 +411,6 @@ module E11y
|
|
|
309
411
|
# Metric might already be registered - that's OK
|
|
310
412
|
warn "E11y Yabeda: Could not register metric #{metric_name}: #{e.message}"
|
|
311
413
|
end
|
|
312
|
-
# rubocop:enable Metrics/MethodLength
|
|
313
414
|
|
|
314
415
|
# Register a metric if it doesn't exist yet (for direct metric calls).
|
|
315
416
|
#
|
|
@@ -319,14 +420,15 @@ module E11y
|
|
|
319
420
|
# @param buckets [Array<Numeric>, nil] Optional histogram buckets
|
|
320
421
|
# @return [void]
|
|
321
422
|
# @api private
|
|
322
|
-
# rubocop:disable Metrics/MethodLength
|
|
323
423
|
# Metric registration requires case/when for different metric types
|
|
324
424
|
def register_metric_if_needed(name, type, tags, buckets: nil)
|
|
325
|
-
# Check if metric already exists
|
|
326
|
-
return if ::Yabeda.metrics.key?(
|
|
425
|
+
# Check if metric already exists (Yabeda stores metric keys as strings)
|
|
426
|
+
return if ::Yabeda.metrics.key?("e11y_#{name}")
|
|
327
427
|
|
|
328
|
-
::Yabeda.configure do
|
|
329
|
-
|
|
428
|
+
::Yabeda.configure do |config = nil|
|
|
429
|
+
next unless config.respond_to?(:group)
|
|
430
|
+
|
|
431
|
+
config.group :e11y do
|
|
330
432
|
case type
|
|
331
433
|
when :counter
|
|
332
434
|
counter name, tags: tags, comment: "E11y self-monitoring: #{name}"
|
|
@@ -347,38 +449,60 @@ module E11y
|
|
|
347
449
|
# Metric might already be registered - that's OK
|
|
348
450
|
E11y.logger.warn("Could not register Yabeda metric #{name}: #{e.message}")
|
|
349
451
|
end
|
|
350
|
-
# rubocop:enable Metrics/MethodLength
|
|
351
452
|
|
|
352
453
|
# Update a single metric based on event data
|
|
353
454
|
#
|
|
354
455
|
# @param metric_config [Hash] Metric configuration
|
|
355
456
|
# @param event_data [Hash] Event data
|
|
356
457
|
# @return [void]
|
|
357
|
-
# rubocop:disable Metrics/AbcSize
|
|
358
458
|
# Metric update requires multiple steps for label extraction and value handling
|
|
359
459
|
def update_metric(metric_config, event_data)
|
|
360
460
|
metric_name = metric_config[:name]
|
|
361
461
|
labels = extract_labels(metric_config, event_data)
|
|
362
462
|
|
|
363
|
-
# Apply cardinality protection
|
|
364
|
-
safe_labels = @cardinality_protection.filter(labels, metric_name)
|
|
463
|
+
# Apply cardinality protection (normalize metric_name to string for consistent tracking)
|
|
464
|
+
safe_labels = @cardinality_protection.filter(labels, metric_name.to_s)
|
|
365
465
|
|
|
366
466
|
# Extract value for histogram/gauge
|
|
367
467
|
value = extract_value(metric_config, event_data) if %i[histogram gauge].include?(metric_config[:type])
|
|
368
468
|
|
|
369
|
-
#
|
|
469
|
+
# Get original tags from metric config - these are the tags the metric was registered with
|
|
470
|
+
original_tags = metric_config.fetch(:tags, [])
|
|
471
|
+
|
|
472
|
+
# Lazy registration: register metric if it doesn't exist in Yabeda yet
|
|
473
|
+
# CRITICAL: Use ORIGINAL tags from metric config, not filtered safe_labels.keys
|
|
474
|
+
# Prometheus requires all tags declared at registration time
|
|
475
|
+
register_metric_if_needed(
|
|
476
|
+
metric_name,
|
|
477
|
+
metric_config[:type],
|
|
478
|
+
original_tags,
|
|
479
|
+
buckets: metric_config[:buckets]
|
|
480
|
+
)
|
|
481
|
+
|
|
482
|
+
# Ensure all required tags are present in safe_labels
|
|
483
|
+
# If cardinality protection dropped a tag, add placeholder value
|
|
484
|
+
# Prometheus requires all tags declared at registration to be present in every update
|
|
485
|
+
final_labels = original_tags.to_h do |tag|
|
|
486
|
+
[tag, safe_labels.key?(tag) ? safe_labels[tag] : "[DROPPED]"]
|
|
487
|
+
end
|
|
488
|
+
|
|
489
|
+
# Update Yabeda metric (skip if e11y group not registered, e.g. Yabeda not configured)
|
|
490
|
+
return unless ::Yabeda.respond_to?(:e11y)
|
|
491
|
+
|
|
492
|
+
metric = ::Yabeda.e11y.send(metric_name)
|
|
493
|
+
return unless metric
|
|
494
|
+
|
|
370
495
|
case metric_config[:type]
|
|
371
496
|
when :counter
|
|
372
|
-
|
|
497
|
+
metric.increment(final_labels)
|
|
373
498
|
when :histogram
|
|
374
|
-
|
|
499
|
+
metric.measure(final_labels, value)
|
|
375
500
|
when :gauge
|
|
376
|
-
|
|
501
|
+
metric.set(final_labels, value)
|
|
377
502
|
end
|
|
378
503
|
rescue StandardError => e
|
|
379
504
|
warn "E11y Yabeda: Error updating metric #{metric_name}: #{e.message}"
|
|
380
505
|
end
|
|
381
|
-
# rubocop:enable Metrics/AbcSize
|
|
382
506
|
|
|
383
507
|
# Extract labels from event data
|
|
384
508
|
#
|
|
data/lib/e11y/buffers/adaptive_buffer.rb
CHANGED
|
@@ -247,7 +247,6 @@ module E11y
|
|
|
247
247
|
#
|
|
248
248
|
# @param obj [Object] Payload object
|
|
249
249
|
# @return [Integer] Size in bytes
|
|
250
|
-
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/MethodLength
|
|
251
250
|
def calculate_payload_size(obj)
|
|
252
251
|
case obj
|
|
253
252
|
when String
|
|
@@ -266,14 +265,12 @@ module E11y
|
|
|
266
265
|
rescue StandardError
|
|
267
266
|
500 # Fallback for errors
|
|
268
267
|
end
|
|
269
|
-
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/MethodLength
|
|
270
268
|
|
|
271
269
|
# Handle memory exhaustion according to strategy
|
|
272
270
|
#
|
|
273
271
|
# @param event_data [Hash] Event that caused exhaustion
|
|
274
272
|
# @param event_size [Integer] Size of event
|
|
275
273
|
# @return [Boolean] true if event was eventually added, false if dropped
|
|
276
|
-
# rubocop:disable Metrics/MethodLength
|
|
277
274
|
def handle_memory_exhaustion(event_data, event_size)
|
|
278
275
|
case @backpressure_strategy
|
|
279
276
|
when :block
|
|
@@ -288,7 +285,7 @@ module E11y
|
|
|
288
285
|
# Check timeout
|
|
289
286
|
if Time.now - wait_start > @max_block_time
|
|
290
287
|
# Timeout exceeded - drop event
|
|
291
|
-
|
|
288
|
+
E11y::Metrics.increment(:e11y_buffer_overflow_total, event: "memory_exhaustion_dropped")
|
|
292
289
|
return false
|
|
293
290
|
end
|
|
294
291
|
|
|
@@ -297,16 +294,15 @@ module E11y
|
|
|
297
294
|
end
|
|
298
295
|
|
|
299
296
|
# Space available - retry add
|
|
300
|
-
|
|
297
|
+
E11y::Metrics.increment(:e11y_buffer_overflow_total, event: "memory_exhaustion_blocked")
|
|
301
298
|
add_event(event_data)
|
|
302
299
|
|
|
303
300
|
when :drop
|
|
304
301
|
# Drop new event
|
|
305
|
-
|
|
302
|
+
E11y::Metrics.increment(:e11y_buffer_overflow_total, event: "memory_exhaustion_dropped")
|
|
306
303
|
false
|
|
307
304
|
end
|
|
308
305
|
end
|
|
309
|
-
# rubocop:enable Metrics/MethodLength
|
|
310
306
|
|
|
311
307
|
# Trigger early flush (80% threshold reached)
|
|
312
308
|
#
|
|
@@ -323,16 +319,6 @@ module E11y
|
|
|
323
319
|
warn "E11y: Early flush callback failed: #{e.message}"
|
|
324
320
|
end
|
|
325
321
|
|
|
326
|
-
# Increment metric (placeholder for Phase 3: Metrics)
|
|
327
|
-
#
|
|
328
|
-
# TODO Phase 3: Replace with actual Yabeda metrics
|
|
329
|
-
#
|
|
330
|
-
# @param metric_name [String] Metric to increment
|
|
331
|
-
# @return [void]
|
|
332
|
-
def increment_metric(metric_name)
|
|
333
|
-
# Placeholder - will be implemented in Phase 3
|
|
334
|
-
# Yabeda.e11y.buffer_memory_exhaustion.increment(strategy: @backpressure_strategy)
|
|
335
|
-
end
|
|
336
322
|
# rubocop:enable Metrics/ClassLength
|
|
337
323
|
end
|
|
338
324
|
end
|