e11y 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +4 -0
- data/.rubocop.yml +69 -0
- data/CHANGELOG.md +26 -0
- data/CODE_OF_CONDUCT.md +64 -0
- data/LICENSE.txt +21 -0
- data/README.md +179 -0
- data/Rakefile +37 -0
- data/benchmarks/run_all.rb +33 -0
- data/config/README.md +83 -0
- data/config/loki-local-config.yaml +35 -0
- data/config/prometheus.yml +15 -0
- data/docker-compose.yml +78 -0
- data/docs/00-ICP-AND-TIMELINE.md +483 -0
- data/docs/01-SCALE-REQUIREMENTS.md +858 -0
- data/docs/ADR-001-architecture.md +2617 -0
- data/docs/ADR-002-metrics-yabeda.md +1395 -0
- data/docs/ADR-003-slo-observability.md +3337 -0
- data/docs/ADR-004-adapter-architecture.md +2385 -0
- data/docs/ADR-005-tracing-context.md +1372 -0
- data/docs/ADR-006-security-compliance.md +4143 -0
- data/docs/ADR-007-opentelemetry-integration.md +1385 -0
- data/docs/ADR-008-rails-integration.md +1911 -0
- data/docs/ADR-009-cost-optimization.md +2993 -0
- data/docs/ADR-010-developer-experience.md +2166 -0
- data/docs/ADR-011-testing-strategy.md +1836 -0
- data/docs/ADR-012-event-evolution.md +958 -0
- data/docs/ADR-013-reliability-error-handling.md +2750 -0
- data/docs/ADR-014-event-driven-slo.md +1533 -0
- data/docs/ADR-015-middleware-order.md +1061 -0
- data/docs/ADR-016-self-monitoring-slo.md +1234 -0
- data/docs/API-REFERENCE-L28.md +914 -0
- data/docs/COMPREHENSIVE-CONFIGURATION.md +2366 -0
- data/docs/IMPLEMENTATION_NOTES.md +2804 -0
- data/docs/IMPLEMENTATION_PLAN.md +1971 -0
- data/docs/IMPLEMENTATION_PLAN_ARCHITECTURE.md +586 -0
- data/docs/PLAN.md +148 -0
- data/docs/QUICK-START.md +934 -0
- data/docs/README.md +296 -0
- data/docs/design/00-memory-optimization.md +593 -0
- data/docs/guides/MIGRATION-L27-L28.md +692 -0
- data/docs/guides/PERFORMANCE-BENCHMARKS.md +434 -0
- data/docs/guides/README.md +44 -0
- data/docs/prd/01-overview-vision.md +440 -0
- data/docs/use_cases/README.md +119 -0
- data/docs/use_cases/UC-001-request-scoped-debug-buffering.md +813 -0
- data/docs/use_cases/UC-002-business-event-tracking.md +1953 -0
- data/docs/use_cases/UC-003-pattern-based-metrics.md +1627 -0
- data/docs/use_cases/UC-004-zero-config-slo-tracking.md +728 -0
- data/docs/use_cases/UC-005-sentry-integration.md +759 -0
- data/docs/use_cases/UC-006-trace-context-management.md +905 -0
- data/docs/use_cases/UC-007-pii-filtering.md +2648 -0
- data/docs/use_cases/UC-008-opentelemetry-integration.md +1153 -0
- data/docs/use_cases/UC-009-multi-service-tracing.md +1043 -0
- data/docs/use_cases/UC-010-background-job-tracking.md +1018 -0
- data/docs/use_cases/UC-011-rate-limiting.md +1906 -0
- data/docs/use_cases/UC-012-audit-trail.md +2301 -0
- data/docs/use_cases/UC-013-high-cardinality-protection.md +2127 -0
- data/docs/use_cases/UC-014-adaptive-sampling.md +1940 -0
- data/docs/use_cases/UC-015-cost-optimization.md +735 -0
- data/docs/use_cases/UC-016-rails-logger-migration.md +785 -0
- data/docs/use_cases/UC-017-local-development.md +867 -0
- data/docs/use_cases/UC-018-testing-events.md +1081 -0
- data/docs/use_cases/UC-019-tiered-storage-migration.md +562 -0
- data/docs/use_cases/UC-020-event-versioning.md +708 -0
- data/docs/use_cases/UC-021-error-handling-retry-dlq.md +956 -0
- data/docs/use_cases/UC-022-event-registry.md +648 -0
- data/docs/use_cases/backlog.md +226 -0
- data/e11y.gemspec +76 -0
- data/lib/e11y/adapters/adaptive_batcher.rb +207 -0
- data/lib/e11y/adapters/audit_encrypted.rb +239 -0
- data/lib/e11y/adapters/base.rb +580 -0
- data/lib/e11y/adapters/file.rb +224 -0
- data/lib/e11y/adapters/in_memory.rb +216 -0
- data/lib/e11y/adapters/loki.rb +333 -0
- data/lib/e11y/adapters/otel_logs.rb +203 -0
- data/lib/e11y/adapters/registry.rb +141 -0
- data/lib/e11y/adapters/sentry.rb +230 -0
- data/lib/e11y/adapters/stdout.rb +108 -0
- data/lib/e11y/adapters/yabeda.rb +370 -0
- data/lib/e11y/buffers/adaptive_buffer.rb +339 -0
- data/lib/e11y/buffers/base_buffer.rb +40 -0
- data/lib/e11y/buffers/request_scoped_buffer.rb +246 -0
- data/lib/e11y/buffers/ring_buffer.rb +267 -0
- data/lib/e11y/buffers.rb +14 -0
- data/lib/e11y/console.rb +122 -0
- data/lib/e11y/current.rb +48 -0
- data/lib/e11y/event/base.rb +894 -0
- data/lib/e11y/event/value_sampling_config.rb +84 -0
- data/lib/e11y/events/base_audit_event.rb +43 -0
- data/lib/e11y/events/base_payment_event.rb +33 -0
- data/lib/e11y/events/rails/cache/delete.rb +21 -0
- data/lib/e11y/events/rails/cache/read.rb +23 -0
- data/lib/e11y/events/rails/cache/write.rb +22 -0
- data/lib/e11y/events/rails/database/query.rb +45 -0
- data/lib/e11y/events/rails/http/redirect.rb +21 -0
- data/lib/e11y/events/rails/http/request.rb +26 -0
- data/lib/e11y/events/rails/http/send_file.rb +21 -0
- data/lib/e11y/events/rails/http/start_processing.rb +26 -0
- data/lib/e11y/events/rails/job/completed.rb +22 -0
- data/lib/e11y/events/rails/job/enqueued.rb +22 -0
- data/lib/e11y/events/rails/job/failed.rb +22 -0
- data/lib/e11y/events/rails/job/scheduled.rb +23 -0
- data/lib/e11y/events/rails/job/started.rb +22 -0
- data/lib/e11y/events/rails/log.rb +56 -0
- data/lib/e11y/events/rails/view/render.rb +23 -0
- data/lib/e11y/events.rb +18 -0
- data/lib/e11y/instruments/active_job.rb +201 -0
- data/lib/e11y/instruments/rails_instrumentation.rb +141 -0
- data/lib/e11y/instruments/sidekiq.rb +175 -0
- data/lib/e11y/logger/bridge.rb +205 -0
- data/lib/e11y/metrics/cardinality_protection.rb +172 -0
- data/lib/e11y/metrics/cardinality_tracker.rb +134 -0
- data/lib/e11y/metrics/registry.rb +234 -0
- data/lib/e11y/metrics/relabeling.rb +226 -0
- data/lib/e11y/metrics.rb +102 -0
- data/lib/e11y/middleware/audit_signing.rb +174 -0
- data/lib/e11y/middleware/base.rb +140 -0
- data/lib/e11y/middleware/event_slo.rb +167 -0
- data/lib/e11y/middleware/pii_filter.rb +266 -0
- data/lib/e11y/middleware/pii_filtering.rb +280 -0
- data/lib/e11y/middleware/rate_limiting.rb +214 -0
- data/lib/e11y/middleware/request.rb +163 -0
- data/lib/e11y/middleware/routing.rb +157 -0
- data/lib/e11y/middleware/sampling.rb +254 -0
- data/lib/e11y/middleware/slo.rb +168 -0
- data/lib/e11y/middleware/trace_context.rb +131 -0
- data/lib/e11y/middleware/validation.rb +118 -0
- data/lib/e11y/middleware/versioning.rb +132 -0
- data/lib/e11y/middleware.rb +12 -0
- data/lib/e11y/pii/patterns.rb +90 -0
- data/lib/e11y/pii.rb +13 -0
- data/lib/e11y/pipeline/builder.rb +155 -0
- data/lib/e11y/pipeline/zone_validator.rb +110 -0
- data/lib/e11y/pipeline.rb +12 -0
- data/lib/e11y/presets/audit_event.rb +65 -0
- data/lib/e11y/presets/debug_event.rb +34 -0
- data/lib/e11y/presets/high_value_event.rb +51 -0
- data/lib/e11y/presets.rb +19 -0
- data/lib/e11y/railtie.rb +138 -0
- data/lib/e11y/reliability/circuit_breaker.rb +216 -0
- data/lib/e11y/reliability/dlq/file_storage.rb +277 -0
- data/lib/e11y/reliability/dlq/filter.rb +117 -0
- data/lib/e11y/reliability/retry_handler.rb +207 -0
- data/lib/e11y/reliability/retry_rate_limiter.rb +117 -0
- data/lib/e11y/sampling/error_spike_detector.rb +225 -0
- data/lib/e11y/sampling/load_monitor.rb +161 -0
- data/lib/e11y/sampling/stratified_tracker.rb +92 -0
- data/lib/e11y/sampling/value_extractor.rb +82 -0
- data/lib/e11y/self_monitoring/buffer_monitor.rb +79 -0
- data/lib/e11y/self_monitoring/performance_monitor.rb +97 -0
- data/lib/e11y/self_monitoring/reliability_monitor.rb +146 -0
- data/lib/e11y/slo/event_driven.rb +150 -0
- data/lib/e11y/slo/tracker.rb +119 -0
- data/lib/e11y/version.rb +9 -0
- data/lib/e11y.rb +283 -0
- metadata +452 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "active_support/concern"
|
|
4
|
+
|
|
5
|
+
module E11y
|
|
6
|
+
module Instruments
|
|
7
|
+
# ActiveJob integration for job-scoped context and trace propagation.
|
|
8
|
+
#
|
|
9
|
+
# Provides callbacks to:
|
|
10
|
+
# 1. Inject trace context when job is enqueued (before_enqueue)
|
|
11
|
+
# 2. Set up job-scoped context when job executes (around_perform)
|
|
12
|
+
#
|
|
13
|
+
# @example Setup (automatic via Railtie)
|
|
14
|
+
# class ApplicationJob < ActiveJob::Base
|
|
15
|
+
# include E11y::Instruments::ActiveJob::Callbacks
|
|
16
|
+
# end
|
|
17
|
+
#
|
|
18
|
+
# @see ADR-008 §10 (ActiveJob Integration)
|
|
19
|
+
module ActiveJob
|
|
20
|
+
# Callbacks mixin for ActiveJob classes (C17 Hybrid Tracing + C18 Non-Failing).
#
# Including it registers:
# * a +before_enqueue+ callback that stores the current E11y trace/span ids
#   on the job as its parent trace context, and
# * an +around_perform+ callback that runs the job under a newly generated
#   trace context with +fail_on_error+ switched off, flushes the
#   request-scoped buffer and records SLO metrics when those features are
#   enabled in +E11y.config+.
#
# NOTE(review): +fail_on_error+ is toggled on the object returned by
# +E11y.config+. If that object is shared between threads, concurrently
# running jobs can interleave the save/restore - confirm against E11y.config.
module Callbacks
  extend ActiveSupport::Concern

  included do
    # Remember the enqueuing context as the job's parent trace; the job
    # itself generates a NEW trace id when it is performed.
    before_enqueue do |job|
      parent_trace_id = E11y::Current.trace_id
      parent_span_id = E11y::Current.span_id

      job.e11y_parent_trace_id = parent_trace_id if parent_trace_id
      job.e11y_parent_span_id = parent_span_id if parent_span_id
    end

    # Run the job inside a job-scoped E11y context.
    around_perform do |job, block|
      without_fail_on_error_active_job do
        run_with_job_context_active_job(job, block)
      end
    end
  end

  private

  # Yields with +fail_on_error+ set to false and restores the previous value
  # afterwards, whatever happens inside the block (C18).
  #
  # The restore is only installed once the previous value has been read, so
  # a failure while reading the config can never write +nil+ back.
  #
  # @return [Object] whatever the block returns
  def without_fail_on_error_active_job
    previous = E11y.config.error_handling.fail_on_error
    E11y.config.error_handling.fail_on_error = false

    begin
      yield
    ensure
      E11y.config.error_handling.fail_on_error = previous
    end
  end

  # Sets up the job context, runs the job and always cleans up afterwards.
  #
  # @param job [ActiveJob::Base] job being performed
  # @param block [#call] continuation supplied by +around_perform+
  # @return [Object] whatever +block.call+ returns
  # @raise [StandardError] the job's own exception is always re-raised
  def run_with_job_context_active_job(job, block)
    started_at = nil
    job_status = :success
    completed = false

    setup_job_context_active_job(job)
    setup_job_buffer_active_job
    started_at = monotonic_now_active_job

    result = block.call
    # Only reached when the job body returned normally. This replaces the
    # former `$ERROR_INFO` check, which is always nil unless the "English"
    # library has been loaded somewhere else.
    completed = true
    result
  rescue StandardError => e
    job_status = :failed
    handle_job_error_active_job(e)

    raise # Always re-raise original exception
  ensure
    track_job_slo_active_job(job, job_status, started_at)
    cleanup_job_context_active_job(flush: completed)
  end

  # Installs a fresh trace context for this job (C17 Hybrid Tracing).
  #
  # A new trace id and span id are generated; the id captured at enqueue
  # time is kept as +parent_trace_id+ and the job id doubles as request id.
  #
  # @param job [ActiveJob::Base] job being performed
  # @return [void]
  def setup_job_context_active_job(job)
    E11y::Current.trace_id = generate_trace_id
    E11y::Current.span_id = generate_span_id
    E11y::Current.parent_trace_id = job.e11y_parent_trace_id
    E11y::Current.request_id = job.job_id
  end

  # Starts the request-scoped buffer when it is enabled. Failures are only
  # reported via +warn+ so they cannot fail the job (C18).
  #
  # @return [void]
  def setup_job_buffer_active_job
    return unless E11y.config.request_buffer&.enabled

    E11y::Buffers::RequestScopedBuffer.start!
  rescue StandardError => e
    # C18: Don't fail job if buffer setup fails
    warn "[E11y] Failed to start job buffer: #{e.message}"
  end

  # Flushes the buffer after a job error. Failures are only reported via
  # +warn+ (C18).
  #
  # @param _error [StandardError] the job's exception (currently unused)
  # @return [void]
  def handle_job_error_active_job(_error)
    return unless E11y.config.request_buffer&.enabled

    E11y::Buffers::RequestScopedBuffer.flush_on_error!
  rescue StandardError => e
    # C18: Don't fail job if buffer flush fails
    warn "[E11y] Failed to flush job buffer on error: #{e.message}"
  end

  # Flushes the buffer (when requested) and resets the E11y context.
  #
  # The reset is attempted even if the flush or its config check fails.
  #
  # @param flush [Boolean] true when the job finished normally; on failure
  #   the buffer was already flushed by {#handle_job_error_active_job}
  # @return [void]
  def cleanup_job_context_active_job(flush: true)
    flush_job_buffer_active_job if flush

    E11y::Current.reset
  rescue StandardError => e
    # C18: Absolutely don't fail job on context cleanup
    warn "[E11y] Failed to reset job context: #{e.message}"
  end

  # Flushes the request-scoped buffer when it is enabled; never raises a
  # StandardError (C18).
  #
  # @return [void]
  def flush_job_buffer_active_job
    return unless E11y.config.request_buffer&.enabled

    E11y::Buffers::RequestScopedBuffer.flush!
  rescue StandardError => e
    # C18: Don't fail job if buffer flush fails
    warn "[E11y] Failed to flush job buffer: #{e.message}"
  end

  # Generate new trace_id (32-character hex)
  # @return [String]
  def generate_trace_id
    SecureRandom.hex(16)
  end

  # Generate new span_id (16-character hex)
  # @return [String]
  def generate_span_id
    SecureRandom.hex(8)
  end

  # Monotonic clock reading in seconds; unaffected by wall-clock changes.
  #
  # @return [Float]
  def monotonic_now_active_job
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  # Track ActiveJob for SLO metrics (if enabled).
  #
  # @param job [ActiveJob::Base] Job instance
  # @param status [Symbol] Job status (:success or :failed)
  # @param start_time [Float, Time, nil] monotonic reading taken when the job
  #   started (a Time is still accepted); nil when the job never started
  # @return [void]
  # @api private
  def track_job_slo_active_job(job, status, start_time)
    return unless start_time
    return unless E11y.config.slo_tracking&.enabled

    elapsed = start_time.is_a?(Time) ? Time.now - start_time : monotonic_now_active_job - start_time
    duration_ms = (elapsed * 1000).round(2)

    require "e11y/slo/tracker"
    E11y::SLO::Tracker.track_background_job(
      job_class: job.class.name,
      status: status,
      duration_ms: duration_ms,
      queue: job.queue_name
    )
  rescue StandardError, LoadError => e
    # C18: Don't fail if SLO tracking fails (LoadError is not a StandardError)
    log_slo_error_active_job(e)
  end

  # Reports an SLO tracking failure without ever raising itself.
  #
  # NOTE(review): whether E11y.logger accepts the +error:+ keyword is not
  # visible here; if the call is rejected the message falls back to +warn+.
  #
  # @param error [Exception] failure raised while tracking SLO metrics
  # @return [void]
  def log_slo_error_active_job(error)
    E11y.logger.warn("[E11y] SLO tracking error: #{error.message}", error: error.class.name)
  rescue StandardError
    warn "[E11y] SLO tracking error: #{error.message}"
  end
end
|
|
159
|
+
|
|
160
|
+
# Custom attribute accessors for trace context (C17 Hybrid Tracing).
#
# Besides the plain accessors, the parent trace context is added to the
# job's serialized data and restored on deserialization. Without this the
# values live only in instance variables of the enqueuing job object and are
# gone once the job is rebuilt from its serialized form.
module TraceAttributes
  # Key under which the parent trace id is stored in the serialized job.
  PARENT_TRACE_ID_KEY = "e11y_parent_trace_id"
  # Key under which the parent span id is stored in the serialized job.
  PARENT_SPAN_ID_KEY = "e11y_parent_span_id"

  # Trace/span of the context that enqueued the job.
  attr_accessor :e11y_parent_trace_id, :e11y_parent_span_id

  # Deprecated: Jobs should create NEW trace_id (C17).
  # These are kept for backward compatibility but should not be used.
  attr_accessor :e11y_trace_id, :e11y_span_id

  # Adds the parent trace context to the job data produced by +super+.
  # Only ids that are actually set are written.
  #
  # @return [Hash] serialized job data
  def serialize
    job_data = defined?(super) ? super : {}
    trace_context = {
      PARENT_TRACE_ID_KEY => e11y_parent_trace_id,
      PARENT_SPAN_ID_KEY => e11y_parent_span_id
    }.compact

    job_data.merge(trace_context)
  end

  # Restores the parent trace context written by {#serialize}.
  #
  # @param job_data [Hash] serialized job data
  # @return [void]
  def deserialize(job_data)
    super if defined?(super)

    self.e11y_parent_trace_id = job_data[PARENT_TRACE_ID_KEY]
    self.e11y_parent_span_id = job_data[PARENT_SPAN_ID_KEY]
  end
end
|
|
196
|
+
end
|
|
197
|
+
end
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
# Extend ActiveJob::Base with trace attributes.
#
# Registered as a lazy load hook instead of touching ActiveJob::Base directly:
# the include then runs whenever ActiveJob::Base is (or already was) loaded,
# this file no longer forces ActiveJob::Base to load early, and nothing
# happens in applications that never load ActiveJob.
require "active_support/lazy_load_hooks"

ActiveSupport.on_load(:active_job) do
  include E11y::Instruments::ActiveJob::TraceAttributes
end
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module E11y
|
|
4
|
+
module Instruments
|
|
5
|
+
# Rails Instrumentation (ActiveSupport::Notifications → E11y)
|
|
6
|
+
#
|
|
7
|
+
# Subscribes to Rails internal events (ActiveSupport::Notifications)
|
|
8
|
+
# and converts them to E11y events for unified observability.
|
|
9
|
+
#
|
|
10
|
+
# **Unidirectional Flow:** ASN → E11y
|
|
11
|
+
#
|
|
12
|
+
# @example Basic usage
|
|
13
|
+
# # Automatically enabled by E11y::Railtie if config.rails_instrumentation.enabled = true
|
|
14
|
+
# E11y::Instruments::RailsInstrumentation.setup!
|
|
15
|
+
#
|
|
16
|
+
# @example Custom event mapping
|
|
17
|
+
# E11y.configure do |config|
|
|
18
|
+
# config.rails_instrumentation do
|
|
19
|
+
# event_class_for 'sql.active_record', MyApp::CustomQueryEvent
|
|
20
|
+
# ignore_event 'cache_read.active_support'
|
|
21
|
+
# end
|
|
22
|
+
# end
|
|
23
|
+
#
|
|
24
|
+
# @see ADR-008 §4.1 (Unidirectional Flow ASN → E11y)
|
|
25
|
+
# @see UC-016 (Rails Logger Migration)
|
|
26
|
+
class RailsInstrumentation
  # Built-in event mappings (ASN pattern -> E11y event class name).
  #
  # These are defaults that can be overridden via config.event_class_for.
  # The names are written relative to the E11y namespace and are turned into
  # classes by {.resolve_event_class}.
  #
  # @return [Hash<String, String>] Event mappings
  DEFAULT_RAILS_EVENT_MAPPING = {
    "sql.active_record" => "Events::Rails::Database::Query",
    "process_action.action_controller" => "Events::Rails::Http::Request",
    "render_template.action_view" => "Events::Rails::View::Render",
    "send_file.action_controller" => "Events::Rails::Http::SendFile",
    "redirect_to.action_controller" => "Events::Rails::Http::Redirect",
    "cache_read.active_support" => "Events::Rails::Cache::Read",
    "cache_write.active_support" => "Events::Rails::Cache::Write",
    "cache_delete.active_support" => "Events::Rails::Cache::Delete",
    "enqueue.active_job" => "Events::Rails::Job::Enqueued",
    "enqueue_at.active_job" => "Events::Rails::Job::Scheduled",
    "perform_start.active_job" => "Events::Rails::Job::Started",
    "perform.active_job" => "Events::Rails::Job::Completed"
  }.freeze

  # Payload keys that are forwarded to E11y events; everything else is dropped.
  RELEVANT_PAYLOAD_KEYS = %i[
    controller action format status
    allocations db_runtime view_runtime
    name sql connection_id
    key hit
    job_class job_id queue
  ].freeze

  # Setup Rails instrumentation.
  #
  # Subscribes to every mapped ActiveSupport::Notifications pattern that is
  # not on the ignore list. Does nothing unless
  # +config.rails_instrumentation.enabled+ is truthy.
  #
  # @return [void]
  def self.setup!
    return unless E11y.config.rails_instrumentation&.enabled

    event_mapping.each do |asn_pattern, e11y_event_class_name|
      next if ignored?(asn_pattern)

      subscribe_to_event(asn_pattern, e11y_event_class_name)
    end
  end

  # Subscribe to a specific ASN event and forward it to an E11y event class.
  #
  # The class name is resolved each time a notification arrives. Errors
  # raised while tracking are reported via +warn+ and never reach the
  # instrumented code.
  #
  # @param asn_pattern [String] ActiveSupport::Notifications pattern
  # @param e11y_event_class_name [String] E11y event class name
  # @return [Object] whatever ActiveSupport::Notifications.subscribe returns
  def self.subscribe_to_event(asn_pattern, e11y_event_class_name)
    ActiveSupport::Notifications.subscribe(asn_pattern) do |name, start, finish, _id, payload|
      duration = (finish - start) * 1000 # seconds -> milliseconds

      e11y_event_class = resolve_event_class(e11y_event_class_name)
      next unless e11y_event_class

      e11y_event_class.track(
        event_name: name,
        duration: duration,
        **extract_relevant_payload(payload)
      )
    rescue StandardError => e
      # Don't crash the app if event tracking fails
      warn "[E11y] Failed to track Rails event #{name}: #{e.message}"
    end
  end

  # Get final event mapping (defaults plus config overrides).
  #
  # The result is memoized on first use, so later changes to
  # +custom_mappings+ are not picked up.
  #
  # @return [Hash<String, String>] Event mappings
  def self.event_mapping
    @event_mapping ||= begin
      custom_mappings = E11y.config.rails_instrumentation&.custom_mappings || {}

      custom_mappings.each_with_object(DEFAULT_RAILS_EVENT_MAPPING.dup) do |(pattern, event_class), mapping|
        # Accept a class/module or an already-stringified class name.
        mapping[pattern] = event_class.is_a?(Module) ? event_class.name : event_class.to_s
      end
    end
  end

  # Check if event pattern should be ignored
  # @param pattern [String] ASN event pattern
  # @return [Boolean] true if should be ignored
  def self.ignored?(pattern)
    ignore_list = E11y.config.rails_instrumentation&.ignore_events || []
    ignore_list.include?(pattern)
  end

  # Extract relevant payload fields from ASN event.
  #
  # Keeps only the keys listed in RELEVANT_PAYLOAD_KEYS to avoid forwarding
  # PII and noisy fields.
  #
  # @param payload [Hash] ASN event payload
  # @return [Hash] Filtered payload
  def self.extract_relevant_payload(payload)
    payload.slice(*RELEVANT_PAYLOAD_KEYS)
  end

  # Resolve event class from string name.
  #
  # The name is looked up inside the E11y namespace first, because the
  # built-in mapping names ("Events::Rails::...") are relative to it and
  # cannot be found from the top level. Module#const_get also searches
  # Object, so fully qualified names of custom event classes resolve in the
  # same call; an explicit top-level lookup is the final fallback.
  #
  # @param class_name [String] Event class name
  # @return [Class, nil] Event class or nil if not found
  def self.resolve_event_class(class_name)
    E11y.const_get(class_name.to_s)
  rescue NameError
    begin
      Object.const_get(class_name.to_s)
    rescue NameError => e
      warn "[E11y] Event class not found: #{class_name} (#{e.message})"
      nil
    end
  end
end
|
|
140
|
+
end
|
|
141
|
+
end
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module E11y
|
|
4
|
+
module Instruments
|
|
5
|
+
# Sidekiq integration for job-scoped context and trace propagation.
|
|
6
|
+
#
|
|
7
|
+
# Provides two middleware:
|
|
8
|
+
# 1. ClientMiddleware - Injects trace context when job is enqueued
|
|
9
|
+
# 2. ServerMiddleware - Sets up job-scoped context when job executes
|
|
10
|
+
#
|
|
11
|
+
# @example Setup (automatic via Railtie)
|
|
12
|
+
# Sidekiq.configure_server do |config|
|
|
13
|
+
# config.server_middleware do |chain|
|
|
14
|
+
# chain.add E11y::Instruments::Sidekiq::ServerMiddleware
|
|
15
|
+
# end
|
|
16
|
+
# end
|
|
17
|
+
#
|
|
18
|
+
# Sidekiq.configure_client do |config|
|
|
19
|
+
# config.client_middleware do |chain|
|
|
20
|
+
# chain.add E11y::Instruments::Sidekiq::ClientMiddleware
|
|
21
|
+
# end
|
|
22
|
+
# end
|
|
23
|
+
#
|
|
24
|
+
# @see ADR-008 §9 (Sidekiq Integration)
|
|
25
|
+
module Sidekiq
|
|
26
|
+
# Client-side middleware: runs when a job is enqueued.
#
# **C17 Hybrid Tracing**: copies the current trace/span ids into the job
# hash as the job's *parent* trace context. The job generates its own NEW
# trace_id when it executes but keeps this link to the originating context.
class ClientMiddleware
  # @param _worker_class [Object] unused
  # @param job [Hash] Sidekiq job hash; receives the parent trace keys
  # @param _queue [Object] unused
  # @param _redis_pool [Object] unused
  # @return [Object] whatever the rest of the middleware chain returns
  def call(_worker_class, job, _queue, _redis_pool)
    parent_context = {
      "e11y_parent_trace_id" => E11y::Current.trace_id,
      "e11y_parent_span_id" => E11y::Current.span_id
    }

    # Only write ids that are actually present in the current context.
    parent_context.each do |key, value|
      job[key] = value if value
    end

    yield
  end
end
|
|
40
|
+
|
|
41
|
+
# Server-side middleware: Set up job-scoped context when executing job
#
# **C17 Hybrid Tracing**: Creates NEW trace_id for job, but preserves parent link.
# **C18 Non-Failing**: E11y errors don't fail jobs (observability is secondary to business logic).
#
# NOTE(review): +fail_on_error+ is toggled on the object returned by
# +E11y.config+. If that object is shared between threads, concurrently
# running jobs can interleave the save/restore - confirm against E11y.config.
class ServerMiddleware
  # Runs the job inside a job-scoped E11y context.
  #
  # @param _worker [Object] unused
  # @param job [Hash] Sidekiq job hash
  # @param queue [String] Queue name
  # @yield executes the job (rest of the middleware chain)
  # @return [Object] whatever the block returns
  # @raise [StandardError] the job's own exception is always re-raised
  def call(_worker, job, queue, &job_block)
    without_fail_on_error do
      run_with_job_context(job, queue, &job_block)
    end
  end

  private

  # Yields with +fail_on_error+ set to false and restores the previous value
  # afterwards, whatever happens inside the block (C18).
  #
  # The restore is only installed once the previous value has been read, so
  # a failure while reading the config can never write +nil+ back.
  #
  # @return [Object] whatever the block returns
  def without_fail_on_error
    previous = E11y.config.error_handling.fail_on_error
    E11y.config.error_handling.fail_on_error = false

    begin
      yield
    ensure
      E11y.config.error_handling.fail_on_error = previous
    end
  end

  # Sets up the job context, runs the job and always cleans up afterwards.
  #
  # @param job [Hash] Sidekiq job hash
  # @param queue [String] Queue name
  # @return [Object] whatever the block returns
  def run_with_job_context(job, queue)
    started_at = nil
    job_status = :success
    completed = false

    setup_job_context(job)
    setup_job_buffer
    started_at = monotonic_now

    result = yield
    # Only reached when the job body returned normally. This replaces the
    # former `$ERROR_INFO` check, which is always nil unless the "English"
    # library has been loaded somewhere else.
    completed = true
    result
  rescue StandardError => e
    job_status = :failed
    handle_job_error(e)

    raise # Always re-raise original exception
  ensure
    track_job_slo(job, queue, job_status, started_at)
    cleanup_job_context(flush: completed)
  end

  # Installs a fresh trace context for this job (C17 Hybrid Tracing).
  #
  # A new trace id and span id are generated; the id injected at enqueue
  # time is kept as +parent_trace_id+ and the jid doubles as request id.
  #
  # @param job [Hash] Sidekiq job hash
  # @return [void]
  def setup_job_context(job)
    E11y::Current.trace_id = generate_trace_id
    E11y::Current.span_id = generate_span_id
    E11y::Current.parent_trace_id = job["e11y_parent_trace_id"]
    E11y::Current.request_id = job["jid"]
  end

  # Starts the request-scoped buffer when it is enabled. Failures are only
  # reported via +warn+ so they cannot fail the job (C18).
  #
  # @return [void]
  def setup_job_buffer
    return unless E11y.config.request_buffer&.enabled

    E11y::Buffers::RequestScopedBuffer.start!
  rescue StandardError => e
    # C18: Don't fail job if buffer setup fails
    warn "[E11y] Failed to start job buffer: #{e.message}"
  end

  # Flushes the buffer after a job error (includes debug events). Failures
  # are only reported via +warn+ (C18).
  #
  # @param _error [StandardError] the job's exception (currently unused)
  # @return [void]
  def handle_job_error(_error)
    return unless E11y.config.request_buffer&.enabled

    E11y::Buffers::RequestScopedBuffer.flush_on_error!
  rescue StandardError => e
    # C18: Don't fail job if buffer flush fails
    warn "[E11y] Failed to flush job buffer on error: #{e.message}"
  end

  # Flushes the buffer (when requested) and resets the E11y context.
  #
  # The reset is attempted even if the flush or its config check fails.
  #
  # @param flush [Boolean] true when the job finished normally; on failure
  #   the buffer was already flushed by {#handle_job_error}
  # @return [void]
  def cleanup_job_context(flush: true)
    flush_job_buffer if flush

    E11y::Current.reset
  rescue StandardError => e
    # C18: Absolutely don't fail job on context cleanup
    warn "[E11y] Failed to reset job context: #{e.message}"
  end

  # Flushes the request-scoped buffer when it is enabled; never raises a
  # StandardError (C18).
  #
  # @return [void]
  def flush_job_buffer
    return unless E11y.config.request_buffer&.enabled

    E11y::Buffers::RequestScopedBuffer.flush!
  rescue StandardError => e
    # C18: Don't fail job if buffer flush fails
    warn "[E11y] Failed to flush job buffer: #{e.message}"
  end

  # Generate new trace_id (32-character hex)
  # @return [String]
  def generate_trace_id
    SecureRandom.hex(16)
  end

  # Generate new span_id (16-character hex)
  # @return [String]
  def generate_span_id
    SecureRandom.hex(8)
  end

  # Monotonic clock reading in seconds; unaffected by wall-clock changes.
  #
  # @return [Float]
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  # Track Sidekiq job for SLO metrics (if enabled).
  #
  # @param job [Hash] Sidekiq job hash
  # @param queue [String] Queue name
  # @param status [Symbol] Job status (:success or :failed)
  # @param start_time [Float, Time, nil] monotonic reading taken when the job
  #   started (a Time is still accepted); nil when the job never started
  # @return [void]
  # @api private
  def track_job_slo(job, queue, status, start_time)
    return unless start_time
    return unless E11y.config.slo_tracking&.enabled

    elapsed = start_time.is_a?(Time) ? Time.now - start_time : monotonic_now - start_time
    duration_ms = (elapsed * 1000).round(2)

    require "e11y/slo/tracker"
    E11y::SLO::Tracker.track_background_job(
      job_class: job["class"],
      status: status,
      duration_ms: duration_ms,
      queue: queue
    )
  rescue StandardError, LoadError => e
    # C18: Don't fail if SLO tracking fails (LoadError is not a StandardError)
    warn "[E11y] SLO tracking error: #{e.message}"
  end
end
|
|
173
|
+
end
|
|
174
|
+
end
|
|
175
|
+
end
|