e11y 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +4 -0
  3. data/.rubocop.yml +69 -0
  4. data/CHANGELOG.md +26 -0
  5. data/CODE_OF_CONDUCT.md +64 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +179 -0
  8. data/Rakefile +37 -0
  9. data/benchmarks/run_all.rb +33 -0
  10. data/config/README.md +83 -0
  11. data/config/loki-local-config.yaml +35 -0
  12. data/config/prometheus.yml +15 -0
  13. data/docker-compose.yml +78 -0
  14. data/docs/00-ICP-AND-TIMELINE.md +483 -0
  15. data/docs/01-SCALE-REQUIREMENTS.md +858 -0
  16. data/docs/ADR-001-architecture.md +2617 -0
  17. data/docs/ADR-002-metrics-yabeda.md +1395 -0
  18. data/docs/ADR-003-slo-observability.md +3337 -0
  19. data/docs/ADR-004-adapter-architecture.md +2385 -0
  20. data/docs/ADR-005-tracing-context.md +1372 -0
  21. data/docs/ADR-006-security-compliance.md +4143 -0
  22. data/docs/ADR-007-opentelemetry-integration.md +1385 -0
  23. data/docs/ADR-008-rails-integration.md +1911 -0
  24. data/docs/ADR-009-cost-optimization.md +2993 -0
  25. data/docs/ADR-010-developer-experience.md +2166 -0
  26. data/docs/ADR-011-testing-strategy.md +1836 -0
  27. data/docs/ADR-012-event-evolution.md +958 -0
  28. data/docs/ADR-013-reliability-error-handling.md +2750 -0
  29. data/docs/ADR-014-event-driven-slo.md +1533 -0
  30. data/docs/ADR-015-middleware-order.md +1061 -0
  31. data/docs/ADR-016-self-monitoring-slo.md +1234 -0
  32. data/docs/API-REFERENCE-L28.md +914 -0
  33. data/docs/COMPREHENSIVE-CONFIGURATION.md +2366 -0
  34. data/docs/IMPLEMENTATION_NOTES.md +2804 -0
  35. data/docs/IMPLEMENTATION_PLAN.md +1971 -0
  36. data/docs/IMPLEMENTATION_PLAN_ARCHITECTURE.md +586 -0
  37. data/docs/PLAN.md +148 -0
  38. data/docs/QUICK-START.md +934 -0
  39. data/docs/README.md +296 -0
  40. data/docs/design/00-memory-optimization.md +593 -0
  41. data/docs/guides/MIGRATION-L27-L28.md +692 -0
  42. data/docs/guides/PERFORMANCE-BENCHMARKS.md +434 -0
  43. data/docs/guides/README.md +44 -0
  44. data/docs/prd/01-overview-vision.md +440 -0
  45. data/docs/use_cases/README.md +119 -0
  46. data/docs/use_cases/UC-001-request-scoped-debug-buffering.md +813 -0
  47. data/docs/use_cases/UC-002-business-event-tracking.md +1953 -0
  48. data/docs/use_cases/UC-003-pattern-based-metrics.md +1627 -0
  49. data/docs/use_cases/UC-004-zero-config-slo-tracking.md +728 -0
  50. data/docs/use_cases/UC-005-sentry-integration.md +759 -0
  51. data/docs/use_cases/UC-006-trace-context-management.md +905 -0
  52. data/docs/use_cases/UC-007-pii-filtering.md +2648 -0
  53. data/docs/use_cases/UC-008-opentelemetry-integration.md +1153 -0
  54. data/docs/use_cases/UC-009-multi-service-tracing.md +1043 -0
  55. data/docs/use_cases/UC-010-background-job-tracking.md +1018 -0
  56. data/docs/use_cases/UC-011-rate-limiting.md +1906 -0
  57. data/docs/use_cases/UC-012-audit-trail.md +2301 -0
  58. data/docs/use_cases/UC-013-high-cardinality-protection.md +2127 -0
  59. data/docs/use_cases/UC-014-adaptive-sampling.md +1940 -0
  60. data/docs/use_cases/UC-015-cost-optimization.md +735 -0
  61. data/docs/use_cases/UC-016-rails-logger-migration.md +785 -0
  62. data/docs/use_cases/UC-017-local-development.md +867 -0
  63. data/docs/use_cases/UC-018-testing-events.md +1081 -0
  64. data/docs/use_cases/UC-019-tiered-storage-migration.md +562 -0
  65. data/docs/use_cases/UC-020-event-versioning.md +708 -0
  66. data/docs/use_cases/UC-021-error-handling-retry-dlq.md +956 -0
  67. data/docs/use_cases/UC-022-event-registry.md +648 -0
  68. data/docs/use_cases/backlog.md +226 -0
  69. data/e11y.gemspec +76 -0
  70. data/lib/e11y/adapters/adaptive_batcher.rb +207 -0
  71. data/lib/e11y/adapters/audit_encrypted.rb +239 -0
  72. data/lib/e11y/adapters/base.rb +580 -0
  73. data/lib/e11y/adapters/file.rb +224 -0
  74. data/lib/e11y/adapters/in_memory.rb +216 -0
  75. data/lib/e11y/adapters/loki.rb +333 -0
  76. data/lib/e11y/adapters/otel_logs.rb +203 -0
  77. data/lib/e11y/adapters/registry.rb +141 -0
  78. data/lib/e11y/adapters/sentry.rb +230 -0
  79. data/lib/e11y/adapters/stdout.rb +108 -0
  80. data/lib/e11y/adapters/yabeda.rb +370 -0
  81. data/lib/e11y/buffers/adaptive_buffer.rb +339 -0
  82. data/lib/e11y/buffers/base_buffer.rb +40 -0
  83. data/lib/e11y/buffers/request_scoped_buffer.rb +246 -0
  84. data/lib/e11y/buffers/ring_buffer.rb +267 -0
  85. data/lib/e11y/buffers.rb +14 -0
  86. data/lib/e11y/console.rb +122 -0
  87. data/lib/e11y/current.rb +48 -0
  88. data/lib/e11y/event/base.rb +894 -0
  89. data/lib/e11y/event/value_sampling_config.rb +84 -0
  90. data/lib/e11y/events/base_audit_event.rb +43 -0
  91. data/lib/e11y/events/base_payment_event.rb +33 -0
  92. data/lib/e11y/events/rails/cache/delete.rb +21 -0
  93. data/lib/e11y/events/rails/cache/read.rb +23 -0
  94. data/lib/e11y/events/rails/cache/write.rb +22 -0
  95. data/lib/e11y/events/rails/database/query.rb +45 -0
  96. data/lib/e11y/events/rails/http/redirect.rb +21 -0
  97. data/lib/e11y/events/rails/http/request.rb +26 -0
  98. data/lib/e11y/events/rails/http/send_file.rb +21 -0
  99. data/lib/e11y/events/rails/http/start_processing.rb +26 -0
  100. data/lib/e11y/events/rails/job/completed.rb +22 -0
  101. data/lib/e11y/events/rails/job/enqueued.rb +22 -0
  102. data/lib/e11y/events/rails/job/failed.rb +22 -0
  103. data/lib/e11y/events/rails/job/scheduled.rb +23 -0
  104. data/lib/e11y/events/rails/job/started.rb +22 -0
  105. data/lib/e11y/events/rails/log.rb +56 -0
  106. data/lib/e11y/events/rails/view/render.rb +23 -0
  107. data/lib/e11y/events.rb +18 -0
  108. data/lib/e11y/instruments/active_job.rb +201 -0
  109. data/lib/e11y/instruments/rails_instrumentation.rb +141 -0
  110. data/lib/e11y/instruments/sidekiq.rb +175 -0
  111. data/lib/e11y/logger/bridge.rb +205 -0
  112. data/lib/e11y/metrics/cardinality_protection.rb +172 -0
  113. data/lib/e11y/metrics/cardinality_tracker.rb +134 -0
  114. data/lib/e11y/metrics/registry.rb +234 -0
  115. data/lib/e11y/metrics/relabeling.rb +226 -0
  116. data/lib/e11y/metrics.rb +102 -0
  117. data/lib/e11y/middleware/audit_signing.rb +174 -0
  118. data/lib/e11y/middleware/base.rb +140 -0
  119. data/lib/e11y/middleware/event_slo.rb +167 -0
  120. data/lib/e11y/middleware/pii_filter.rb +266 -0
  121. data/lib/e11y/middleware/pii_filtering.rb +280 -0
  122. data/lib/e11y/middleware/rate_limiting.rb +214 -0
  123. data/lib/e11y/middleware/request.rb +163 -0
  124. data/lib/e11y/middleware/routing.rb +157 -0
  125. data/lib/e11y/middleware/sampling.rb +254 -0
  126. data/lib/e11y/middleware/slo.rb +168 -0
  127. data/lib/e11y/middleware/trace_context.rb +131 -0
  128. data/lib/e11y/middleware/validation.rb +118 -0
  129. data/lib/e11y/middleware/versioning.rb +132 -0
  130. data/lib/e11y/middleware.rb +12 -0
  131. data/lib/e11y/pii/patterns.rb +90 -0
  132. data/lib/e11y/pii.rb +13 -0
  133. data/lib/e11y/pipeline/builder.rb +155 -0
  134. data/lib/e11y/pipeline/zone_validator.rb +110 -0
  135. data/lib/e11y/pipeline.rb +12 -0
  136. data/lib/e11y/presets/audit_event.rb +65 -0
  137. data/lib/e11y/presets/debug_event.rb +34 -0
  138. data/lib/e11y/presets/high_value_event.rb +51 -0
  139. data/lib/e11y/presets.rb +19 -0
  140. data/lib/e11y/railtie.rb +138 -0
  141. data/lib/e11y/reliability/circuit_breaker.rb +216 -0
  142. data/lib/e11y/reliability/dlq/file_storage.rb +277 -0
  143. data/lib/e11y/reliability/dlq/filter.rb +117 -0
  144. data/lib/e11y/reliability/retry_handler.rb +207 -0
  145. data/lib/e11y/reliability/retry_rate_limiter.rb +117 -0
  146. data/lib/e11y/sampling/error_spike_detector.rb +225 -0
  147. data/lib/e11y/sampling/load_monitor.rb +161 -0
  148. data/lib/e11y/sampling/stratified_tracker.rb +92 -0
  149. data/lib/e11y/sampling/value_extractor.rb +82 -0
  150. data/lib/e11y/self_monitoring/buffer_monitor.rb +79 -0
  151. data/lib/e11y/self_monitoring/performance_monitor.rb +97 -0
  152. data/lib/e11y/self_monitoring/reliability_monitor.rb +146 -0
  153. data/lib/e11y/slo/event_driven.rb +150 -0
  154. data/lib/e11y/slo/tracker.rb +119 -0
  155. data/lib/e11y/version.rb +9 -0
  156. data/lib/e11y.rb +283 -0
  157. metadata +452 -0
@@ -0,0 +1,580 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../reliability/retry_handler"
4
+ require_relative "../reliability/circuit_breaker"
5
+
6
+ module E11y
7
+ module Adapters
8
+ # Base class for all E11y adapters
9
+ #
10
+ # Provides standard interface for event destinations following ADR-004.
11
+ # All adapters must implement {#write} method, optionally override {#write_batch}
12
+ # for performance optimization.
13
+ #
14
+ # @abstract Subclass and implement {#write}, optionally {#write_batch}
15
+ #
16
+ # @example Define custom adapter
17
+ # class CustomAdapter < E11y::Adapters::Base
18
+ # def initialize(config = {})
19
+ # super
20
+ # @url = config.fetch(:url)
21
+ # validate_config!
22
+ # end
23
+ #
24
+ # def write(event_data)
25
+ # # Send single event to external system
26
+ # send_to_api(event_data)
27
+ # true
28
+ # rescue => e
29
+ # warn "Adapter error: #{e.message}"
30
+ # false
31
+ # end
32
+ #
33
+ # def capabilities
34
+ # {
35
+ # batching: false,
36
+ # compression: false,
37
+ # async: false,
38
+ # streaming: false
39
+ # }
40
+ # end
41
+ #
42
+ # private
43
+ #
44
+ # def validate_config!
45
+ # raise ArgumentError, "url is required" unless @url
46
+ # end
47
+ # end
48
+ #
49
+ # @see ADR-004 Section 3.1 (Base Adapter Contract)
50
+ class Base
51
+ attr_reader :config
52
+
53
+ # Initialize adapter with config
54
+ #
55
+ # @param config [Hash] Adapter-specific configuration
56
+ # @option config [Hash] :reliability Reliability settings (retry, circuit_breaker, dlq)
57
def initialize(config = {})
  # Keep the raw hash; it is exposed through the `config` reader.
  @config = config

  # Reliability is opt-out: it stays on unless `reliability: { enabled: false }` is given.
  reliability_settings = config.fetch(:reliability, {})
  @reliability_enabled = reliability_settings.fetch(:enabled, true)
  setup_reliability_layer if @reliability_enabled

  # Validation hook runs last so subclasses can inspect the stored config.
  validate_config!
end
65
+
66
+ # Write a single event (synchronous)
67
+ #
68
+ # Subclasses must implement this method to send events to external systems.
69
+ # This method is called for each event when batching is not used.
70
+ #
71
+ # @param event_data [Hash] Event payload with keys:
72
+ # - :event_name [String] Event name (e.g., "order.paid")
73
+ # - :severity [Symbol] Severity level (:debug, :info, :success, :warn, :error, :fatal)
74
+ # - :timestamp [Time] Event timestamp
75
+ # - :payload [Hash] Event-specific data
76
+ # - :trace_id [String, nil] Trace ID (if tracing enabled)
77
+ # - :span_id [String, nil] Span ID (if tracing enabled)
78
+ #
79
+ # @return [Boolean] true on success, false on failure (failures should be logged)
80
+ # @raise [NotImplementedError] if not overridden in subclass
81
+ #
82
+ # @example
83
+ # def write(event_data)
84
+ # send_to_api(event_data)
85
+ # true
86
+ # rescue => e
87
+ # warn "Adapter error: #{e.message}"
88
+ # false
89
+ # end
90
def write(_event_data)
  # Abstract hook: concrete adapters must provide their own delivery logic.
  message = "#{self.class}#write must be implemented"
  raise NotImplementedError, message
end
93
+
94
+ # Write event with reliability layer (retry, circuit breaker, DLQ).
95
+ #
96
+ # This is the recommended public API for sending events.
97
+ # Automatically handles failures, retries, and DLQ.
98
+ #
99
+ # Respects `E11y.config.error_handling.fail_on_error` setting (C18 Resolution):
100
+ # - `true`: Raises exceptions (fast feedback for web requests)
101
+ # - `false`: Swallows exceptions, saves to DLQ (don't fail background jobs)
102
+ #
103
+ # @param event_data [Hash] Event payload
104
+ # @return [Boolean] true on success; false when a failure is swallowed because fail_on_error=false
105
+ # @raise [RetryExhaustedError, CircuitOpenError] if fail_on_error=true
106
def write_with_reliability(event_data)
  # With the reliability layer disabled the event goes straight to #write.
  return write(event_data) unless @reliability_enabled

  started_at = Time.now
  begin
    # The retry handler wraps the circuit breaker, which wraps the actual write.
    @retry_handler.with_retry(adapter: self, event: event_data) do
      @circuit_breaker.call { write(event_data) }
    end

    # Reaching this point means no reliability error was raised.
    track_adapter_success(event_data, started_at)
    true
  rescue E11y::Reliability::RetryHandler::RetryExhaustedError => exhausted
    track_adapter_failure(event_data, exhausted, started_at)
    handle_reliability_error(event_data, exhausted, :retry_exhausted)
  rescue E11y::Reliability::CircuitBreaker::CircuitOpenError => circuit_open
    track_adapter_failure(event_data, circuit_open, started_at)
    handle_reliability_error(event_data, circuit_open, :circuit_open)
  end
end
128
+
129
+ # Write a batch of events (preferred for performance)
130
+ #
131
+ # Default implementation calls {#write} for each event.
132
+ # Subclasses should override for better batch performance.
133
+ #
134
+ # @param events [Array<Hash>] Array of event payloads (same format as {#write})
135
+ # @return [Boolean] true if all events written successfully, false otherwise
136
+ #
137
+ # @example Override for batch API
138
+ # def write_batch(events)
139
+ # send_batch_to_api(events)
140
+ # true
141
+ # rescue => e
142
+ # warn "Batch error: #{e.message}"
143
+ # false
144
+ # end
145
def write_batch(events)
  # Sequential fallback: hand events to #write one by one and stop at the
  # first falsy result, so later events are not attempted after a failure.
  events.each do |event|
    return false unless write(event)
  end
  true
end
149
+
150
+ # Check if adapter is healthy
151
+ #
152
+ # Subclasses can override to implement health checks (e.g., ping destination).
153
+ # Called periodically to determine if adapter can accept events.
154
+ #
155
+ # @return [Boolean] Health status (true = healthy, false = unhealthy)
156
+ #
157
+ # @example
158
+ # def healthy?
159
+ # ping_api
160
+ # true
161
+ # rescue
162
+ # false
163
+ # end
164
def healthy?
  # The base adapter has nothing to probe, so it always reports healthy.
  true
end
167
+
168
+ # Close connections, flush buffers
169
+ #
170
+ # Called during graceful shutdown. Subclasses should override to:
171
+ # - Close HTTP connections
172
+ # - Flush internal buffers
173
+ # - Release resources
174
+ #
175
+ # @return [void]
176
+ #
177
+ # @example
178
+ # def close
179
+ # @buffer.flush! if @buffer.any?
180
+ # @connection.close
181
+ # end
182
def close
  # Nothing to release at this level; subclasses override to free resources.
  nil
end
185
+
186
+ # Adapter capabilities
187
+ #
188
+ # Returns hash of capability flags. Subclasses should override to declare
189
+ # supported features.
190
+ #
191
+ # @return [Hash] Capability flags with keys:
192
+ # - :batching [Boolean] Supports efficient batch writes
193
+ # - :compression [Boolean] Supports compression
194
+ # - :async [Boolean] Non-blocking writes
195
+ # - :streaming [Boolean] Supports streaming
196
+ #
197
+ # @example
198
+ # def capabilities
199
+ # {
200
+ # batching: true,
201
+ # compression: true,
202
+ # async: false,
203
+ # streaming: false
204
+ # }
205
+ # end
206
def capabilities
  # Every capability is off by default; subclasses override to opt in.
  %i[batching compression async streaming].each_with_object({}) do |flag, flags|
    flags[flag] = false
  end
end
214
+
215
+ private
216
+
217
+ # Validate adapter config
218
+ #
219
+ # Subclasses should override to validate configuration during initialization.
220
+ # Raise ArgumentError for invalid config.
221
+ #
222
+ # @raise [ArgumentError] if configuration is invalid
223
+ #
224
+ # @example
225
+ # def validate_config!
226
+ # raise ArgumentError, "url is required" unless @config[:url]
227
+ # end
228
def validate_config!
  # Intentionally empty: the base adapter accepts any configuration.
  nil
end
231
+
232
+ # Format event for this adapter
233
+ #
234
+ # Subclasses can override to transform event_data to adapter-specific format.
235
+ #
236
+ # @param event_data [Hash] Event payload
237
+ # @return [Hash, String] Formatted event
238
+ #
239
+ # @example
240
+ # def format_event(event_data)
241
+ # {
242
+ # timestamp: event_data[:timestamp].iso8601,
243
+ # message: event_data[:event_name],
244
+ # level: event_data[:severity]
245
+ # }
246
+ # end
247
def format_event(event_data)
  # Identity transform: the payload is returned untouched (same object).
  event_data
end
250
+
251
+ # Execute block with retry logic for transient errors
252
+ #
253
+ # Implements exponential backoff with jitter for network/transient errors.
254
+ # Use this helper in adapter write methods to handle temporary failures.
255
+ #
256
+ # @param max_attempts [Integer] Maximum retry attempts (default: 3)
257
+ # @param base_delay [Float] Initial retry delay in seconds (default: 1.0)
258
+ # @param max_delay [Float] Maximum retry delay in seconds (default: 16.0)
259
+ # @param jitter [Float] Jitter factor (0.0-1.0, default: 0.2 for ±20%)
260
+ # @yield Block to execute with retry
261
+ # @return [Object] Block result
262
+ # @raise Last exception if all retries exhausted
263
+ #
264
+ # @example Retry HTTP request
265
+ # def write(event_data)
266
+ # with_retry(max_attempts: 5) do
267
+ # http_client.post(event_data)
268
+ # end
269
+ # true
270
+ # rescue => e
271
+ # warn "Failed after retries: #{e.message}"
272
+ # false
273
+ # end
274
+ #
275
+ # @see ADR-004 Section 7.1 (Retry Policy)
276
def with_retry(max_attempts: 3, base_delay: 1.0, max_delay: 16.0, jitter: 0.2)
  tries = 0

  begin
    tries += 1
    yield
  rescue StandardError => failure
    # Only transient errors are retried, and only while attempts remain;
    # anything else is re-raised unchanged.
    can_retry = retriable_error?(failure) && tries < max_attempts
    raise unless can_retry

    pause = calculate_backoff_delay(tries, base_delay, max_delay, jitter)
    warn "[E11y] #{self.class.name} retry #{tries}/#{max_attempts} after #{pause.round(2)}s: #{failure.message}"
    sleep(pause)
    retry
  end
end
291
+
292
+ # Check if error is retriable (network/transient errors)
293
+ #
294
+ # Override in subclasses to customize retriable error detection.
295
+ # Default implementation handles common network errors.
296
+ #
297
+ # @param error [Exception] Error to check
298
+ # @return [Boolean] true if error is retriable
299
+ #
300
+ # @example Add custom retriable errors
301
+ # def retriable_error?(error)
302
+ # super || error.is_a?(CustomTransientError)
303
+ # end
304
def retriable_error?(error)
  # Timeout errors. Each constant is guarded with defined? because the owning
  # library ("timeout", "net/http") may not be loaded in the host process.
  return true if defined?(Timeout::Error) && error.is_a?(Timeout::Error)
  return true if defined?(Net::ReadTimeout) && error.is_a?(Net::ReadTimeout)
  return true if defined?(Net::OpenTimeout) && error.is_a?(Net::OpenTimeout)

  # Connection-level errors.
  return true if defined?(Errno::ECONNREFUSED) && error.is_a?(Errno::ECONNREFUSED)
  return true if defined?(Errno::ECONNRESET) && error.is_a?(Errno::ECONNRESET)
  return true if defined?(Errno::ETIMEDOUT) && error.is_a?(Errno::ETIMEDOUT)
  return true if defined?(Errno::EHOSTUNREACH) && error.is_a?(Errno::EHOSTUNREACH)

  # HTTP client errors (Faraday), only when Faraday is loaded.
  if defined?(Faraday::TimeoutError)
    return true if error.is_a?(Faraday::TimeoutError)
    return true if error.is_a?(Faraday::ConnectionFailed)
  end

  # HTTP 5xx responses are treated as retriable server errors.
  # Range#cover? returns false for nil or non-numeric statuses instead of
  # raising, so a malformed response cannot mask the error being classified.
  response = error.response if error.respond_to?(:response)
  return true if response.is_a?(Hash) && (500...600).cover?(response[:status])

  false
end
330
+
331
+ # Calculate exponential backoff delay with jitter
332
+ #
333
+ # @param attempt [Integer] Current attempt number (1-based)
334
+ # @param base_delay [Float] Base delay in seconds
335
+ # @param max_delay [Float] Maximum delay in seconds
336
+ # @param jitter [Float] Jitter factor (0.0-1.0)
337
+ # @return [Float] Delay in seconds
338
+ #
339
+ # @api private
340
def calculate_backoff_delay(attempt, base_delay, max_delay, jitter)
  # Double the base delay for every attempt after the first, capped at max_delay.
  capped = [base_delay * (2**(attempt - 1)), max_delay].min

  # Random factor in [-1, 1), scaled by the jitter fraction of the capped delay.
  spread = (rand * 2) - 1
  capped + (capped * jitter * spread)
end
349
+
350
+ # Execute block with circuit breaker pattern
351
+ #
352
+ # Prevents cascading failures by opening circuit after threshold failures.
353
+ # Use this helper to wrap write operations that may fail.
354
+ #
355
+ # Note: This is a simplified circuit breaker for single adapter instance.
356
+ # For distributed systems, use external circuit breaker (e.g., semian gem).
357
+ #
358
+ # @param failure_threshold [Integer] Failures before opening circuit (default: 5)
359
+ # @param timeout [Integer] Seconds before testing half-open (default: 60)
360
+ # @yield Block to execute
361
+ # @return [Object] Block result
362
+ # @raise [CircuitOpenError] if circuit is open
363
+ #
364
+ # @example Wrap HTTP calls
365
+ # def write(event_data)
366
+ # with_circuit_breaker do
367
+ # http_client.post(event_data)
368
+ # end
369
+ # true
370
+ # rescue CircuitOpenError => e
371
+ # warn "Circuit open: #{e.message}"
372
+ # false
373
+ # end
374
+ #
375
+ # @see ADR-004 Section 7.2 (Circuit Breaker)
376
def with_circuit_breaker(failure_threshold: 5, timeout: 60)
  # Breaker state is created lazily on first use.
  init_circuit_breaker! unless @circuit_state

  # Gate: an open circuit rejects calls until the timeout has elapsed, then
  # moves to half-open so the next call acts as a probe.
  @circuit_mutex.synchronize do
    next unless @circuit_state == :open

    raise CircuitOpenError, "Circuit breaker open for #{self.class.name}" unless circuit_timeout_expired?(timeout)

    @circuit_state = :half_open
    @circuit_success_count = 0
  end

  # The block runs outside the mutex; only the bookkeeping hooks lock.
  begin
    outcome = yield
    on_circuit_success
    outcome
  rescue StandardError
    on_circuit_failure(failure_threshold)
    raise
  end
end
400
+
401
+ # Initialize circuit breaker state
402
+ #
403
+ # @api private
404
def init_circuit_breaker!
  # Fresh breaker: closed, no recorded failures or successes.
  @circuit_mutex = Mutex.new
  @circuit_state = :closed
  @circuit_failure_count = @circuit_success_count = 0
  @circuit_last_failure_time = nil
end
411
+
412
+ # Handle successful circuit execution
413
+ #
414
+ # @api private
415
def on_circuit_success
  @circuit_mutex.synchronize do
    # Any success clears the failure counter.
    @circuit_failure_count = 0
    next unless @circuit_state == :half_open

    # In half-open, two successes are needed before closing again.
    @circuit_success_count += 1
    next if @circuit_success_count < 2

    @circuit_state = :closed
    warn "[E11y] #{self.class.name} circuit breaker closed (recovered)"
  end
end
428
+
429
+ # Handle failed circuit execution
430
+ #
431
+ # @param threshold [Integer] Failure threshold
432
+ # @api private
433
def on_circuit_failure(threshold)
  @circuit_mutex.synchronize do
    @circuit_failure_count += 1
    @circuit_success_count = 0
    # Timestamp is what circuit_timeout_expired? measures the open period from.
    @circuit_last_failure_time = Time.now

    # A failed probe in half-open must re-open the circuit immediately;
    # otherwise the breaker would stay half-open and let every call through.
    probe_failed = @circuit_state == :half_open
    # A closed circuit trips once the failure count reaches the threshold.
    tripped = @circuit_state == :closed && @circuit_failure_count >= threshold
    next unless probe_failed || tripped

    @circuit_state = :open
    warn "[E11y] #{self.class.name} circuit breaker opened (#{@circuit_failure_count} failures)"
  end
end
445
+
446
+ # Check if circuit timeout has expired
447
+ #
448
+ # @param timeout [Integer] Timeout in seconds
449
+ # @return [Boolean]
450
+ # @api private
451
def circuit_timeout_expired?(timeout)
  last_failure = @circuit_last_failure_time
  # No failure recorded yet: pass the falsy value through unchanged.
  return last_failure unless last_failure

  (Time.now - last_failure) >= timeout
end
454
+
455
+ # Setup reliability layer (Retry + CircuitBreaker + DLQ).
456
+ #
457
+ # @api private
458
def setup_reliability_layer
  settings = @config.fetch(:reliability, {})

  # Retry handler is configured from the :retry sub-hash (empty if absent).
  @retry_handler = E11y::Reliability::RetryHandler.new(config: settings.fetch(:retry, {}))

  # Circuit breaker is named after the adapter class and uses :circuit_breaker settings.
  @circuit_breaker = E11y::Reliability::CircuitBreaker.new(
    adapter_name: self.class.name,
    config: settings.fetch(:circuit_breaker, {})
  )

  # DLQ collaborators start unset; this method does not populate them.
  @dlq_filter = nil
  @dlq_storage = nil
end
476
+
477
+ # Handle reliability error (retry exhausted / circuit breaker open).
478
+ #
479
+ # Behavior depends on `E11y.config.error_handling.fail_on_error` (C18 Resolution):
480
+ # - `true`: Re-raises exception (fast feedback for web requests)
481
+ # - `false`: Swallows exception, saves to DLQ (don't fail background jobs)
482
+ #
483
+ # @param event_data [Hash] Event payload
484
+ # @param error [StandardError] Error that occurred
485
+ # @param reason [Symbol] Error reason (:retry_exhausted, :circuit_open)
486
+ # @return [Boolean] false (event failed)
487
+ # @raise [StandardError] Re-raises if fail_on_error=true
488
+ #
489
+ # @api private
490
def handle_reliability_error(event_data, error, reason)
  # Give the DLQ a chance to keep the event before anything is raised.
  save_to_dlq_if_needed(event_data, error, reason)

  warn "[E11y] #{self.class.name} #{reason} for event #{event_data[:event_name]}: #{error.message}"

  # fail_on_error=true: surface the failure to the caller.
  strict = E11y.config.error_handling.fail_on_error
  raise error if strict

  # fail_on_error=false: swallow the failure and report it through the return value.
  # TODO: Track metric e11y.event.tracking_failed_silent
  false
end
506
+
507
+ # Save event to DLQ if filter allows.
508
+ #
509
+ # @api private
510
def save_to_dlq_if_needed(event_data, error, reason)
  # No filter configured, or the filter declines the event: nothing to store.
  return unless @dlq_filter&.should_save?(event_data, error)

  # Time#iso8601 needs `require "time"` on Rubies where it is not built in,
  # and this file never loads it. The NoMethodError would be swallowed by the
  # rescue below and the event silently lost, so the same UTC ISO 8601 string
  # is built with core Time#strftime instead.
  recorded_at = Time.now.utc.strftime("%Y-%m-%dT%H:%M:%SZ")

  @dlq_storage&.save(event_data, metadata: {
    error: error.message,
    error_class: error.class.name,
    reason: reason,
    adapter: self.class.name,
    timestamp: recorded_at
  })
rescue StandardError => e
  # C18: Don't fail if DLQ save fails
  warn "[E11y] Failed to save event to DLQ: #{e.message}"
end
524
+
525
+ # Track successful adapter write (self-monitoring).
526
+ #
527
+ # @api private
528
def track_adapter_success(_event_data, start_time)
  # Latency is taken first so the lazy requires below are not counted in it.
  elapsed_ms = ((Time.now - start_time) * 1000).round(2)

  # Monitors are loaded on demand rather than at file load.
  require "e11y/self_monitoring/performance_monitor"
  require "e11y/self_monitoring/reliability_monitor"

  # Anonymous classes have no name, so fall back to a fixed label.
  label = self.class.name || "AnonymousAdapter"

  E11y::SelfMonitoring::PerformanceMonitor.track_adapter_latency(label, elapsed_ms)
  E11y::SelfMonitoring::ReliabilityMonitor.track_adapter_success(adapter_name: label)
rescue StandardError => monitoring_error
  # Self-monitoring is best effort and must never break event delivery.
  warn "[E11y] Self-monitoring error: #{monitoring_error.message}"
end
549
+
550
+ # Track failed adapter write (self-monitoring).
551
+ #
552
+ # @api private
553
def track_adapter_failure(_event_data, error, start_time)
  # Latency is taken first so the lazy requires below are not counted in it.
  elapsed_ms = ((Time.now - start_time) * 1000).round(2)

  # Monitors are loaded on demand rather than at file load.
  require "e11y/self_monitoring/performance_monitor"
  require "e11y/self_monitoring/reliability_monitor"

  # Anonymous classes have no name, so fall back to a fixed label.
  label = self.class.name || "AnonymousAdapter"

  E11y::SelfMonitoring::PerformanceMonitor.track_adapter_latency(label, elapsed_ms)
  E11y::SelfMonitoring::ReliabilityMonitor.track_adapter_failure(
    adapter_name: label,
    error_class: error.class.name
  )
rescue StandardError => monitoring_error
  # Self-monitoring is best effort and must never break event delivery.
  warn "[E11y] Self-monitoring error: #{monitoring_error.message}"
end
575
+ end
576
+
577
+ # Circuit breaker open error
578
class CircuitOpenError < Error
  # Marker error only; it adds no behaviour of its own.
end
579
+ end
580
+ end