e11y 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +4 -0
  3. data/.rubocop.yml +69 -0
  4. data/CHANGELOG.md +26 -0
  5. data/CODE_OF_CONDUCT.md +64 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +179 -0
  8. data/Rakefile +37 -0
  9. data/benchmarks/run_all.rb +33 -0
  10. data/config/README.md +83 -0
  11. data/config/loki-local-config.yaml +35 -0
  12. data/config/prometheus.yml +15 -0
  13. data/docker-compose.yml +78 -0
  14. data/docs/00-ICP-AND-TIMELINE.md +483 -0
  15. data/docs/01-SCALE-REQUIREMENTS.md +858 -0
  16. data/docs/ADR-001-architecture.md +2617 -0
  17. data/docs/ADR-002-metrics-yabeda.md +1395 -0
  18. data/docs/ADR-003-slo-observability.md +3337 -0
  19. data/docs/ADR-004-adapter-architecture.md +2385 -0
  20. data/docs/ADR-005-tracing-context.md +1372 -0
  21. data/docs/ADR-006-security-compliance.md +4143 -0
  22. data/docs/ADR-007-opentelemetry-integration.md +1385 -0
  23. data/docs/ADR-008-rails-integration.md +1911 -0
  24. data/docs/ADR-009-cost-optimization.md +2993 -0
  25. data/docs/ADR-010-developer-experience.md +2166 -0
  26. data/docs/ADR-011-testing-strategy.md +1836 -0
  27. data/docs/ADR-012-event-evolution.md +958 -0
  28. data/docs/ADR-013-reliability-error-handling.md +2750 -0
  29. data/docs/ADR-014-event-driven-slo.md +1533 -0
  30. data/docs/ADR-015-middleware-order.md +1061 -0
  31. data/docs/ADR-016-self-monitoring-slo.md +1234 -0
  32. data/docs/API-REFERENCE-L28.md +914 -0
  33. data/docs/COMPREHENSIVE-CONFIGURATION.md +2366 -0
  34. data/docs/IMPLEMENTATION_NOTES.md +2804 -0
  35. data/docs/IMPLEMENTATION_PLAN.md +1971 -0
  36. data/docs/IMPLEMENTATION_PLAN_ARCHITECTURE.md +586 -0
  37. data/docs/PLAN.md +148 -0
  38. data/docs/QUICK-START.md +934 -0
  39. data/docs/README.md +296 -0
  40. data/docs/design/00-memory-optimization.md +593 -0
  41. data/docs/guides/MIGRATION-L27-L28.md +692 -0
  42. data/docs/guides/PERFORMANCE-BENCHMARKS.md +434 -0
  43. data/docs/guides/README.md +44 -0
  44. data/docs/prd/01-overview-vision.md +440 -0
  45. data/docs/use_cases/README.md +119 -0
  46. data/docs/use_cases/UC-001-request-scoped-debug-buffering.md +813 -0
  47. data/docs/use_cases/UC-002-business-event-tracking.md +1953 -0
  48. data/docs/use_cases/UC-003-pattern-based-metrics.md +1627 -0
  49. data/docs/use_cases/UC-004-zero-config-slo-tracking.md +728 -0
  50. data/docs/use_cases/UC-005-sentry-integration.md +759 -0
  51. data/docs/use_cases/UC-006-trace-context-management.md +905 -0
  52. data/docs/use_cases/UC-007-pii-filtering.md +2648 -0
  53. data/docs/use_cases/UC-008-opentelemetry-integration.md +1153 -0
  54. data/docs/use_cases/UC-009-multi-service-tracing.md +1043 -0
  55. data/docs/use_cases/UC-010-background-job-tracking.md +1018 -0
  56. data/docs/use_cases/UC-011-rate-limiting.md +1906 -0
  57. data/docs/use_cases/UC-012-audit-trail.md +2301 -0
  58. data/docs/use_cases/UC-013-high-cardinality-protection.md +2127 -0
  59. data/docs/use_cases/UC-014-adaptive-sampling.md +1940 -0
  60. data/docs/use_cases/UC-015-cost-optimization.md +735 -0
  61. data/docs/use_cases/UC-016-rails-logger-migration.md +785 -0
  62. data/docs/use_cases/UC-017-local-development.md +867 -0
  63. data/docs/use_cases/UC-018-testing-events.md +1081 -0
  64. data/docs/use_cases/UC-019-tiered-storage-migration.md +562 -0
  65. data/docs/use_cases/UC-020-event-versioning.md +708 -0
  66. data/docs/use_cases/UC-021-error-handling-retry-dlq.md +956 -0
  67. data/docs/use_cases/UC-022-event-registry.md +648 -0
  68. data/docs/use_cases/backlog.md +226 -0
  69. data/e11y.gemspec +76 -0
  70. data/lib/e11y/adapters/adaptive_batcher.rb +207 -0
  71. data/lib/e11y/adapters/audit_encrypted.rb +239 -0
  72. data/lib/e11y/adapters/base.rb +580 -0
  73. data/lib/e11y/adapters/file.rb +224 -0
  74. data/lib/e11y/adapters/in_memory.rb +216 -0
  75. data/lib/e11y/adapters/loki.rb +333 -0
  76. data/lib/e11y/adapters/otel_logs.rb +203 -0
  77. data/lib/e11y/adapters/registry.rb +141 -0
  78. data/lib/e11y/adapters/sentry.rb +230 -0
  79. data/lib/e11y/adapters/stdout.rb +108 -0
  80. data/lib/e11y/adapters/yabeda.rb +370 -0
  81. data/lib/e11y/buffers/adaptive_buffer.rb +339 -0
  82. data/lib/e11y/buffers/base_buffer.rb +40 -0
  83. data/lib/e11y/buffers/request_scoped_buffer.rb +246 -0
  84. data/lib/e11y/buffers/ring_buffer.rb +267 -0
  85. data/lib/e11y/buffers.rb +14 -0
  86. data/lib/e11y/console.rb +122 -0
  87. data/lib/e11y/current.rb +48 -0
  88. data/lib/e11y/event/base.rb +894 -0
  89. data/lib/e11y/event/value_sampling_config.rb +84 -0
  90. data/lib/e11y/events/base_audit_event.rb +43 -0
  91. data/lib/e11y/events/base_payment_event.rb +33 -0
  92. data/lib/e11y/events/rails/cache/delete.rb +21 -0
  93. data/lib/e11y/events/rails/cache/read.rb +23 -0
  94. data/lib/e11y/events/rails/cache/write.rb +22 -0
  95. data/lib/e11y/events/rails/database/query.rb +45 -0
  96. data/lib/e11y/events/rails/http/redirect.rb +21 -0
  97. data/lib/e11y/events/rails/http/request.rb +26 -0
  98. data/lib/e11y/events/rails/http/send_file.rb +21 -0
  99. data/lib/e11y/events/rails/http/start_processing.rb +26 -0
  100. data/lib/e11y/events/rails/job/completed.rb +22 -0
  101. data/lib/e11y/events/rails/job/enqueued.rb +22 -0
  102. data/lib/e11y/events/rails/job/failed.rb +22 -0
  103. data/lib/e11y/events/rails/job/scheduled.rb +23 -0
  104. data/lib/e11y/events/rails/job/started.rb +22 -0
  105. data/lib/e11y/events/rails/log.rb +56 -0
  106. data/lib/e11y/events/rails/view/render.rb +23 -0
  107. data/lib/e11y/events.rb +18 -0
  108. data/lib/e11y/instruments/active_job.rb +201 -0
  109. data/lib/e11y/instruments/rails_instrumentation.rb +141 -0
  110. data/lib/e11y/instruments/sidekiq.rb +175 -0
  111. data/lib/e11y/logger/bridge.rb +205 -0
  112. data/lib/e11y/metrics/cardinality_protection.rb +172 -0
  113. data/lib/e11y/metrics/cardinality_tracker.rb +134 -0
  114. data/lib/e11y/metrics/registry.rb +234 -0
  115. data/lib/e11y/metrics/relabeling.rb +226 -0
  116. data/lib/e11y/metrics.rb +102 -0
  117. data/lib/e11y/middleware/audit_signing.rb +174 -0
  118. data/lib/e11y/middleware/base.rb +140 -0
  119. data/lib/e11y/middleware/event_slo.rb +167 -0
  120. data/lib/e11y/middleware/pii_filter.rb +266 -0
  121. data/lib/e11y/middleware/pii_filtering.rb +280 -0
  122. data/lib/e11y/middleware/rate_limiting.rb +214 -0
  123. data/lib/e11y/middleware/request.rb +163 -0
  124. data/lib/e11y/middleware/routing.rb +157 -0
  125. data/lib/e11y/middleware/sampling.rb +254 -0
  126. data/lib/e11y/middleware/slo.rb +168 -0
  127. data/lib/e11y/middleware/trace_context.rb +131 -0
  128. data/lib/e11y/middleware/validation.rb +118 -0
  129. data/lib/e11y/middleware/versioning.rb +132 -0
  130. data/lib/e11y/middleware.rb +12 -0
  131. data/lib/e11y/pii/patterns.rb +90 -0
  132. data/lib/e11y/pii.rb +13 -0
  133. data/lib/e11y/pipeline/builder.rb +155 -0
  134. data/lib/e11y/pipeline/zone_validator.rb +110 -0
  135. data/lib/e11y/pipeline.rb +12 -0
  136. data/lib/e11y/presets/audit_event.rb +65 -0
  137. data/lib/e11y/presets/debug_event.rb +34 -0
  138. data/lib/e11y/presets/high_value_event.rb +51 -0
  139. data/lib/e11y/presets.rb +19 -0
  140. data/lib/e11y/railtie.rb +138 -0
  141. data/lib/e11y/reliability/circuit_breaker.rb +216 -0
  142. data/lib/e11y/reliability/dlq/file_storage.rb +277 -0
  143. data/lib/e11y/reliability/dlq/filter.rb +117 -0
  144. data/lib/e11y/reliability/retry_handler.rb +207 -0
  145. data/lib/e11y/reliability/retry_rate_limiter.rb +117 -0
  146. data/lib/e11y/sampling/error_spike_detector.rb +225 -0
  147. data/lib/e11y/sampling/load_monitor.rb +161 -0
  148. data/lib/e11y/sampling/stratified_tracker.rb +92 -0
  149. data/lib/e11y/sampling/value_extractor.rb +82 -0
  150. data/lib/e11y/self_monitoring/buffer_monitor.rb +79 -0
  151. data/lib/e11y/self_monitoring/performance_monitor.rb +97 -0
  152. data/lib/e11y/self_monitoring/reliability_monitor.rb +146 -0
  153. data/lib/e11y/slo/event_driven.rb +150 -0
  154. data/lib/e11y/slo/tracker.rb +119 -0
  155. data/lib/e11y/version.rb +9 -0
  156. data/lib/e11y.rb +283 -0
  157. metadata +452 -0
@@ -0,0 +1,161 @@
1
+ # frozen_string_literal: true
2
+
3
+ module E11y
4
+ module Sampling
5
+ # Load Monitor for Adaptive Sampling (FEAT-4842.1)
6
+ #
7
+ # Monitors system load and event volume to enable load-based adaptive sampling.
8
+ # Implements load-based sampling strategy from ADR-009 §3.3.
9
+ #
10
+ # Features:
11
+ # - Event volume tracking (events/second)
12
+ # - Tiered load levels (normal, high, overload)
13
+ # - Sliding window for rate calculation
14
+ # - Thread-safe concurrent access
15
+ #
16
+ # @example Configuration
17
+ # monitor = E11y::Sampling::LoadMonitor.new(
18
+ # window: 60, # 60 seconds sliding window
19
+ # thresholds: {
20
+ # normal: 1_000, # <1k events/sec → 100% sampling
21
+ # high: 10_000, # 1k-50k events/sec → 50% sampling
22
+ # very_high: 50_000, # 50k-100k events/sec → 10% sampling
23
+ # overload: 100_000 # >=100k events/sec → 1% sampling
24
+ # }
25
+ # )
26
+ #
27
+ # @example Usage
28
+ # monitor.record_event
29
+ #
30
+ # sample_rate = case monitor.load_level
31
+ # when :normal then 1.0
32
+ # when :high then 0.5
33
+ # when :very_high then 0.1
34
+ # when :overload then 0.01
35
+ # end
36
+ class LoadMonitor
37
+ # Default configuration
38
+ DEFAULT_WINDOW = 60 # 60 seconds sliding window
39
+ DEFAULT_THRESHOLDS = {
40
+ normal: 1_000, # below 1k events/sec → :normal (100% sampling)
41
+ high: 10_000, # 1k-50k events/sec → :high (50% sampling); load_level maps >= normal and >= high to the same level
42
+ very_high: 50_000, # 50k-100k events/sec → :very_high (10% sampling)
43
+ overload: 100_000 # >=100k events/sec → :overload (1% sampling)
44
+ }.freeze
45
+
46
+ attr_reader :window, :thresholds
47
+
48
+ # Initialize load monitor
49
+ #
50
+ # @param config [Hash] Configuration options
51
+ # @option config [Integer] :window (60) Sliding window in seconds
52
+ # @option config [Hash] :thresholds ({}) Load thresholds (events/sec)
53
+ def initialize(config = {})
54
+ @window = config.fetch(:window, DEFAULT_WINDOW)
55
+ @thresholds = DEFAULT_THRESHOLDS.merge(config.fetch(:thresholds, {}))
56
+
57
+ # Event tracking
58
+ @events = [] # Timestamps of tracked events
59
+ @mutex = Mutex.new
60
+ end
61
+
62
+ # Record an event for load tracking
63
+ def record_event
64
+ @mutex.synchronize do
65
+ now = Time.now
66
+ @events << now
67
+
68
+ # Cleanup old events (outside window)
69
+ cleanup_old_events(now)
70
+ end
71
+ end
72
+
73
+ # Get current event rate (events per second)
74
+ #
75
+ # @return [Float] Events per second
76
+ def current_rate
77
+ @mutex.synchronize do
78
+ now = Time.now
79
+ cleanup_old_events(now)
80
+
81
+ count = @events.count { |ts| (now - ts) <= @window }
82
+ count.to_f / @window
83
+ end
84
+ end
85
+
86
+ # Classify the current event rate (from #current_rate) into a load level.
87
+ # With ascending thresholds: rate < normal → :normal; normal <= rate < very_high → :high; very_high <= rate < overload → :very_high; rate >= overload → :overload.
88
+ # @return [Symbol] Load level (:normal, :high, :very_high, :overload)
89
+ def load_level
90
+ rate = current_rate
91
+
92
+ # Compare against thresholds from highest to lowest; the first match wins
93
+ if rate >= @thresholds[:overload]
94
+ :overload
95
+ elsif rate >= @thresholds[:very_high]
96
+ :very_high
97
+ elsif rate >= @thresholds[:high]
98
+ :high
99
+ elsif rate >= @thresholds[:normal]
100
+ :high # NOTE(review): same result as the :high branch above, so the :high threshold never changes the level — confirm intent
101
+ else
102
+ :normal
103
+ end
104
+ end
105
+
106
+ # Get recommended sample rate for current load
107
+ #
108
+ # @return [Float] Sample rate (0.0-1.0)
109
+ def recommended_sample_rate
110
+ case load_level
111
+ when :normal
112
+ 1.0 # 100% sampling
113
+ when :high
114
+ 0.5 # 50% sampling
115
+ when :very_high
116
+ 0.1 # 10% sampling
117
+ when :overload
118
+ 0.01 # 1% sampling
119
+ end
120
+ end
121
+
122
+ # Check if system is overloaded
123
+ #
124
+ # @return [Boolean] true if overload level reached
125
+ def overloaded?
126
+ load_level == :overload
127
+ end
128
+
129
+ # Reset monitor state (useful for testing)
130
+ def reset!
131
+ @mutex.synchronize do
132
+ @events.clear
133
+ end
134
+ end
135
+
136
+ # Get load statistics
137
+ #
138
+ # @return [Hash] Statistics (rate, level, sample_rate, event_count)
139
+ def stats
140
+ # Don't wrap in mutex - methods already handle locking
141
+ {
142
+ rate: current_rate,
143
+ level: load_level,
144
+ sample_rate: recommended_sample_rate,
145
+ event_count: @mutex.synchronize { @events.size },
146
+ window: @window
147
+ }
148
+ end
149
+
150
+ private
151
+
152
+ # Cleanup events outside the sliding window
153
+ #
154
+ # @param now [Time] Current timestamp
155
+ def cleanup_old_events(now)
156
+ cutoff = now - @window
157
+ @events.reject! { |ts| ts < cutoff }
158
+ end
159
+ end
160
+ end
161
+ end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ module E11y
4
+ module Sampling
5
+ # Stratified Sampling Tracker for SLO accuracy (FEAT-4851, C11 Resolution)
6
+ #
7
+ # Tracks sampling statistics per severity stratum to enable sampling correction
8
+ # in SLO calculations. Ensures accurate SLO metrics even with aggressive sampling.
9
+ #
10
+ # @example Usage in sampling middleware
11
+ # tracker = StratifiedTracker.new
12
+ # tracker.record_sample(severity: :success, sample_rate: 0.1, sampled: true)
13
+ # tracker.record_sample(severity: :error, sample_rate: 1.0, sampled: true)
14
+ #
15
+ # correction = tracker.sampling_correction(:success) # => 10.0 (1/0.1)
16
+ #
17
+ # @see ADR-009 §3.7 Stratified Sampling for SLO Accuracy
18
+ # @see UC-014 Adaptive Sampling (C11 Resolution)
19
+ class StratifiedTracker
20
+ # @return [Hash{Symbol => Hash}] Stratum statistics
21
+ attr_reader :strata
22
+
23
+ def initialize
24
+ @strata = Hash.new { |h, k| h[k] = { sampled_count: 0, total_count: 0, sample_rate_sum: 0.0 } }
25
+ @mutex = Mutex.new
26
+ end
27
+
28
+ # Record a sampling decision for a severity stratum
29
+ #
30
+ # @param severity [Symbol] Event severity (:debug, :info, :success, :warn, :error, :fatal)
31
+ # @param sample_rate [Float] Sample rate used (0.0-1.0)
32
+ # @param sampled [Boolean] Whether event was sampled
33
+ # @return [void]
34
+ def record_sample(severity:, sample_rate:, sampled:)
35
+ @mutex.synchronize do
36
+ stratum = @strata[severity]
37
+ stratum[:total_count] += 1
38
+ stratum[:sampled_count] += 1 if sampled
39
+ stratum[:sample_rate_sum] += sample_rate
40
+ end
41
+ end
42
+
43
+ # Get sampling correction factor for a severity
44
+ #
45
+ # Correction factor = 1 / sample_rate
46
+ # Multiply observed counts by this to estimate true counts.
47
+ #
48
+ # @param severity [Symbol] Event severity
49
+ # @return [Float] Correction factor (1.0 if no samples)
50
+ def sampling_correction(severity)
51
+ @mutex.synchronize do
52
+ stratum = @strata[severity]
53
+ return 1.0 if stratum[:sampled_count].zero?
54
+
55
+ # Average sample rate for this stratum
56
+ avg_sample_rate = stratum[:sample_rate_sum] / stratum[:total_count]
57
+ return 1.0 if avg_sample_rate.zero?
58
+
59
+ 1.0 / avg_sample_rate
60
+ end
61
+ end
62
+
63
+ # Get statistics for a severity stratum
64
+ #
65
+ # @param severity [Symbol] Event severity
66
+ # @return [Hash] Stratum statistics
67
+ def stratum_stats(severity)
68
+ @mutex.synchronize do
69
+ @strata[severity].dup
70
+ end
71
+ end
72
+
73
+ # Get statistics for all strata
74
+ #
75
+ # @return [Hash{Symbol => Hash}] All stratum statistics
76
+ def all_strata_stats
77
+ @mutex.synchronize do
78
+ @strata.transform_values(&:dup)
79
+ end
80
+ end
81
+
82
+ # Reset all statistics
83
+ #
84
+ # @return [void]
85
+ def reset!
86
+ @mutex.synchronize do
87
+ @strata.clear
88
+ end
89
+ end
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ module E11y
4
+ module Sampling
5
+ # ValueExtractor for extracting numeric values from event payloads (FEAT-4847)
6
+ #
7
+ # Supports:
8
+ # - Nested field access (dot notation: "user.balance")
9
+ # - Type coercion (strings to numbers)
10
+ # - Nil handling (returns 0.0 for missing fields)
11
+ #
12
+ # Used by value-based sampling to prioritize high-value events.
13
+ #
14
+ # @example Basic usage
15
+ # extractor = ValueExtractor.new
16
+ # event_data = { amount: 1500, currency: "USD" }
17
+ # value = extractor.extract(event_data, :amount) # => 1500.0
18
+ #
19
+ # @example Nested fields
20
+ # event_data = { user: { balance: 5000 } }
21
+ # value = extractor.extract(event_data, "user.balance") # => 5000.0
22
+ #
23
+ # @example Type coercion
24
+ # event_data = { amount: "1234.56" }
25
+ # value = extractor.extract(event_data, :amount) # => 1234.56
26
+ #
27
+ # @example Nil handling
28
+ # event_data = {}
29
+ # value = extractor.extract(event_data, :missing) # => 0.0
30
+ class ValueExtractor
31
+ # Extract numeric value from event data
32
+ #
33
+ # @param event_data [Hash] Event payload
34
+ # @param field [String, Symbol] Field path (supports dot notation for nested fields)
35
+ # @return [Float] Extracted value (0.0 if field is missing or non-numeric)
36
+ def extract(event_data, field)
37
+ value = navigate_to_field(event_data, field)
38
+ coerce_to_number(value)
39
+ end
40
+
41
+ private
42
+
43
+ # Navigate to nested field using dot notation
44
+ #
45
+ # @param data [Hash] Current data hash
46
+ # @param field [String, Symbol] Field path
47
+ # @return [Object, nil] Field value or nil if not found
48
+ def navigate_to_field(data, field)
49
+ return nil unless data.is_a?(Hash)
50
+
51
+ field_path = field.to_s.split(".")
52
+ field_path.reduce(data) do |current, key|
53
+ break nil unless current.is_a?(Hash)
54
+
55
+ current[key.to_sym] || current[key.to_s]
56
+ end
57
+ end
58
+
59
+ # Coerce value to Float
60
+ #
61
+ # @param value [Object] Value to coerce
62
+ # @return [Float] Numeric value (0.0 for nil or non-coercible)
63
+ def coerce_to_number(value)
64
+ return 0.0 if value.nil?
65
+
66
+ case value
67
+ when Numeric
68
+ value.to_f
69
+ when String
70
+ # Try to convert string to float
71
+ Float(value)
72
+ else
73
+ # Non-numeric types default to 0.0
74
+ 0.0
75
+ end
76
+ rescue ArgumentError, TypeError
77
+ # Invalid numeric string or type - return 0.0
78
+ 0.0
79
+ end
80
+ end
81
+ end
82
+ end
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "e11y/metrics"
4
+
5
+ module E11y
6
+ module SelfMonitoring
7
+ # Buffer monitoring for E11y internal operations.
8
+ #
9
+ # Tracks buffer metrics:
10
+ # - Buffer size (current utilization)
11
+ # - Buffer overflows
12
+ # - Buffer flushes
13
+ #
14
+ # @see ADR-016 §3.3 (Buffer Metrics)
15
+ # @example
16
+ # E11y::SelfMonitoring::BufferMonitor.track_buffer_size(42, buffer_type: 'ring')
17
+ module BufferMonitor
18
+ # Track current buffer size.
19
+ #
20
+ # @param size [Integer] Current number of events in buffer
21
+ # @param buffer_type [String] Buffer type (e.g., 'ring', 'request_scoped')
22
+ # @return [void]
23
+ def self.track_buffer_size(size, buffer_type:)
24
+ E11y::Metrics.gauge(
25
+ :e11y_buffer_size,
26
+ size,
27
+ { buffer_type: buffer_type }
28
+ )
29
+ end
30
+
31
+ # Track buffer overflow (event dropped due to full buffer).
32
+ #
33
+ # @param buffer_type [String] Buffer type
34
+ # @return [void]
35
+ def self.track_buffer_overflow(buffer_type:)
36
+ E11y::Metrics.increment(
37
+ :e11y_buffer_overflows_total,
38
+ { buffer_type: buffer_type }
39
+ )
40
+ end
41
+
42
+ # Track buffer flush operation.
43
+ #
44
+ # @param buffer_type [String] Buffer type
45
+ # @param event_count [Integer] Number of events flushed
46
+ # @param trigger [String] Flush trigger (e.g., 'size', 'timeout', 'explicit')
47
+ # @return [void]
48
+ def self.track_buffer_flush(buffer_type:, event_count:, trigger:)
49
+ E11y::Metrics.increment(
50
+ :e11y_buffer_flushes_total,
51
+ {
52
+ buffer_type: buffer_type,
53
+ trigger: trigger
54
+ }
55
+ )
56
+
57
+ E11y::Metrics.histogram(
58
+ :e11y_buffer_flush_events_count,
59
+ event_count,
60
+ { buffer_type: buffer_type },
61
+ buckets: [1, 10, 50, 100, 500, 1000, 5000]
62
+ )
63
+ end
64
+
65
+ # Track buffer utilization (percentage).
66
+ #
67
+ # @param utilization_percent [Numeric] Buffer utilization percentage (0-100)
68
+ # @param buffer_type [String] Buffer type
69
+ # @return [void]
70
+ def self.track_buffer_utilization(utilization_percent, buffer_type:)
71
+ E11y::Metrics.gauge(
72
+ :e11y_buffer_utilization_percent,
73
+ utilization_percent,
74
+ { buffer_type: buffer_type }
75
+ )
76
+ end
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "e11y/metrics"
4
+
5
+ module E11y
6
+ module SelfMonitoring
7
+ # Performance monitoring for E11y internal operations.
8
+ #
9
+ # Tracks latency metrics for:
10
+ # - Event tracking (E11y.track)
11
+ # - Middleware execution
12
+ # - Adapter writes
13
+ # - Buffer flushes
14
+ #
15
+ # @see ADR-016 §3.1 (Performance Metrics)
16
+ # @example
17
+ # E11y::SelfMonitoring::PerformanceMonitor.track_latency(0.5, event_class: 'OrderCreated', severity: :info)
18
+ module PerformanceMonitor
19
+ # Track E11y.track() latency.
20
+ #
21
+ # @param duration_ms [Numeric] Duration in milliseconds
22
+ # @param event_class [String] Event class name
23
+ # @param severity [Symbol] Event severity
24
+ # @return [void]
25
+ def self.track_latency(duration_ms, event_class:, severity:)
26
+ E11y::Metrics.histogram(
27
+ :e11y_track_duration_seconds,
28
+ duration_ms / 1000.0,
29
+ {
30
+ event_class: event_class,
31
+ severity: severity
32
+ },
33
+ buckets: [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1] # 0.1ms to 100ms
34
+ )
35
+ end
36
+
37
+ # Track middleware execution time.
38
+ #
39
+ # @param middleware_name [String] Middleware class name
40
+ # @param duration_ms [Numeric] Duration in milliseconds
41
+ # @return [void]
42
+ def self.track_middleware_latency(middleware_name, duration_ms)
43
+ E11y::Metrics.histogram(
44
+ :e11y_middleware_duration_seconds,
45
+ duration_ms / 1000.0,
46
+ { middleware: middleware_name },
47
+ buckets: [0.00001, 0.0001, 0.0005, 0.001, 0.005] # 0.01ms to 5ms
48
+ )
49
+ end
50
+
51
+ # Track adapter send latency.
52
+ #
53
+ # @param adapter_name [String] Adapter class name
54
+ # @param duration_ms [Numeric] Duration in milliseconds
55
+ # @return [void]
56
+ def self.track_adapter_latency(adapter_name, duration_ms)
57
+ E11y::Metrics.histogram(
58
+ :e11y_adapter_send_duration_seconds,
59
+ duration_ms / 1000.0,
60
+ { adapter: adapter_name },
61
+ buckets: [0.001, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0] # 1ms to 5s
62
+ )
63
+ end
64
+
65
+ # Track buffer flush latency.
66
+ #
67
+ # @param duration_ms [Numeric] Duration in milliseconds
68
+ # @param event_count [Integer] Number of events flushed
69
+ # @return [void]
70
+ def self.track_flush_latency(duration_ms, event_count)
71
+ E11y::Metrics.histogram(
72
+ :e11y_buffer_flush_duration_seconds,
73
+ duration_ms / 1000.0,
74
+ { event_count_bucket: bucket_event_count(event_count) },
75
+ buckets: [0.001, 0.01, 0.05, 0.1, 0.5, 1.0]
76
+ )
77
+ end
78
+
79
+ # Convert event count to a low-cardinality bucket label (used as the event_count_bucket label by track_flush_latency).
80
+ # Note: a count of 0 is reported under "1-10"; any value not covered by the integer ranges below (above 500, negative, or falling between ranges) is reported as "500+".
81
+ # @param count [Integer] Event count
82
+ # @return [String] Bucket label
83
+ # @api private
84
+ def self.bucket_event_count(count)
85
+ case count
86
+ when 0..10 then "1-10"
87
+ when 11..50 then "11-50"
88
+ when 51..100 then "51-100"
89
+ when 101..500 then "101-500"
90
+ else "500+"
91
+ end
92
+ end
93
+
94
+ private_class_method :bucket_event_count
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,146 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "e11y/metrics"
4
+
5
+ module E11y
6
+ module SelfMonitoring
7
+ # Reliability monitoring for E11y internal operations.
8
+ #
9
+ # Tracks success/failure rates for:
10
+ # - Event tracking
11
+ # - Adapter writes
12
+ # - Buffer operations
13
+ # - DLQ saves
14
+ #
15
+ # @see ADR-016 §3.2 (Reliability Metrics)
16
+ # @example
17
+ # E11y::SelfMonitoring::ReliabilityMonitor.track_event_success(event_type: 'order.created')
18
+ module ReliabilityMonitor
19
+ # Track successful event tracking.
20
+ #
21
+ # @param event_type [String] Event type/name
22
+ # @return [void]
23
+ def self.track_event_success(event_type:)
24
+ E11y::Metrics.increment(
25
+ :e11y_events_tracked_total,
26
+ {
27
+ event_type: event_type,
28
+ status: "success"
29
+ }
30
+ )
31
+ end
32
+
33
+ # Track failed event tracking.
34
+ #
35
+ # @param event_type [String] Event type/name
36
+ # @param reason [String] Failure reason (e.g., 'validation_error', 'adapter_error')
37
+ # @return [void]
38
+ def self.track_event_failure(event_type:, reason:)
39
+ E11y::Metrics.increment(
40
+ :e11y_events_tracked_total,
41
+ {
42
+ event_type: event_type,
43
+ status: "failure",
44
+ reason: reason
45
+ }
46
+ )
47
+ end
48
+
49
+ # Track dropped event (rate limited, sampled out, etc).
50
+ #
51
+ # @param event_type [String] Event type/name
52
+ # @param reason [String] Drop reason (e.g., 'rate_limited', 'sampled_out')
53
+ # @return [void]
54
+ def self.track_event_dropped(event_type:, reason:)
55
+ E11y::Metrics.increment(
56
+ :e11y_events_dropped_total,
57
+ {
58
+ event_type: event_type,
59
+ reason: reason
60
+ }
61
+ )
62
+ end
63
+
64
+ # Track adapter write success.
65
+ #
66
+ # @param adapter_name [String] Adapter class name
67
+ # @return [void]
68
+ def self.track_adapter_success(adapter_name:)
69
+ E11y::Metrics.increment(
70
+ :e11y_adapter_writes_total,
71
+ {
72
+ adapter: adapter_name,
73
+ status: "success"
74
+ }
75
+ )
76
+ end
77
+
78
+ # Track adapter write failure.
79
+ #
80
+ # @param adapter_name [String] Adapter class name
81
+ # @param error_class [String] Error class name
82
+ # @return [void]
83
+ def self.track_adapter_failure(adapter_name:, error_class:)
84
+ E11y::Metrics.increment(
85
+ :e11y_adapter_writes_total,
86
+ {
87
+ adapter: adapter_name,
88
+ status: "failure",
89
+ error_class: error_class
90
+ }
91
+ )
92
+ end
93
+
94
+ # Track DLQ save operation.
95
+ #
96
+ # @param reason [String] Reason for DLQ save (e.g., 'adapter_error', 'rate_limited')
97
+ # @return [void]
98
+ def self.track_dlq_save(reason:)
99
+ E11y::Metrics.increment(
100
+ :e11y_dlq_saves_total,
101
+ { reason: reason }
102
+ )
103
+ end
104
+
105
+ # Track DLQ replay operation.
106
+ #
107
+ # @param status [String] Replay status ('success' or 'failure')
108
+ # @return [void]
109
+ def self.track_dlq_replay(status:)
110
+ E11y::Metrics.increment(
111
+ :e11y_dlq_replays_total,
112
+ { status: status }
113
+ )
114
+ end
115
+
116
+ # Track circuit breaker state change.
117
+ #
118
+ # @param adapter_name [String] Adapter class name
119
+ # @param state [String] New circuit state ('open', 'half_open', 'closed')
120
+ # @return [void]
121
+ def self.track_circuit_state(adapter_name:, state:)
122
+ E11y::Metrics.gauge(
123
+ :e11y_circuit_breaker_state,
124
+ state_to_value(state),
125
+ { adapter: adapter_name }
126
+ )
127
+ end
128
+
129
+ # Convert circuit state to numeric value for gauge.
130
+ #
131
+ # @param state [String] Circuit state
132
+ # @return [Integer] Numeric representation (0=closed, 1=half_open, 2=open)
133
+ # @api private
134
+ def self.state_to_value(state)
135
+ case state
136
+ when "closed" then 0
137
+ when "half_open" then 1
138
+ when "open" then 2
139
+ else 0
140
+ end
141
+ end
142
+
143
+ private_class_method :state_to_value
144
+ end
145
+ end
146
+ end