e11y 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +4 -0
  3. data/.rubocop.yml +69 -0
  4. data/CHANGELOG.md +26 -0
  5. data/CODE_OF_CONDUCT.md +64 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +179 -0
  8. data/Rakefile +37 -0
  9. data/benchmarks/run_all.rb +33 -0
  10. data/config/README.md +83 -0
  11. data/config/loki-local-config.yaml +35 -0
  12. data/config/prometheus.yml +15 -0
  13. data/docker-compose.yml +78 -0
  14. data/docs/00-ICP-AND-TIMELINE.md +483 -0
  15. data/docs/01-SCALE-REQUIREMENTS.md +858 -0
  16. data/docs/ADR-001-architecture.md +2617 -0
  17. data/docs/ADR-002-metrics-yabeda.md +1395 -0
  18. data/docs/ADR-003-slo-observability.md +3337 -0
  19. data/docs/ADR-004-adapter-architecture.md +2385 -0
  20. data/docs/ADR-005-tracing-context.md +1372 -0
  21. data/docs/ADR-006-security-compliance.md +4143 -0
  22. data/docs/ADR-007-opentelemetry-integration.md +1385 -0
  23. data/docs/ADR-008-rails-integration.md +1911 -0
  24. data/docs/ADR-009-cost-optimization.md +2993 -0
  25. data/docs/ADR-010-developer-experience.md +2166 -0
  26. data/docs/ADR-011-testing-strategy.md +1836 -0
  27. data/docs/ADR-012-event-evolution.md +958 -0
  28. data/docs/ADR-013-reliability-error-handling.md +2750 -0
  29. data/docs/ADR-014-event-driven-slo.md +1533 -0
  30. data/docs/ADR-015-middleware-order.md +1061 -0
  31. data/docs/ADR-016-self-monitoring-slo.md +1234 -0
  32. data/docs/API-REFERENCE-L28.md +914 -0
  33. data/docs/COMPREHENSIVE-CONFIGURATION.md +2366 -0
  34. data/docs/IMPLEMENTATION_NOTES.md +2804 -0
  35. data/docs/IMPLEMENTATION_PLAN.md +1971 -0
  36. data/docs/IMPLEMENTATION_PLAN_ARCHITECTURE.md +586 -0
  37. data/docs/PLAN.md +148 -0
  38. data/docs/QUICK-START.md +934 -0
  39. data/docs/README.md +296 -0
  40. data/docs/design/00-memory-optimization.md +593 -0
  41. data/docs/guides/MIGRATION-L27-L28.md +692 -0
  42. data/docs/guides/PERFORMANCE-BENCHMARKS.md +434 -0
  43. data/docs/guides/README.md +44 -0
  44. data/docs/prd/01-overview-vision.md +440 -0
  45. data/docs/use_cases/README.md +119 -0
  46. data/docs/use_cases/UC-001-request-scoped-debug-buffering.md +813 -0
  47. data/docs/use_cases/UC-002-business-event-tracking.md +1953 -0
  48. data/docs/use_cases/UC-003-pattern-based-metrics.md +1627 -0
  49. data/docs/use_cases/UC-004-zero-config-slo-tracking.md +728 -0
  50. data/docs/use_cases/UC-005-sentry-integration.md +759 -0
  51. data/docs/use_cases/UC-006-trace-context-management.md +905 -0
  52. data/docs/use_cases/UC-007-pii-filtering.md +2648 -0
  53. data/docs/use_cases/UC-008-opentelemetry-integration.md +1153 -0
  54. data/docs/use_cases/UC-009-multi-service-tracing.md +1043 -0
  55. data/docs/use_cases/UC-010-background-job-tracking.md +1018 -0
  56. data/docs/use_cases/UC-011-rate-limiting.md +1906 -0
  57. data/docs/use_cases/UC-012-audit-trail.md +2301 -0
  58. data/docs/use_cases/UC-013-high-cardinality-protection.md +2127 -0
  59. data/docs/use_cases/UC-014-adaptive-sampling.md +1940 -0
  60. data/docs/use_cases/UC-015-cost-optimization.md +735 -0
  61. data/docs/use_cases/UC-016-rails-logger-migration.md +785 -0
  62. data/docs/use_cases/UC-017-local-development.md +867 -0
  63. data/docs/use_cases/UC-018-testing-events.md +1081 -0
  64. data/docs/use_cases/UC-019-tiered-storage-migration.md +562 -0
  65. data/docs/use_cases/UC-020-event-versioning.md +708 -0
  66. data/docs/use_cases/UC-021-error-handling-retry-dlq.md +956 -0
  67. data/docs/use_cases/UC-022-event-registry.md +648 -0
  68. data/docs/use_cases/backlog.md +226 -0
  69. data/e11y.gemspec +76 -0
  70. data/lib/e11y/adapters/adaptive_batcher.rb +207 -0
  71. data/lib/e11y/adapters/audit_encrypted.rb +239 -0
  72. data/lib/e11y/adapters/base.rb +580 -0
  73. data/lib/e11y/adapters/file.rb +224 -0
  74. data/lib/e11y/adapters/in_memory.rb +216 -0
  75. data/lib/e11y/adapters/loki.rb +333 -0
  76. data/lib/e11y/adapters/otel_logs.rb +203 -0
  77. data/lib/e11y/adapters/registry.rb +141 -0
  78. data/lib/e11y/adapters/sentry.rb +230 -0
  79. data/lib/e11y/adapters/stdout.rb +108 -0
  80. data/lib/e11y/adapters/yabeda.rb +370 -0
  81. data/lib/e11y/buffers/adaptive_buffer.rb +339 -0
  82. data/lib/e11y/buffers/base_buffer.rb +40 -0
  83. data/lib/e11y/buffers/request_scoped_buffer.rb +246 -0
  84. data/lib/e11y/buffers/ring_buffer.rb +267 -0
  85. data/lib/e11y/buffers.rb +14 -0
  86. data/lib/e11y/console.rb +122 -0
  87. data/lib/e11y/current.rb +48 -0
  88. data/lib/e11y/event/base.rb +894 -0
  89. data/lib/e11y/event/value_sampling_config.rb +84 -0
  90. data/lib/e11y/events/base_audit_event.rb +43 -0
  91. data/lib/e11y/events/base_payment_event.rb +33 -0
  92. data/lib/e11y/events/rails/cache/delete.rb +21 -0
  93. data/lib/e11y/events/rails/cache/read.rb +23 -0
  94. data/lib/e11y/events/rails/cache/write.rb +22 -0
  95. data/lib/e11y/events/rails/database/query.rb +45 -0
  96. data/lib/e11y/events/rails/http/redirect.rb +21 -0
  97. data/lib/e11y/events/rails/http/request.rb +26 -0
  98. data/lib/e11y/events/rails/http/send_file.rb +21 -0
  99. data/lib/e11y/events/rails/http/start_processing.rb +26 -0
  100. data/lib/e11y/events/rails/job/completed.rb +22 -0
  101. data/lib/e11y/events/rails/job/enqueued.rb +22 -0
  102. data/lib/e11y/events/rails/job/failed.rb +22 -0
  103. data/lib/e11y/events/rails/job/scheduled.rb +23 -0
  104. data/lib/e11y/events/rails/job/started.rb +22 -0
  105. data/lib/e11y/events/rails/log.rb +56 -0
  106. data/lib/e11y/events/rails/view/render.rb +23 -0
  107. data/lib/e11y/events.rb +18 -0
  108. data/lib/e11y/instruments/active_job.rb +201 -0
  109. data/lib/e11y/instruments/rails_instrumentation.rb +141 -0
  110. data/lib/e11y/instruments/sidekiq.rb +175 -0
  111. data/lib/e11y/logger/bridge.rb +205 -0
  112. data/lib/e11y/metrics/cardinality_protection.rb +172 -0
  113. data/lib/e11y/metrics/cardinality_tracker.rb +134 -0
  114. data/lib/e11y/metrics/registry.rb +234 -0
  115. data/lib/e11y/metrics/relabeling.rb +226 -0
  116. data/lib/e11y/metrics.rb +102 -0
  117. data/lib/e11y/middleware/audit_signing.rb +174 -0
  118. data/lib/e11y/middleware/base.rb +140 -0
  119. data/lib/e11y/middleware/event_slo.rb +167 -0
  120. data/lib/e11y/middleware/pii_filter.rb +266 -0
  121. data/lib/e11y/middleware/pii_filtering.rb +280 -0
  122. data/lib/e11y/middleware/rate_limiting.rb +214 -0
  123. data/lib/e11y/middleware/request.rb +163 -0
  124. data/lib/e11y/middleware/routing.rb +157 -0
  125. data/lib/e11y/middleware/sampling.rb +254 -0
  126. data/lib/e11y/middleware/slo.rb +168 -0
  127. data/lib/e11y/middleware/trace_context.rb +131 -0
  128. data/lib/e11y/middleware/validation.rb +118 -0
  129. data/lib/e11y/middleware/versioning.rb +132 -0
  130. data/lib/e11y/middleware.rb +12 -0
  131. data/lib/e11y/pii/patterns.rb +90 -0
  132. data/lib/e11y/pii.rb +13 -0
  133. data/lib/e11y/pipeline/builder.rb +155 -0
  134. data/lib/e11y/pipeline/zone_validator.rb +110 -0
  135. data/lib/e11y/pipeline.rb +12 -0
  136. data/lib/e11y/presets/audit_event.rb +65 -0
  137. data/lib/e11y/presets/debug_event.rb +34 -0
  138. data/lib/e11y/presets/high_value_event.rb +51 -0
  139. data/lib/e11y/presets.rb +19 -0
  140. data/lib/e11y/railtie.rb +138 -0
  141. data/lib/e11y/reliability/circuit_breaker.rb +216 -0
  142. data/lib/e11y/reliability/dlq/file_storage.rb +277 -0
  143. data/lib/e11y/reliability/dlq/filter.rb +117 -0
  144. data/lib/e11y/reliability/retry_handler.rb +207 -0
  145. data/lib/e11y/reliability/retry_rate_limiter.rb +117 -0
  146. data/lib/e11y/sampling/error_spike_detector.rb +225 -0
  147. data/lib/e11y/sampling/load_monitor.rb +161 -0
  148. data/lib/e11y/sampling/stratified_tracker.rb +92 -0
  149. data/lib/e11y/sampling/value_extractor.rb +82 -0
  150. data/lib/e11y/self_monitoring/buffer_monitor.rb +79 -0
  151. data/lib/e11y/self_monitoring/performance_monitor.rb +97 -0
  152. data/lib/e11y/self_monitoring/reliability_monitor.rb +146 -0
  153. data/lib/e11y/slo/event_driven.rb +150 -0
  154. data/lib/e11y/slo/tracker.rb +119 -0
  155. data/lib/e11y/version.rb +9 -0
  156. data/lib/e11y.rb +283 -0
  157. metadata +452 -0
@@ -0,0 +1,226 @@
1
+ # Backlog (Future Enhancements)
2
+
3
+ **Status:** Draft
4
+ **Priority:** Low (v1.1+)
5
+ **Category:** Future Ideas
6
+
7
+ ---
8
+
9
+ ## Overview
10
+
11
+ This document captures promising ideas for future versions of `e11y` that are not planned for v1.0 but may provide significant value in subsequent releases.
12
+
13
+ ---
14
+
15
+ ## 1. Quick Start Presets
16
+
17
+ ### Problem
18
+
19
+ Configuration complexity can be overwhelming for new users. Setting up production-ready configuration requires understanding multiple subsystems (sampling, compression, retention, payload optimization).
20
+
21
+ ### Proposal
22
+
23
+ Provide pre-configured profiles for common scenarios:
24
+
25
+ ```ruby
26
+ E11y.configure do |config|
27
+ # Option 1: Use a preset
28
+ config.use_preset :production_high_traffic
29
+
30
+ # Option 2: Use a preset with overrides
31
+ config.use_preset :production_high_traffic do |preset|
32
+ preset.sampling.max_events_per_sec = 5_000 # Override default
33
+ end
34
+ end
35
+ ```
36
+
37
+ ### Available Presets
38
+
39
+ | Preset | Description | Sample Rate | Compression | Retention |
40
+ |--------|-------------|-------------|-------------|-----------|
41
+ | `:development` | Local dev, no sampling | 100% | None | 1 day |
42
+ | `:staging` | Pre-prod testing | 50% | Gzip | 7 days |
43
+ | `:production_low_traffic` | < 1K events/sec | 80% | Gzip | 30 days |
44
+ | `:production_high_traffic` | > 10K events/sec | 10% | Zstd | 7 days hot, 90 days warm |
45
+ | `:production_cost_optimized` | Aggressive cost reduction | 5% | Zstd level 9 | 3 days hot, 30 days warm |
46
+
47
+ ### Implementation Example
48
+
49
+ ```ruby
50
+ module E11y
51
+ module Presets
52
+ PRODUCTION_HIGH_TRAFFIC = {
53
+ adaptive_sampling: {
54
+ enabled: true,
55
+ load_based: { max_events_per_sec: 10_000 },
56
+ error_based: { enabled: true },
57
+ value_based: {
58
+ high_value_patterns: [/^payment\./, /^order\./, /^error\./],
59
+ low_value_patterns: [/^debug\./, /^health_check/]
60
+ }
61
+ },
62
+ compression: {
63
+ enabled: true,
64
+ algorithm: :zstd,
65
+ level: 3
66
+ },
67
+ retention_tagging: {
68
+ enabled: true,
69
+ default_retention: 7.days,
70
+ retention_by_pattern: {
71
+ 'audit.*' => 7.years,
72
+ 'payment.*' => 1.year,
73
+ 'debug.*' => 1.day
74
+ }
75
+ },
76
+ payload_minimization: {
77
+ enabled: true,
78
+ truncate_strings_at: 1000,
79
+ truncate_arrays_at: 100,
80
+ remove_null_fields: true
81
+ }
82
+ }.freeze
83
+ end
84
+ end
85
+ ```
86
+
87
+ ### Benefits
88
+
89
+ - ✅ Faster onboarding (< 5 minutes to production-ready config)
90
+ - ✅ Best practices baked in
91
+ - ✅ Easy to customize (override specific settings)
92
+ - ✅ Reduces configuration errors
93
+
94
+ ### Priority
95
+
96
+ **Medium (v1.1)**
97
+
98
+ ---
99
+
100
+ ## 2. Sampling Budget
101
+
102
+ ### Problem
103
+
104
+ Current sampling is reactive (based on load). Hard to predict costs. Organizations need predictable telemetry budgets.
105
+
106
+ ### Proposal
107
+
108
+ Proactive budget-based sampling:
109
+
110
+ ```ruby
111
+ E11y.configure do |config|
112
+ config.cost_optimization do
113
+ sampling_budget do
114
+ enabled true
115
+
116
+ # Set a daily event budget
117
+ daily_budget 10_000_000 # 10M events/day
118
+
119
+ # Budget allocation by event type
120
+ allocate_by_pattern do
121
+ pattern 'payment.*', percent: 20 # 2M events/day
122
+ pattern 'order.*', percent: 15 # 1.5M events/day
123
+ pattern 'error.*', percent: 10 # 1M events/day (always track)
124
+ pattern 'debug.*', percent: 5 # 500K events/day
125
+ pattern '*', percent: 50 # 5M events/day (everything else)
126
+ end
127
+
128
+ # Dynamic adjustment
129
+ rebalance_interval 1.hour # Recalculate sample rates every hour
130
+
131
+ # Overflow strategy
132
+ on_budget_exceeded :reduce_low_value # or :stop_sampling, :alert_only
133
+
134
+ # Alert when budget is 80% consumed
135
+ alert_threshold 0.8
136
+ alert_to :slack # or :pagerduty, :email
137
+ end
138
+ end
139
+ end
140
+ ```
141
+
142
+ ### How It Works
143
+
144
+ #### 1. Budget Calculation
145
+
146
+ ```ruby
147
+ # At start of each hour:
148
+ hourly_budget = daily_budget / 24 # 416,666 events/hour
149
+
150
+ # Per-pattern budget:
151
+ payment_budget = hourly_budget * 0.20 # 83,333 events/hour
152
+ ```
153
+
154
+ #### 2. Dynamic Sample Rate Adjustment
155
+
156
+ ```ruby
157
+ # If payment events are at 50% of budget after 30 min:
158
+ current_rate = 41,666 / (83,333 / 2) # = 1.0 (100% sampling)
159
+
160
+ # If payment events are at 90% of budget after 30 min:
161
+ current_rate = 74,999 / (83,333 / 2) # = 1.8 (need to reduce)
162
+ new_sample_rate = 1.0 / 1.8 # = 55% sampling for next 30 min
163
+ ```
164
+
165
+ #### 3. Cost Predictability
166
+
167
+ ```ruby
168
+ # Known daily cost:
169
+ cost_per_event = $0.0001 # e.g., Datadog pricing
170
+ daily_cost = 10_000_000 * $0.0001 = $1,000/day
171
+ monthly_cost = $1,000 * 30 = $30,000/month
172
+ ```
173
+
174
+ ### Benefits
175
+
176
+ - ✅ Predictable costs (no surprises)
177
+ - ✅ Budget enforcement (hard cap on events)
178
+ - ✅ Intelligent allocation (high-value events get more budget)
179
+ - ✅ Real-time alerts (before overspending)
180
+
181
+ ### Trade-offs
182
+
183
+ - ⚠️ Complexity (requires Redis for distributed state)
184
+ - ⚠️ Potential data loss (if budget exceeded)
185
+ - ⚠️ Tuning required (optimal allocation per app)
186
+
187
+ ### Priority
188
+
189
+ **Low (v1.2+)**
190
+
191
+ ### Alternative Approach
192
+
193
+ - Use cloud cost management tools (AWS Cost Anomaly Detection, Datadog Cost Management)
194
+ - Set alerts on actual billing, not event counts
195
+ - Simpler, but less proactive
196
+
197
+ ---
198
+
199
+ ## 3. Additional Ideas (Placeholder)
200
+
201
+ Future ideas to be added:
202
+
203
+ - **ML-Based Anomaly Detection:** Automatically detect unusual patterns in events
204
+ - **Event Replay from Storage:** Replay events from cold storage for debugging
205
+ - **Multi-Tenant Support:** Isolate events by tenant/customer
206
+ - **Event Transformation Rules:** Transform events before sending to adapters
207
+ - **Custom Retention Policies:** More granular control over data lifecycle
208
+
209
+ ---
210
+
211
+ ## Related Use Cases
212
+
213
+ - [UC-015: Cost Optimization](UC-015-cost-optimization.md) - Current cost optimization strategies
214
+ - [UC-014: Adaptive Sampling](UC-014-adaptive-sampling.md) - Current sampling approach
215
+
216
+ ---
217
+
218
+ ## Related ADRs
219
+
220
+ - [ADR-009: Cost Optimization](../ADR-009-cost-optimization.md) - Current implementation
221
+
222
+ ---
223
+
224
+ **Status:** ✅ Draft Complete
225
+ **Next Review:** After v1.0 release
226
+ **Estimated Value:** High (for enterprise customers)
data/e11y.gemspec ADDED
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/e11y/version"
4
+
5
# Gem specification for e11y: identity, registry metadata, packaged files
# and dependency declarations.
Gem::Specification.new do |spec|
  spec.name = "e11y"
  spec.version = E11y::VERSION
  spec.authors = ["Artur Seletskiy"]

  spec.summary = "E11y - Easy Telemetry: Production-grade observability for Rails with zero-config SLO tracking"
  spec.description = <<~DESC
    E11y (Easy Telemetry) - production-ready observability gem for Ruby on Rails applications.

    KEY FEATURES:
    • 📊 Zero-Config SLO Tracking - automatic Service Level Objectives for HTTP endpoints and background jobs
    • 🎯 Request-Scoped Debug Buffering - buffer debug logs in memory, flush only on errors (reduce log noise by 90%)
    • 📈 Pattern-Based Metrics - auto-generate Prometheus/Yabeda metrics from business events
    • 🔒 GDPR/SOC2 Compliance - built-in PII filtering and audit trails
    • 🔌 Pluggable Adapters - send events to Loki, Sentry, OpenTelemetry, Elasticsearch, or custom backends
    • 🚀 High Performance - zero-allocation event tracking, lock-free ring buffers, adaptive memory limits
    • 🧵 Thread-Safe - designed for multi-threaded Rails apps and Sidekiq workers
    • 🎭 Multi-Tenant Ready - trace context propagation across services with OpenTelemetry integration
    • 📝 Type-Safe Events - declarative event schemas with dry-schema validation
    • ⚡ Rate Limiting & Sampling - protect production from metric storms and cost overruns

    Perfect for SuperApp architectures, microservices, and high-scale Rails applications.
    Battle-tested patterns from Devise, Sidekiq, Sentry, and Yabeda.
  DESC
  spec.homepage = "https://github.com/arturseletskiy/e11y"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 3.2.0"

  # Links surfaced on the gem's registry page.
  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/arturseletskiy/e11y"
  spec.metadata["changelog_uri"] = "https://github.com/arturseletskiy/e11y/blob/main/CHANGELOG.md"
  # GitHub URLs are <owner>/<repo>/tree/<branch>/<dir>; the repository name
  # must come before the branch segment, otherwise the link does not resolve.
  spec.metadata["documentation_uri"] = "https://github.com/arturseletskiy/e11y/tree/main/docs"
  spec.metadata["bug_tracker_uri"] = "https://github.com/arturseletskiy/e11y/issues"
  spec.metadata["rubygems_mfa_required"] = "true"

  # Specify which files should be added to the gem when it is released.
  # `git ls-files -z` lists every file tracked by git (NUL-separated); the
  # gemspec itself and development-only paths are then filtered out.
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject do |f|
      (File.expand_path(f) == __FILE__) ||
        f.start_with?(*%w[bin/ test/ spec/ features/ .git .github appveyor Gemfile docs/researches/])
    end
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # Runtime dependencies
  spec.add_dependency "activesupport", ">= 7.0"
  spec.add_dependency "concurrent-ruby", "~> 1.2" # Thread-safe data structures
  spec.add_dependency "dry-schema", "~> 1.13" # Event schema validation
  spec.add_dependency "dry-types", "~> 1.7"
  spec.add_dependency "zeitwerk", "~> 2.6"

  # Development dependencies
  spec.add_development_dependency "rack", "~> 3.0" # For Rack middleware testing
  spec.add_development_dependency "rake", "~> 13.0"
  spec.add_development_dependency "rspec", "~> 3.12"
  spec.add_development_dependency "rubocop", "~> 1.50"
  spec.add_development_dependency "rubocop-rake", "~> 0.6"
  spec.add_development_dependency "rubocop-rspec", "~> 2.22"
  spec.add_development_dependency "simplecov", "~> 0.22"
  spec.add_development_dependency "webmock", "~> 3.19" # For HTTP adapter testing
  spec.add_development_dependency "yard", "~> 0.9"

  # Optional adapter dependencies (install only if using specific adapters).
  # They are declared as development dependencies so the gem's own test suite
  # can load them without forcing them on every consumer.
  # LokiAdapter: gem install faraday faraday-retry
  # SentryAdapter: gem install sentry-ruby
  spec.add_development_dependency "faraday", "~> 2.7" # For LokiAdapter
  spec.add_development_dependency "faraday-retry", "~> 2.2" # For LokiAdapter retry middleware
  spec.add_development_dependency "sentry-ruby", "~> 5.15" # For SentryAdapter
end
@@ -0,0 +1,207 @@
1
+ # frozen_string_literal: true
2
+
3
module E11y
  module Adapters
    # Adaptive batching helper for adapters
    #
    # Collects events in an in-memory buffer and hands them to a callback in
    # batches. A flush happens when:
    # - the buffer reaches +max_size+ (flushed synchronously from {#add}),
    # - +timeout+ seconds have passed since the last flush AND at least
    #   +min_size+ events are buffered (flushed from a background timer thread),
    # - {#flush!} or {#close} is called explicitly.
    #
    # Note that a buffer holding fewer than +min_size+ events is never flushed
    # by the timer; it waits for more events, an explicit {#flush!} or {#close}.
    #
    # The buffer is protected by a mutex. The flush callback itself is invoked
    # with the mutex released, so it may perform slow I/O without blocking
    # producers, and it may call back into the batcher.
    #
    # @example Use in adapter
    #   class MyAdapter < E11y::Adapters::Base
    #     def initialize(config = {})
    #       super
    #       @batcher = AdaptiveBatcher.new(
    #         max_size: 500,
    #         timeout: 5.0,
    #         flush_callback: method(:send_batch)
    #       )
    #     end
    #
    #     def write(event_data)
    #       @batcher.add(event_data)
    #     end
    #
    #     def write_batch(events)
    #       @batcher.flush!
    #       super
    #     end
    #
    #     def close
    #       @batcher.close
    #       super
    #     end
    #
    #     private
    #
    #     def send_batch(events)
    #       # Send events to external system
    #       http_client.post(events)
    #     end
    #   end
    #
    # @see ADR-004 Section 8.1 (Adaptive Batching)
    class AdaptiveBatcher
      # Maximum time (seconds) {#close} waits for a timer-driven flush that is
      # already in progress before forcibly terminating the timer thread.
      TIMER_SHUTDOWN_GRACE = 5.0

      # Initialize adaptive batcher and start its background timer thread.
      #
      # @param flush_callback [Proc, Method] Callback to invoke on flush with events array
      # @param min_size [Integer] Minimum batch size before timeout flush (default: 10)
      # @param max_size [Integer] Maximum batch size (triggers immediate flush, default: 500)
      # @param timeout [Float] Timeout in seconds for automatic flush (default: 5.0)
      def initialize(flush_callback:, min_size: 10, max_size: 500, timeout: 5.0)
        @min_size = min_size
        @max_size = max_size
        @timeout = timeout
        @flush_callback = flush_callback

        @buffer = []
        @mutex = Mutex.new
        # Lets #close wake the timer thread immediately instead of killing it.
        @timer_wakeup = ConditionVariable.new
        @last_flush = monotonic_now
        @closed = false
        @timer_thread = nil

        start_timer_thread!
      end

      # Add event to buffer
      #
      # Automatically flushes if max_size reached; in that case the flush
      # callback runs in the calling thread and any exception it raises
      # propagates to the caller.
      # Thread-safe operation.
      #
      # @param event_data [Hash] Event to add to buffer
      # @return [Boolean] true if added successfully, false if the batcher is closed
      def add(event_data)
        @mutex.synchronize do
          # Checked under the lock so an event can never slip into the buffer
          # after #close has performed its final flush.
          return false if @closed

          @buffer << event_data

          flush_unlocked! if should_flush_immediately?
        end

        true
      end

      # Flush buffer immediately
      #
      # Sends all buffered events to flush_callback.
      # Thread-safe operation.
      #
      # @return [Boolean] true if flushed, false if buffer empty
      def flush!
        @mutex.synchronize { flush_unlocked! }
      end

      # Get current buffer size
      #
      # @return [Integer] Number of events in buffer
      def buffer_size
        @mutex.synchronize { @buffer.size }
      end

      # Check if buffer is empty
      #
      # @return [Boolean] true if buffer is empty
      def empty?
        @mutex.synchronize { @buffer.empty? }
      end

      # Close batcher and flush remaining events
      #
      # Marks the batcher closed (subsequent {#add} calls return false), stops
      # the timer thread and flushes whatever is still buffered. A flush that
      # the timer thread has already started is allowed to finish rather than
      # being interrupted, so its batch is not lost.
      # Safe to call multiple times.
      #
      # @return [Boolean, nil] result of the final flush; nil if already closed
      def close
        timer = @mutex.synchronize do
          return if @closed

          @closed = true
          @timer_wakeup.broadcast

          thread = @timer_thread
          @timer_thread = nil
          thread
        end

        # Must happen outside the mutex: the timer thread needs the lock to exit.
        stop_timer_thread(timer)

        flush!
      end

      private

      # Start background timer thread for automatic flushing
      #
      # Timer thread checks periodically if timeout has expired
      # and flushes buffer if min_size threshold is met.
      #
      # Check interval is min(timeout/2, 1 second) for responsiveness.
      # The thread waits on a condition variable rather than sleeping so that
      # #close can wake it immediately.
      #
      # @api private
      def start_timer_thread!
        check_interval = [@timeout / 2.0, 1.0].min

        @timer_thread = Thread.new do
          @mutex.synchronize do
            until @closed
              @timer_wakeup.wait(@mutex, check_interval)
              break if @closed

              begin
                flush_unlocked! if should_flush_timeout?
              rescue StandardError => e
                # A failing callback must not terminate the timer thread.
                warn "[E11y] AdaptiveBatcher timer error: #{e.message}"
              end
            end
          end
        end

        @timer_thread.name = "e11y-adaptive-batcher-timer"
      end

      # Stop the timer thread, letting an in-progress flush complete.
      #
      # Does nothing when called from the timer thread itself (e.g. #close
      # invoked from inside the flush callback), since a thread cannot join
      # itself. Falls back to Thread#kill only if the thread does not exit
      # within TIMER_SHUTDOWN_GRACE seconds.
      #
      # @param thread [Thread, nil] timer thread to stop
      # @return [void]
      # @api private
      def stop_timer_thread(thread)
        return if thread.nil? || thread == Thread.current

        thread.kill unless thread.join(TIMER_SHUTDOWN_GRACE)
      end

      # Flush buffer (unlocked - must be called within mutex.synchronize)
      #
      # The mutex is temporarily released while the callback runs and is
      # re-acquired before returning, so the surrounding synchronize block
      # still owns the lock when it finishes.
      #
      # @return [Boolean] true if flushed, false if buffer empty
      # @api private
      def flush_unlocked!
        return false if @buffer.empty?

        events = @buffer.dup
        @buffer.clear
        @last_flush = monotonic_now

        # Release mutex before I/O operation
        @mutex.unlock
        begin
          @flush_callback.call(events)
          true
        ensure
          @mutex.lock
        end
      end

      # Check if should flush immediately (max_size reached)
      #
      # @return [Boolean]
      # @api private
      def should_flush_immediately?
        @buffer.size >= @max_size
      end

      # Check if should flush on timeout
      #
      # @return [Boolean]
      # @api private
      def should_flush_timeout?
        return false if @buffer.empty?

        timeout_expired? && @buffer.size >= @min_size
      end

      # Check if timeout has expired since last flush
      #
      # @return [Boolean]
      # @api private
      def timeout_expired?
        (monotonic_now - @last_flush) >= @timeout
      end

      # Current monotonic clock reading in seconds.
      #
      # Used for interval measurement instead of wall-clock time so that system
      # clock adjustments cannot postpone or trigger flushes.
      #
      # @return [Float]
      # @api private
      def monotonic_now
        Process.clock_gettime(Process::CLOCK_MONOTONIC)
      end
    end
  end
end