e11y 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +4 -0
- data/.rubocop.yml +69 -0
- data/CHANGELOG.md +26 -0
- data/CODE_OF_CONDUCT.md +64 -0
- data/LICENSE.txt +21 -0
- data/README.md +179 -0
- data/Rakefile +37 -0
- data/benchmarks/run_all.rb +33 -0
- data/config/README.md +83 -0
- data/config/loki-local-config.yaml +35 -0
- data/config/prometheus.yml +15 -0
- data/docker-compose.yml +78 -0
- data/docs/00-ICP-AND-TIMELINE.md +483 -0
- data/docs/01-SCALE-REQUIREMENTS.md +858 -0
- data/docs/ADR-001-architecture.md +2617 -0
- data/docs/ADR-002-metrics-yabeda.md +1395 -0
- data/docs/ADR-003-slo-observability.md +3337 -0
- data/docs/ADR-004-adapter-architecture.md +2385 -0
- data/docs/ADR-005-tracing-context.md +1372 -0
- data/docs/ADR-006-security-compliance.md +4143 -0
- data/docs/ADR-007-opentelemetry-integration.md +1385 -0
- data/docs/ADR-008-rails-integration.md +1911 -0
- data/docs/ADR-009-cost-optimization.md +2993 -0
- data/docs/ADR-010-developer-experience.md +2166 -0
- data/docs/ADR-011-testing-strategy.md +1836 -0
- data/docs/ADR-012-event-evolution.md +958 -0
- data/docs/ADR-013-reliability-error-handling.md +2750 -0
- data/docs/ADR-014-event-driven-slo.md +1533 -0
- data/docs/ADR-015-middleware-order.md +1061 -0
- data/docs/ADR-016-self-monitoring-slo.md +1234 -0
- data/docs/API-REFERENCE-L28.md +914 -0
- data/docs/COMPREHENSIVE-CONFIGURATION.md +2366 -0
- data/docs/IMPLEMENTATION_NOTES.md +2804 -0
- data/docs/IMPLEMENTATION_PLAN.md +1971 -0
- data/docs/IMPLEMENTATION_PLAN_ARCHITECTURE.md +586 -0
- data/docs/PLAN.md +148 -0
- data/docs/QUICK-START.md +934 -0
- data/docs/README.md +296 -0
- data/docs/design/00-memory-optimization.md +593 -0
- data/docs/guides/MIGRATION-L27-L28.md +692 -0
- data/docs/guides/PERFORMANCE-BENCHMARKS.md +434 -0
- data/docs/guides/README.md +44 -0
- data/docs/prd/01-overview-vision.md +440 -0
- data/docs/use_cases/README.md +119 -0
- data/docs/use_cases/UC-001-request-scoped-debug-buffering.md +813 -0
- data/docs/use_cases/UC-002-business-event-tracking.md +1953 -0
- data/docs/use_cases/UC-003-pattern-based-metrics.md +1627 -0
- data/docs/use_cases/UC-004-zero-config-slo-tracking.md +728 -0
- data/docs/use_cases/UC-005-sentry-integration.md +759 -0
- data/docs/use_cases/UC-006-trace-context-management.md +905 -0
- data/docs/use_cases/UC-007-pii-filtering.md +2648 -0
- data/docs/use_cases/UC-008-opentelemetry-integration.md +1153 -0
- data/docs/use_cases/UC-009-multi-service-tracing.md +1043 -0
- data/docs/use_cases/UC-010-background-job-tracking.md +1018 -0
- data/docs/use_cases/UC-011-rate-limiting.md +1906 -0
- data/docs/use_cases/UC-012-audit-trail.md +2301 -0
- data/docs/use_cases/UC-013-high-cardinality-protection.md +2127 -0
- data/docs/use_cases/UC-014-adaptive-sampling.md +1940 -0
- data/docs/use_cases/UC-015-cost-optimization.md +735 -0
- data/docs/use_cases/UC-016-rails-logger-migration.md +785 -0
- data/docs/use_cases/UC-017-local-development.md +867 -0
- data/docs/use_cases/UC-018-testing-events.md +1081 -0
- data/docs/use_cases/UC-019-tiered-storage-migration.md +562 -0
- data/docs/use_cases/UC-020-event-versioning.md +708 -0
- data/docs/use_cases/UC-021-error-handling-retry-dlq.md +956 -0
- data/docs/use_cases/UC-022-event-registry.md +648 -0
- data/docs/use_cases/backlog.md +226 -0
- data/e11y.gemspec +76 -0
- data/lib/e11y/adapters/adaptive_batcher.rb +207 -0
- data/lib/e11y/adapters/audit_encrypted.rb +239 -0
- data/lib/e11y/adapters/base.rb +580 -0
- data/lib/e11y/adapters/file.rb +224 -0
- data/lib/e11y/adapters/in_memory.rb +216 -0
- data/lib/e11y/adapters/loki.rb +333 -0
- data/lib/e11y/adapters/otel_logs.rb +203 -0
- data/lib/e11y/adapters/registry.rb +141 -0
- data/lib/e11y/adapters/sentry.rb +230 -0
- data/lib/e11y/adapters/stdout.rb +108 -0
- data/lib/e11y/adapters/yabeda.rb +370 -0
- data/lib/e11y/buffers/adaptive_buffer.rb +339 -0
- data/lib/e11y/buffers/base_buffer.rb +40 -0
- data/lib/e11y/buffers/request_scoped_buffer.rb +246 -0
- data/lib/e11y/buffers/ring_buffer.rb +267 -0
- data/lib/e11y/buffers.rb +14 -0
- data/lib/e11y/console.rb +122 -0
- data/lib/e11y/current.rb +48 -0
- data/lib/e11y/event/base.rb +894 -0
- data/lib/e11y/event/value_sampling_config.rb +84 -0
- data/lib/e11y/events/base_audit_event.rb +43 -0
- data/lib/e11y/events/base_payment_event.rb +33 -0
- data/lib/e11y/events/rails/cache/delete.rb +21 -0
- data/lib/e11y/events/rails/cache/read.rb +23 -0
- data/lib/e11y/events/rails/cache/write.rb +22 -0
- data/lib/e11y/events/rails/database/query.rb +45 -0
- data/lib/e11y/events/rails/http/redirect.rb +21 -0
- data/lib/e11y/events/rails/http/request.rb +26 -0
- data/lib/e11y/events/rails/http/send_file.rb +21 -0
- data/lib/e11y/events/rails/http/start_processing.rb +26 -0
- data/lib/e11y/events/rails/job/completed.rb +22 -0
- data/lib/e11y/events/rails/job/enqueued.rb +22 -0
- data/lib/e11y/events/rails/job/failed.rb +22 -0
- data/lib/e11y/events/rails/job/scheduled.rb +23 -0
- data/lib/e11y/events/rails/job/started.rb +22 -0
- data/lib/e11y/events/rails/log.rb +56 -0
- data/lib/e11y/events/rails/view/render.rb +23 -0
- data/lib/e11y/events.rb +18 -0
- data/lib/e11y/instruments/active_job.rb +201 -0
- data/lib/e11y/instruments/rails_instrumentation.rb +141 -0
- data/lib/e11y/instruments/sidekiq.rb +175 -0
- data/lib/e11y/logger/bridge.rb +205 -0
- data/lib/e11y/metrics/cardinality_protection.rb +172 -0
- data/lib/e11y/metrics/cardinality_tracker.rb +134 -0
- data/lib/e11y/metrics/registry.rb +234 -0
- data/lib/e11y/metrics/relabeling.rb +226 -0
- data/lib/e11y/metrics.rb +102 -0
- data/lib/e11y/middleware/audit_signing.rb +174 -0
- data/lib/e11y/middleware/base.rb +140 -0
- data/lib/e11y/middleware/event_slo.rb +167 -0
- data/lib/e11y/middleware/pii_filter.rb +266 -0
- data/lib/e11y/middleware/pii_filtering.rb +280 -0
- data/lib/e11y/middleware/rate_limiting.rb +214 -0
- data/lib/e11y/middleware/request.rb +163 -0
- data/lib/e11y/middleware/routing.rb +157 -0
- data/lib/e11y/middleware/sampling.rb +254 -0
- data/lib/e11y/middleware/slo.rb +168 -0
- data/lib/e11y/middleware/trace_context.rb +131 -0
- data/lib/e11y/middleware/validation.rb +118 -0
- data/lib/e11y/middleware/versioning.rb +132 -0
- data/lib/e11y/middleware.rb +12 -0
- data/lib/e11y/pii/patterns.rb +90 -0
- data/lib/e11y/pii.rb +13 -0
- data/lib/e11y/pipeline/builder.rb +155 -0
- data/lib/e11y/pipeline/zone_validator.rb +110 -0
- data/lib/e11y/pipeline.rb +12 -0
- data/lib/e11y/presets/audit_event.rb +65 -0
- data/lib/e11y/presets/debug_event.rb +34 -0
- data/lib/e11y/presets/high_value_event.rb +51 -0
- data/lib/e11y/presets.rb +19 -0
- data/lib/e11y/railtie.rb +138 -0
- data/lib/e11y/reliability/circuit_breaker.rb +216 -0
- data/lib/e11y/reliability/dlq/file_storage.rb +277 -0
- data/lib/e11y/reliability/dlq/filter.rb +117 -0
- data/lib/e11y/reliability/retry_handler.rb +207 -0
- data/lib/e11y/reliability/retry_rate_limiter.rb +117 -0
- data/lib/e11y/sampling/error_spike_detector.rb +225 -0
- data/lib/e11y/sampling/load_monitor.rb +161 -0
- data/lib/e11y/sampling/stratified_tracker.rb +92 -0
- data/lib/e11y/sampling/value_extractor.rb +82 -0
- data/lib/e11y/self_monitoring/buffer_monitor.rb +79 -0
- data/lib/e11y/self_monitoring/performance_monitor.rb +97 -0
- data/lib/e11y/self_monitoring/reliability_monitor.rb +146 -0
- data/lib/e11y/slo/event_driven.rb +150 -0
- data/lib/e11y/slo/tracker.rb +119 -0
- data/lib/e11y/version.rb +9 -0
- data/lib/e11y.rb +283 -0
- metadata +452 -0
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
# Backlog (Future Enhancements)
|
|
2
|
+
|
|
3
|
+
**Status:** Draft
|
|
4
|
+
**Priority:** Low (v1.1+)
|
|
5
|
+
**Category:** Future Ideas
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Overview
|
|
10
|
+
|
|
11
|
+
This document captures promising ideas for future versions of `e11y` that are not planned for v1.0 but may provide significant value in subsequent releases.
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## 1. Quick Start Presets
|
|
16
|
+
|
|
17
|
+
### Problem
|
|
18
|
+
|
|
19
|
+
Configuration complexity can be overwhelming for new users. Setting up production-ready configuration requires understanding multiple subsystems (sampling, compression, retention, payload optimization).
|
|
20
|
+
|
|
21
|
+
### Proposal
|
|
22
|
+
|
|
23
|
+
Provide pre-configured profiles for common scenarios:
|
|
24
|
+
|
|
25
|
+
```ruby
|
|
26
|
+
E11y.configure do |config|
|
|
27
|
+
# Option 1: Use a preset
|
|
28
|
+
config.use_preset :production_high_traffic
|
|
29
|
+
|
|
30
|
+
# Option 2: Use a preset with overrides
|
|
31
|
+
config.use_preset :production_high_traffic do |preset|
|
|
32
|
+
preset.sampling.max_events_per_sec = 5_000 # Override default
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### Available Presets
|
|
38
|
+
|
|
39
|
+
| Preset | Description | Sample Rate | Compression | Retention |
|
|
40
|
+
|--------|-------------|-------------|-------------|-----------|
|
|
41
|
+
| `:development` | Local dev, no sampling | 100% | None | 1 day |
|
|
42
|
+
| `:staging` | Pre-prod testing | 50% | Gzip | 7 days |
|
|
43
|
+
| `:production_low_traffic` | < 1K events/sec | 80% | Gzip | 30 days |
|
|
44
|
+
| `:production_high_traffic` | > 10K events/sec | 10% | Zstd | 7 days hot, 90 days warm |
|
|
45
|
+
| `:production_cost_optimized` | Aggressive cost reduction | 5% | Zstd level 9 | 3 days hot, 30 days warm |
|
|
46
|
+
|
|
47
|
+
### Implementation Example
|
|
48
|
+
|
|
49
|
+
```ruby
|
|
50
|
+
module E11y
|
|
51
|
+
module Presets
|
|
52
|
+
PRODUCTION_HIGH_TRAFFIC = {
|
|
53
|
+
adaptive_sampling: {
|
|
54
|
+
enabled: true,
|
|
55
|
+
load_based: { max_events_per_sec: 10_000 },
|
|
56
|
+
error_based: { enabled: true },
|
|
57
|
+
value_based: {
|
|
58
|
+
high_value_patterns: [/^payment\./, /^order\./, /^error\./],
|
|
59
|
+
low_value_patterns: [/^debug\./, /^health_check/]
|
|
60
|
+
}
|
|
61
|
+
},
|
|
62
|
+
compression: {
|
|
63
|
+
enabled: true,
|
|
64
|
+
algorithm: :zstd,
|
|
65
|
+
level: 3
|
|
66
|
+
},
|
|
67
|
+
retention_tagging: {
|
|
68
|
+
enabled: true,
|
|
69
|
+
default_retention: 7.days,
|
|
70
|
+
retention_by_pattern: {
|
|
71
|
+
'audit.*' => 7.years,
|
|
72
|
+
'payment.*' => 1.year,
|
|
73
|
+
'debug.*' => 1.day
|
|
74
|
+
}
|
|
75
|
+
},
|
|
76
|
+
payload_minimization: {
|
|
77
|
+
enabled: true,
|
|
78
|
+
truncate_strings_at: 1000,
|
|
79
|
+
truncate_arrays_at: 100,
|
|
80
|
+
remove_null_fields: true
|
|
81
|
+
}
|
|
82
|
+
}.freeze
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Benefits
|
|
88
|
+
|
|
89
|
+
- ✅ Faster onboarding (< 5 minutes to production-ready config)
|
|
90
|
+
- ✅ Best practices baked in
|
|
91
|
+
- ✅ Easy to customize (override specific settings)
|
|
92
|
+
- ✅ Reduces configuration errors
|
|
93
|
+
|
|
94
|
+
### Priority
|
|
95
|
+
|
|
96
|
+
**Medium (v1.1)**
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## 2. Sampling Budget
|
|
101
|
+
|
|
102
|
+
### Problem
|
|
103
|
+
|
|
104
|
+
Current sampling is reactive (driven by load), which makes costs hard to predict. Organizations need predictable telemetry budgets.
|
|
105
|
+
|
|
106
|
+
### Proposal
|
|
107
|
+
|
|
108
|
+
Proactive budget-based sampling:
|
|
109
|
+
|
|
110
|
+
```ruby
|
|
111
|
+
E11y.configure do |config|
|
|
112
|
+
config.cost_optimization do
|
|
113
|
+
sampling_budget do
|
|
114
|
+
enabled true
|
|
115
|
+
|
|
116
|
+
# Set a daily event budget
|
|
117
|
+
daily_budget 10_000_000 # 10M events/day
|
|
118
|
+
|
|
119
|
+
# Budget allocation by event type
|
|
120
|
+
allocate_by_pattern do
|
|
121
|
+
pattern 'payment.*', percent: 20 # 2M events/day
|
|
122
|
+
pattern 'order.*', percent: 15 # 1.5M events/day
|
|
123
|
+
pattern 'error.*', percent: 10 # 1M events/day (always track)
|
|
124
|
+
pattern 'debug.*', percent: 5 # 500K events/day
|
|
125
|
+
pattern '*', percent: 50 # 5M events/day (everything else)
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
# Dynamic adjustment
|
|
129
|
+
rebalance_interval 1.hour # Recalculate sample rates every hour
|
|
130
|
+
|
|
131
|
+
# Overflow strategy
|
|
132
|
+
on_budget_exceeded :reduce_low_value # or :stop_sampling, :alert_only
|
|
133
|
+
|
|
134
|
+
# Alert when budget is 80% consumed
|
|
135
|
+
alert_threshold 0.8
|
|
136
|
+
alert_to :slack # or :pagerduty, :email
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
end
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### How It Works
|
|
143
|
+
|
|
144
|
+
#### 1. Budget Calculation
|
|
145
|
+
|
|
146
|
+
```ruby
|
|
147
|
+
# At start of each hour:
|
|
148
|
+
hourly_budget = daily_budget / 24 # 416,666 events/hour
|
|
149
|
+
|
|
150
|
+
# Per-pattern budget:
|
|
151
|
+
payment_budget = hourly_budget * 0.20 # 83,333 events/hour
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
#### 2. Dynamic Sample Rate Adjustment
|
|
155
|
+
|
|
156
|
+
```ruby
|
|
157
|
+
# If payment events are at 50% of budget after 30 min:
|
|
158
|
+
current_rate = 41,666 / (83,333 / 2) # = 1.0 (100% sampling)
|
|
159
|
+
|
|
160
|
+
# If payment events are at 90% of budget after 30 min:
|
|
161
|
+
current_rate = 74,999 / (83,333 / 2) # = 1.8 (need to reduce)
|
|
162
|
+
new_sample_rate = 1.0 / 1.8 # = 55% sampling for next 30 min
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
#### 3. Cost Predictability
|
|
166
|
+
|
|
167
|
+
```ruby
|
|
168
|
+
# Known daily cost:
|
|
169
|
+
cost_per_event = $0.0001 # e.g., Datadog pricing
|
|
170
|
+
daily_cost = 10_000_000 * $0.0001 = $1,000/day
|
|
171
|
+
monthly_cost = $1,000 * 30 = $30,000/month
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
### Benefits
|
|
175
|
+
|
|
176
|
+
- ✅ Predictable costs (no surprises)
|
|
177
|
+
- ✅ Budget enforcement (hard cap on events)
|
|
178
|
+
- ✅ Intelligent allocation (high-value events get more budget)
|
|
179
|
+
- ✅ Real-time alerts (before overspending)
|
|
180
|
+
|
|
181
|
+
### Trade-offs
|
|
182
|
+
|
|
183
|
+
- ⚠️ Complexity (requires Redis for distributed state)
|
|
184
|
+
- ⚠️ Potential data loss (if budget exceeded)
|
|
185
|
+
- ⚠️ Tuning required (optimal allocation per app)
|
|
186
|
+
|
|
187
|
+
### Priority
|
|
188
|
+
|
|
189
|
+
**Low (v1.2+)**
|
|
190
|
+
|
|
191
|
+
### Alternative Approach
|
|
192
|
+
|
|
193
|
+
- Use cloud cost management tools (AWS Cost Anomaly Detection, Datadog Cost Management)
|
|
194
|
+
- Set alerts on actual billing, not event counts
|
|
195
|
+
- Simpler, but less proactive
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## 3. Additional Ideas (Placeholder)
|
|
200
|
+
|
|
201
|
+
Future ideas to be added:
|
|
202
|
+
|
|
203
|
+
- **ML-Based Anomaly Detection:** Automatically detect unusual patterns in events
|
|
204
|
+
- **Event Replay from Storage:** Replay events from cold storage for debugging
|
|
205
|
+
- **Multi-Tenant Support:** Isolate events by tenant/customer
|
|
206
|
+
- **Event Transformation Rules:** Transform events before sending to adapters
|
|
207
|
+
- **Custom Retention Policies:** More granular control over data lifecycle
|
|
208
|
+
|
|
209
|
+
---
|
|
210
|
+
|
|
211
|
+
## Related Use Cases
|
|
212
|
+
|
|
213
|
+
- [UC-015: Cost Optimization](UC-015-cost-optimization.md) - Current cost optimization strategies
|
|
214
|
+
- [UC-014: Adaptive Sampling](UC-014-adaptive-sampling.md) - Current sampling approach
|
|
215
|
+
|
|
216
|
+
---
|
|
217
|
+
|
|
218
|
+
## Related ADRs
|
|
219
|
+
|
|
220
|
+
- [ADR-009: Cost Optimization](../ADR-009-cost-optimization.md) - Current implementation
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
**Status:** ✅ Draft Complete
|
|
225
|
+
**Next Review:** After v1.0 release
|
|
226
|
+
**Estimated Value:** High (for enterprise customers)
|
data/e11y.gemspec
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "lib/e11y/version"
|
|
4
|
+
|
|
5
|
+
# Gem specification for e11y: identity, registry metadata, packaged files and
# dependencies.
Gem::Specification.new do |spec|
  spec.name = "e11y"
  spec.version = E11y::VERSION
  spec.authors = ["Artur Seletskiy"]

  spec.summary = "E11y - Easy Telemetry: Production-grade observability for Rails with zero-config SLO tracking"
  spec.description = <<~DESC
    E11y (Easy Telemetry) - production-ready observability gem for Ruby on Rails applications.

    KEY FEATURES:
    • 📊 Zero-Config SLO Tracking - automatic Service Level Objectives for HTTP endpoints and background jobs
    • 🎯 Request-Scoped Debug Buffering - buffer debug logs in memory, flush only on errors (reduce log noise by 90%)
    • 📈 Pattern-Based Metrics - auto-generate Prometheus/Yabeda metrics from business events
    • 🔒 GDPR/SOC2 Compliance - built-in PII filtering and audit trails
    • 🔌 Pluggable Adapters - send events to Loki, Sentry, OpenTelemetry, Elasticsearch, or custom backends
    • 🚀 High Performance - zero-allocation event tracking, lock-free ring buffers, adaptive memory limits
    • 🧵 Thread-Safe - designed for multi-threaded Rails apps and Sidekiq workers
    • 🎭 Multi-Tenant Ready - trace context propagation across services with OpenTelemetry integration
    • 📝 Type-Safe Events - declarative event schemas with dry-schema validation
    • ⚡ Rate Limiting & Sampling - protect production from metric storms and cost overruns

    Perfect for SuperApp architectures, microservices, and high-scale Rails applications.
    Battle-tested patterns from Devise, Sidekiq, Sentry, and Yabeda.
  DESC
  spec.homepage = "https://github.com/arturseletskiy/e11y"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 3.2.0"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/arturseletskiy/e11y"
  spec.metadata["changelog_uri"] = "https://github.com/arturseletskiy/e11y/blob/main/CHANGELOG.md"
  # The repository name must come before the ref segment, otherwise the link resolves to nothing.
  spec.metadata["documentation_uri"] = "https://github.com/arturseletskiy/e11y/tree/main/docs"
  spec.metadata["bug_tracker_uri"] = "https://github.com/arturseletskiy/e11y/issues"
  spec.metadata["rubygems_mfa_required"] = "true"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  # The gemspec itself and test/CI/tooling paths are excluded from the package.
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject do |f|
      (File.expand_path(f) == __FILE__) ||
        f.start_with?(*%w[bin/ test/ spec/ features/ .git .github appveyor Gemfile docs/researches/])
    end
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # Runtime dependencies
  spec.add_dependency "activesupport", ">= 7.0"
  spec.add_dependency "concurrent-ruby", "~> 1.2" # Thread-safe data structures
  spec.add_dependency "dry-schema", "~> 1.13" # Event schema validation
  spec.add_dependency "dry-types", "~> 1.7"
  spec.add_dependency "zeitwerk", "~> 2.6"

  # Development dependencies
  spec.add_development_dependency "rack", "~> 3.0" # For Rack middleware testing
  spec.add_development_dependency "rake", "~> 13.0"
  spec.add_development_dependency "rspec", "~> 3.12"
  spec.add_development_dependency "rubocop", "~> 1.50"
  spec.add_development_dependency "rubocop-rake", "~> 0.6"
  spec.add_development_dependency "rubocop-rspec", "~> 2.22"
  spec.add_development_dependency "simplecov", "~> 0.22"
  spec.add_development_dependency "webmock", "~> 3.19" # For HTTP adapter testing
  spec.add_development_dependency "yard", "~> 0.9"

  # Optional adapter dependencies (install only if using specific adapters)
  # LokiAdapter: gem install faraday faraday-retry
  # SentryAdapter: gem install sentry-ruby
  # They are declared as development dependencies so the adapters can be exercised
  # locally without forcing them on every consumer of the gem.
  spec.add_development_dependency "faraday", "~> 2.7" # For LokiAdapter
  spec.add_development_dependency "faraday-retry", "~> 2.2" # For LokiAdapter retry middleware
  spec.add_development_dependency "sentry-ruby", "~> 5.15" # For SentryAdapter
end
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module E11y
|
|
4
|
+
module Adapters
|
|
5
|
+
# Adaptive batching helper for adapters
#
# Provides efficient event batching with automatic flushing based on:
# - Batch size threshold (max_size): reaching it flushes immediately
# - Time threshold (timeout): checked periodically by a background timer thread
# - Minimum batch size (min_size): a timeout flush only happens once at least
#   this many events are buffered; smaller batches stay buffered until more
#   events arrive, {#flush!} is called, or the batcher is closed
#
# Thread-safe implementation with mutex-protected buffer. The flush callback
# is always invoked with the mutex released, so slow I/O in the callback does
# not block concurrent {#add} calls.
#
# @example Use in adapter
#   class MyAdapter < E11y::Adapters::Base
#     def initialize(config = {})
#       super
#       @batcher = AdaptiveBatcher.new(
#         max_size: 500,
#         timeout: 5.0,
#         flush_callback: method(:send_batch)
#       )
#     end
#
#     def write(event_data)
#       @batcher.add(event_data)
#     end
#
#     def write_batch(events)
#       @batcher.flush!
#       super
#     end
#
#     def close
#       @batcher.close
#       super
#     end
#
#     private
#
#     def send_batch(events)
#       # Send events to external system
#       http_client.post(events)
#     end
#   end
#
# @see ADR-004 Section 8.1 (Adaptive Batching)
class AdaptiveBatcher
  # Upper bound (seconds) that {#close} waits for the timer thread to finish
  # an in-flight flush before terminating it forcibly.
  SHUTDOWN_GRACE_PERIOD = 5.0
  private_constant :SHUTDOWN_GRACE_PERIOD

  # Initialize adaptive batcher and start its background timer thread
  #
  # @param min_size [Integer] Minimum batch size before timeout flush (default: 10)
  # @param max_size [Integer] Maximum batch size (triggers immediate flush, default: 500)
  # @param timeout [Float] Timeout in seconds for automatic flush (default: 5.0)
  # @param flush_callback [Proc, Method] Callback to invoke on flush with events array
  # @raise [ArgumentError] if flush_callback does not respond to #call
  def initialize(flush_callback:, min_size: 10, max_size: 500, timeout: 5.0)
    # Fail fast: a non-callable callback would otherwise only blow up on the
    # first flush, after the events have already been removed from the buffer.
    raise ArgumentError, "flush_callback must respond to #call" unless flush_callback.respond_to?(:call)

    @min_size = min_size
    @max_size = max_size
    @timeout = timeout
    @flush_callback = flush_callback

    @buffer = []
    @mutex = Mutex.new
    @stop_signal = ConditionVariable.new
    @last_flush = monotonic_now
    @closed = false
    @timer_thread = nil

    start_timer_thread!
  end

  # Add event to buffer
  #
  # Automatically flushes if max_size reached; in that case the flush callback
  # runs on the calling thread and any exception it raises propagates to the caller.
  # Thread-safe operation.
  #
  # @param event_data [Hash] Event to add to buffer
  # @return [Boolean] true if added successfully, false if the batcher is closed
  def add(event_data)
    @mutex.synchronize do
      # Checked under the lock so an event can never be appended after
      # #close has marked the batcher closed and performed its final flush.
      return false if @closed

      @buffer << event_data
      flush_unlocked! if should_flush_immediately?
    end

    true
  end

  # Flush buffer immediately
  #
  # Sends all buffered events to flush_callback.
  # Thread-safe operation.
  #
  # @return [Boolean] true if flushed, false if buffer empty
  def flush!
    @mutex.synchronize { flush_unlocked! }
  end

  # Get current buffer size
  #
  # @return [Integer] Number of events in buffer
  def buffer_size
    @mutex.synchronize { @buffer.size }
  end

  # Check if buffer is empty
  #
  # @return [Boolean] true if buffer is empty
  def empty?
    @mutex.synchronize { @buffer.empty? }
  end

  # Close batcher and flush remaining events
  #
  # Stops the timer thread cooperatively (letting an in-flight timer flush
  # finish, bounded by a grace period) and then flushes any remaining events.
  # Safe to call multiple times; later calls are no-ops.
  #
  # @return [void]
  def close
    timer = @mutex.synchronize do
      return if @closed

      @closed = true
      @stop_signal.broadcast # wake the timer thread if it is waiting
      thread = @timer_thread
      @timer_thread = nil
      thread
    end

    stop_timer_thread(timer)
    flush!
  end

  private

  # Start background timer thread for automatic flushing
  #
  # Timer thread checks periodically if timeout has expired
  # and flushes buffer if min_size threshold is met.
  #
  # Check interval is min(timeout/2, 1 second) for responsiveness.
  # Errors raised by the flush callback are reported via `warn` and do not
  # stop the timer.
  #
  # @api private
  def start_timer_thread!
    check_interval = [@timeout / 2.0, 1.0].min

    @timer_thread = Thread.new do
      until stop_requested?(check_interval)
        begin
          @mutex.synchronize { flush_unlocked! if should_flush_timeout? }
        rescue StandardError => e
          warn "[E11y] AdaptiveBatcher timer error: #{e.message}"
        end
      end
    end

    @timer_thread.name = "e11y-adaptive-batcher-timer"
  end

  # Wait up to wait_seconds for a stop request
  #
  # Uses a condition variable instead of a plain sleep so that #close can
  # wake the timer immediately.
  #
  # @param wait_seconds [Numeric] Maximum time to wait
  # @return [Boolean] true if the batcher has been closed
  # @api private
  def stop_requested?(wait_seconds)
    @mutex.synchronize do
      @stop_signal.wait(@mutex, wait_seconds) unless @closed
      @closed
    end
  end

  # Wait for the timer thread to exit, killing it only as a last resort
  #
  # @param thread [Thread, nil] Timer thread captured by #close
  # @return [void]
  # @api private
  def stop_timer_thread(thread)
    # Joining the current thread would raise; this happens when #close is
    # called from inside a flush callback running on the timer thread.
    return if thread.nil? || thread == Thread.current

    thread.kill unless thread.join(SHUTDOWN_GRACE_PERIOD)
  end

  # Flush buffer (unlocked - must be called within mutex.synchronize)
  #
  # The mutex is released while the callback runs and re-acquired afterwards,
  # so the surrounding synchronize block can unlock it normally. Events are
  # removed from the buffer before the callback runs; if the callback raises,
  # the exception propagates and that batch is not re-queued.
  #
  # @return [Boolean] true if flushed, false if buffer empty
  # @api private
  def flush_unlocked!
    return false if @buffer.empty?

    events = @buffer
    @buffer = []
    @last_flush = monotonic_now

    # Release mutex before I/O operation
    @mutex.unlock
    begin
      @flush_callback.call(events)
      true
    ensure
      @mutex.lock
    end
  end

  # Check if should flush immediately (max_size reached)
  #
  # @return [Boolean]
  # @api private
  def should_flush_immediately?
    @buffer.size >= @max_size
  end

  # Check if should flush on timeout
  #
  # @return [Boolean] true when the timeout has expired and at least min_size events are buffered
  # @api private
  def should_flush_timeout?
    return false if @buffer.empty?

    timeout_expired? && @buffer.size >= @min_size
  end

  # Check if timeout has expired since last flush
  #
  # @return [Boolean]
  # @api private
  def timeout_expired?
    (monotonic_now - @last_flush) >= @timeout
  end

  # Current reading of the monotonic clock
  #
  # Used instead of Time.now so wall-clock adjustments cannot trigger or
  # suppress a timeout flush.
  #
  # @return [Float] Seconds on the monotonic clock
  # @api private
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
|
|
206
|
+
end
|
|
207
|
+
end
|