e11y 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +1 -0
- data/.rubocop.yml +20 -0
- data/CHANGELOG.md +151 -13
- data/README.md +1138 -104
- data/RELEASE.md +254 -0
- data/Rakefile +377 -0
- data/benchmarks/OPTIMIZATION.md +246 -0
- data/benchmarks/README.md +103 -0
- data/benchmarks/allocation_profiling.rb +253 -0
- data/benchmarks/e11y_benchmarks.rb +447 -0
- data/benchmarks/ruby_baseline_allocations.rb +175 -0
- data/benchmarks/run_all.rb +9 -21
- data/docs/00-ICP-AND-TIMELINE.md +2 -2
- data/docs/ADR-001-architecture.md +1 -1
- data/docs/ADR-004-adapter-architecture.md +247 -0
- data/docs/ADR-009-cost-optimization.md +231 -115
- data/docs/ADR-017-multi-rails-compatibility.md +103 -0
- data/docs/ADR-INDEX.md +99 -0
- data/docs/CONTRIBUTING.md +312 -0
- data/docs/IMPLEMENTATION_PLAN.md +1 -1
- data/docs/QUICK-START.md +0 -6
- data/docs/use_cases/UC-019-retention-based-routing.md +584 -0
- data/e11y.gemspec +28 -17
- data/lib/e11y/adapters/adaptive_batcher.rb +3 -0
- data/lib/e11y/adapters/audit_encrypted.rb +10 -4
- data/lib/e11y/adapters/base.rb +15 -0
- data/lib/e11y/adapters/file.rb +4 -1
- data/lib/e11y/adapters/in_memory.rb +6 -0
- data/lib/e11y/adapters/loki.rb +9 -0
- data/lib/e11y/adapters/otel_logs.rb +11 -9
- data/lib/e11y/adapters/sentry.rb +9 -0
- data/lib/e11y/adapters/yabeda.rb +54 -10
- data/lib/e11y/buffers.rb +8 -8
- data/lib/e11y/console.rb +52 -60
- data/lib/e11y/event/base.rb +75 -10
- data/lib/e11y/event/value_sampling_config.rb +10 -4
- data/lib/e11y/events/rails/http/request.rb +1 -1
- data/lib/e11y/instruments/active_job.rb +6 -3
- data/lib/e11y/instruments/rails_instrumentation.rb +51 -28
- data/lib/e11y/instruments/sidekiq.rb +7 -7
- data/lib/e11y/logger/bridge.rb +24 -54
- data/lib/e11y/metrics/cardinality_protection.rb +257 -12
- data/lib/e11y/metrics/cardinality_tracker.rb +17 -0
- data/lib/e11y/metrics/registry.rb +6 -2
- data/lib/e11y/metrics/relabeling.rb +0 -56
- data/lib/e11y/metrics.rb +6 -1
- data/lib/e11y/middleware/audit_signing.rb +12 -9
- data/lib/e11y/middleware/pii_filter.rb +18 -10
- data/lib/e11y/middleware/request.rb +10 -4
- data/lib/e11y/middleware/routing.rb +117 -90
- data/lib/e11y/middleware/sampling.rb +47 -28
- data/lib/e11y/middleware/trace_context.rb +40 -11
- data/lib/e11y/middleware/validation.rb +20 -2
- data/lib/e11y/middleware/versioning.rb +1 -1
- data/lib/e11y/pii.rb +7 -7
- data/lib/e11y/railtie.rb +24 -20
- data/lib/e11y/reliability/circuit_breaker.rb +3 -0
- data/lib/e11y/reliability/dlq/file_storage.rb +16 -5
- data/lib/e11y/reliability/dlq/filter.rb +3 -0
- data/lib/e11y/reliability/retry_handler.rb +4 -0
- data/lib/e11y/sampling/error_spike_detector.rb +16 -5
- data/lib/e11y/sampling/load_monitor.rb +13 -4
- data/lib/e11y/self_monitoring/reliability_monitor.rb +3 -0
- data/lib/e11y/version.rb +1 -1
- data/lib/e11y.rb +86 -9
- metadata +83 -38
- data/docs/use_cases/UC-019-tiered-storage-migration.md +0 -562
- data/lib/e11y/middleware/pii_filtering.rb +0 -280
- data/lib/e11y/middleware/slo.rb +0 -168
|
@@ -0,0 +1,584 @@
|
|
|
1
|
+
# UC-019: Retention-Based Event Routing
|
|
2
|
+
|
|
3
|
+
**Status:** ✅ Proposed (Phase 5 Extension, 2026-01-21)
|
|
4
|
+
**Complexity:** Medium (Event DSL + Routing Middleware)
|
|
5
|
+
**Setup Time:** 30 minutes (DSL + Config + Tests)
|
|
6
|
+
**Target Users:** Platform Engineers, DevOps, Compliance Teams, Cost Optimization
|
|
7
|
+
|
|
8
|
+
**Related:**
|
|
9
|
+
- ADR-004 §14 (Retention-Based Routing)
|
|
10
|
+
- ADR-009 §6 (Cost Optimization)
|
|
11
|
+
- UC-015 (Cost Optimization)
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## 📋 Overview
|
|
16
|
+
|
|
17
|
+
### Problem Statement
|
|
18
|
+
|
|
19
|
+
**Current Pain Points:**
|
|
20
|
+
|
|
21
|
+
1. **Manual adapter selection per event**
|
|
22
|
+
```ruby
|
|
23
|
+
# ❌ Developer must remember correct adapter
|
|
24
|
+
class DebugEvent < E11y::Event::Base
|
|
25
|
+
adapters :loki # Expensive! Stores debug for 30 days
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
class AuditEvent < E11y::Event::Base
|
|
29
|
+
adapters :audit_encrypted # Correct but easy to forget
|
|
30
|
+
end
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
2. **No cost optimization**
|
|
34
|
+
- Debug logs stored in expensive Loki (30 days): $500/month
|
|
35
|
+
- Audit logs might go to the wrong storage (compliance risk)
|
|
36
|
+
- No automatic tiering based on retention needs
|
|
37
|
+
|
|
38
|
+
3. **No compliance enforcement**
|
|
39
|
+
- Audit events can accidentally go to short-term storage
|
|
40
|
+
- No guarantee of retention policy adherence
|
|
41
|
+
|
|
42
|
+
### E11y Solution: Declarative Retention + Lambda Routing
|
|
43
|
+
|
|
44
|
+
**Declarative Intent:**
|
|
45
|
+
```ruby
|
|
46
|
+
class DebugEvent < E11y::Event::Base
|
|
47
|
+
retention_period 7.days # ← Declare intent
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
class AuditEvent < E11y::Event::Base
|
|
51
|
+
audit_event true
|
|
52
|
+
retention_period 7.years # ← Declare intent
|
|
53
|
+
end
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
**Centralized Routing:**
|
|
57
|
+
```ruby
|
|
58
|
+
E11y.configure do |config|
|
|
59
|
+
config.routing_rules = [
|
|
60
|
+
->(event) { :audit_encrypted if event[:audit_event] },
|
|
61
|
+
->(event) {
|
|
62
|
+
days = (Time.parse(event[:retention_until]) - Time.now) / 86400
|
|
63
|
+
days > 90 ? :s3_glacier : :loki
|
|
64
|
+
}
|
|
65
|
+
]
|
|
66
|
+
end
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
**Result:**
|
|
70
|
+
- ✅ **80-97% cost savings** (automatic tiered routing)
|
|
71
|
+
- ✅ **Compliance enforcement** (audit → encrypted storage)
|
|
72
|
+
- ✅ **Developer experience** (declare intent, routing handles the rest)
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## 🎯 Use Case Scenarios
|
|
77
|
+
|
|
78
|
+
### Scenario 1: Debug Logs (Short Retention)
|
|
79
|
+
|
|
80
|
+
**Context:** High-volume debug logs for troubleshooting (7 days retention)
|
|
81
|
+
|
|
82
|
+
```ruby
|
|
83
|
+
# Event definition
|
|
84
|
+
class DebugQueryEvent < E11y::Event::Base
|
|
85
|
+
retention_period 7.days # Short retention
|
|
86
|
+
|
|
87
|
+
schema do
|
|
88
|
+
required(:query).filled(:string)
|
|
89
|
+
required(:duration_ms).filled(:integer)
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
# Configuration
|
|
94
|
+
E11y.configure do |config|
|
|
95
|
+
config.routing_rules = [
|
|
96
|
+
->(event) {
|
|
97
|
+
days = (Time.parse(event[:retention_until]) - Time.now) / 86400
|
|
98
|
+
:stdout if days <= 7 # Short retention → stdout (free!)
|
|
99
|
+
}
|
|
100
|
+
]
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
# Usage
|
|
104
|
+
DebugQueryEvent.track(query: "SELECT...", duration_ms: 123)
|
|
105
|
+
# ↓
|
|
106
|
+
# retention_until: "2026-01-28T10:30:00Z" (7 days from now)
|
|
107
|
+
# ↓
|
|
108
|
+
# Routing: days = 7 → :stdout adapter
|
|
109
|
+
# ↓
|
|
110
|
+
# Event printed to console (free storage!)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
**Cost Impact:**
|
|
114
|
+
- **Before:** Loki storage (30 days) = $500/month
|
|
115
|
+
- **After:** Stdout (7 days) = $0/month
|
|
116
|
+
- **Savings:** 100% ($500/month)
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
### Scenario 2: Audit Events (Long Retention, Compliance)
|
|
121
|
+
|
|
122
|
+
**Context:** User deletion audit trail for GDPR erasure requests (retained 7 years per compliance policy; GDPR itself does not prescribe a fixed retention period)
|
|
123
|
+
|
|
124
|
+
```ruby
|
|
125
|
+
# Event definition
|
|
126
|
+
class UserDeletedEvent < E11y::Event::Base
|
|
127
|
+
audit_event true
|
|
128
|
+
retention_period 7.years # GDPR compliance
|
|
129
|
+
|
|
130
|
+
schema do
|
|
131
|
+
required(:user_id).filled(:integer)
|
|
132
|
+
required(:deleted_by).filled(:integer)
|
|
133
|
+
required(:reason).filled(:string)
|
|
134
|
+
end
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
# Configuration
|
|
138
|
+
E11y.configure do |config|
|
|
139
|
+
config.routing_rules = [
|
|
140
|
+
# Priority 1: Audit events always to encrypted storage
|
|
141
|
+
->(event) { :audit_encrypted if event[:audit_event] },
|
|
142
|
+
|
|
143
|
+
# Priority 2: Long retention to cold storage
|
|
144
|
+
->(event) {
|
|
145
|
+
days = (Time.parse(event[:retention_until]) - Time.now) / 86400
|
|
146
|
+
:s3_glacier if days > 90 && !event[:audit_event]
|
|
147
|
+
}
|
|
148
|
+
]
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
# Usage
|
|
152
|
+
UserDeletedEvent.track(
|
|
153
|
+
user_id: 123,
|
|
154
|
+
deleted_by: 456,
|
|
155
|
+
reason: "GDPR right to be forgotten"
|
|
156
|
+
)
|
|
157
|
+
# ↓
|
|
158
|
+
# audit_event: true
|
|
159
|
+
# retention_until: "2033-01-21T10:30:00Z" (7 years from now)
|
|
160
|
+
# ↓
|
|
161
|
+
# Routing: audit_event=true → :audit_encrypted adapter
|
|
162
|
+
# ↓
|
|
163
|
+
# Event written to encrypted, tamper-proof storage
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
**Compliance Guarantees:**
|
|
167
|
+
- ✅ **Automatic routing** → audit events can't go to the wrong storage
|
|
168
|
+
- ✅ **Encrypted storage** → GDPR/SOX compliant
|
|
169
|
+
- ✅ **7-year retention** → legal requirement met
|
|
170
|
+
- ✅ **Immutable** → audit trail tamper-proof
|
|
171
|
+
|
|
172
|
+
**Cost Impact:**
|
|
173
|
+
- **Before:** Loki storage (30 days, then manual S3) = $5000/month
|
|
174
|
+
- **After:** Audit-encrypted + S3 Glacier (automatic) = $50/month
|
|
175
|
+
- **Savings:** 99% ($4950/month)
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
### Scenario 3: Business Events (Medium Retention)
|
|
180
|
+
|
|
181
|
+
**Context:** Order placement events (90 days for analytics)
|
|
182
|
+
|
|
183
|
+
```ruby
|
|
184
|
+
# Event definition
|
|
185
|
+
class OrderPlacedEvent < E11y::Event::Base
|
|
186
|
+
retention_period 90.days # Business analytics
|
|
187
|
+
|
|
188
|
+
schema do
|
|
189
|
+
required(:order_id).filled(:string)
|
|
190
|
+
required(:amount).filled(:integer)
|
|
191
|
+
required(:currency).filled(:string)
|
|
192
|
+
end
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
# Configuration
|
|
196
|
+
E11y.configure do |config|
|
|
197
|
+
config.routing_rules = [
|
|
198
|
+
->(event) {
|
|
199
|
+
days = (Time.parse(event[:retention_until]) - Time.now) / 86400
|
|
200
|
+
case days
|
|
201
|
+
when 0..30 then :loki # Hot storage
|
|
202
|
+
when 30..90 then :s3_standard # Warm storage (`days` is a Float; starting at 30 keeps values like 30.5 from falling through to cold storage)
|
|
203
|
+
else :s3_glacier # Cold storage
|
|
204
|
+
end
|
|
205
|
+
}
|
|
206
|
+
]
|
|
207
|
+
end
|
|
208
|
+
|
|
209
|
+
# Usage
|
|
210
|
+
OrderPlacedEvent.track(
|
|
211
|
+
order_id: "ORD-123",
|
|
212
|
+
amount: 10000,
|
|
213
|
+
currency: "USD"
|
|
214
|
+
)
|
|
215
|
+
# ↓
|
|
216
|
+
# retention_until: "2026-04-21T10:30:00Z" (90 days from now)
|
|
217
|
+
# ↓
|
|
218
|
+
# Routing: days = 90 → :s3_standard adapter (warm storage)
|
|
219
|
+
# ↓
|
|
220
|
+
# Event written to S3 Standard (cost-optimized)
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
**Cost Impact:**
|
|
224
|
+
- **Before:** Loki only = $200/month
|
|
225
|
+
- **After:** Loki (30d) + S3 Standard (60d) = $120/month
|
|
226
|
+
- **Savings:** 40% ($80/month)
|
|
227
|
+
|
|
228
|
+
---
|
|
229
|
+
|
|
230
|
+
### Scenario 4: Error Events (Multiple Destinations)
|
|
231
|
+
|
|
232
|
+
**Context:** Payment failures (90 days + Sentry for alerting)
|
|
233
|
+
|
|
234
|
+
```ruby
|
|
235
|
+
# Event definition
|
|
236
|
+
class PaymentFailedEvent < E11y::Event::Base
|
|
237
|
+
retention_period 90.days
|
|
238
|
+
severity :error
|
|
239
|
+
|
|
240
|
+
schema do
|
|
241
|
+
required(:order_id).filled(:string)
|
|
242
|
+
required(:error_message).filled(:string)
|
|
243
|
+
end
|
|
244
|
+
end
|
|
245
|
+
|
|
246
|
+
# Configuration
|
|
247
|
+
E11y.configure do |config|
|
|
248
|
+
config.routing_rules = [
|
|
249
|
+
# Rule 1: Errors always to Sentry
|
|
250
|
+
->(event) { :sentry if event[:severity] == :error },
|
|
251
|
+
|
|
252
|
+
# Rule 2: Retention-based storage
|
|
253
|
+
->(event) {
|
|
254
|
+
days = (Time.parse(event[:retention_until]) - Time.now) / 86400
|
|
255
|
+
days > 30 ? :s3_standard : :loki
|
|
256
|
+
}
|
|
257
|
+
]
|
|
258
|
+
end
|
|
259
|
+
|
|
260
|
+
# Usage
|
|
261
|
+
PaymentFailedEvent.track(
|
|
262
|
+
order_id: "ORD-456",
|
|
263
|
+
error_message: "Card declined"
|
|
264
|
+
)
|
|
265
|
+
# ↓
|
|
266
|
+
# retention_until: "2026-04-21T10:30:00Z" (90 days)
|
|
267
|
+
# severity: :error
|
|
268
|
+
# ↓
|
|
269
|
+
# Routing:
|
|
270
|
+
# Rule 1: :sentry (error alerting)
|
|
271
|
+
# Rule 2: :s3_standard (90 days storage)
|
|
272
|
+
# ↓
|
|
273
|
+
# Event written to BOTH adapters
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
**Benefits:**
|
|
277
|
+
- ✅ **Alerting:** Sentry catches errors immediately
|
|
278
|
+
- ✅ **Storage:** S3 Standard for 90-day retention
|
|
279
|
+
- ✅ **Cost:** No duplicate Loki storage ($100/month savings)
|
|
280
|
+
|
|
281
|
+
---
|
|
282
|
+
|
|
283
|
+
### Scenario 5: Explicit Adapters (Bypass Routing)
|
|
284
|
+
|
|
285
|
+
**Context:** Critical payment events requiring dual storage
|
|
286
|
+
|
|
287
|
+
```ruby
|
|
288
|
+
# Event definition
|
|
289
|
+
class CriticalPaymentEvent < E11y::Event::Base
|
|
290
|
+
retention_period 2.years
|
|
291
|
+
adapters :audit_encrypted, :loki # ← Explicit adapters bypass routing
|
|
292
|
+
|
|
293
|
+
schema do
|
|
294
|
+
required(:amount).filled(:integer)
|
|
295
|
+
required(:user_id).filled(:integer)
|
|
296
|
+
end
|
|
297
|
+
end
|
|
298
|
+
|
|
299
|
+
# Usage
|
|
300
|
+
CriticalPaymentEvent.track(amount: 100000, user_id: 789)
|
|
301
|
+
# ↓
|
|
302
|
+
# adapters: [:audit_encrypted, :loki] # ← Explicit
|
|
303
|
+
# retention_until: "2028-01-21T10:30:00Z" (2 years)
|
|
304
|
+
# ↓
|
|
305
|
+
# Routing: BYPASSED (explicit adapters have priority)
|
|
306
|
+
# ↓
|
|
307
|
+
# Event written to :audit_encrypted AND :loki
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
**Use Cases for Explicit Adapters:**
|
|
311
|
+
- ✅ High-value transactions (dual storage for redundancy)
|
|
312
|
+
- ✅ Legacy events (gradual migration from old adapters)
|
|
313
|
+
- ✅ Custom requirements (override default routing)
|
|
314
|
+
|
|
315
|
+
---
|
|
316
|
+
|
|
317
|
+
## 🏗️ Architecture
|
|
318
|
+
|
|
319
|
+
### Data Flow
|
|
320
|
+
|
|
321
|
+
```
|
|
322
|
+
┌────────────────────┐
|
|
323
|
+
│ Event Class │
|
|
324
|
+
│ │
|
|
325
|
+
│ retention_period │───┐
|
|
326
|
+
│ 30.days │ │
|
|
327
|
+
└────────────────────┘ │ Calculate retention_until
|
|
328
|
+
│ at track() time
|
|
329
|
+
▼
|
|
330
|
+
┌─────────────────────────────────┐
|
|
331
|
+
│ Event Instance (Hash) │
|
|
332
|
+
│ │
|
|
333
|
+
│ { │
|
|
334
|
+
│ event_name: "order.placed", │
|
|
335
|
+
│ retention_until: "2026-02-20"│◄─── Auto-calculated
|
|
336
|
+
│ audit_event: false, │
|
|
337
|
+
│ severity: :info │
|
|
338
|
+
│ } │
|
|
339
|
+
└─────────────────────────────────┘
|
|
340
|
+
│
|
|
341
|
+
│ Pipeline
|
|
342
|
+
▼
|
|
343
|
+
┌─────────────────────────────────┐
|
|
344
|
+
│ Routing Middleware │
|
|
345
|
+
│ │
|
|
346
|
+
│ Apply routing rules: │
|
|
347
|
+
│ - Rule 1: audit → encrypted │
|
|
348
|
+
│ - Rule 2: >90d → cold storage │
|
|
349
|
+
│ - Rule 3: <30d → hot storage │
|
|
350
|
+
└─────────────────────────────────┘
|
|
351
|
+
│
|
|
352
|
+
▼
|
|
353
|
+
┌───────┴───────┐
|
|
354
|
+
│ │
|
|
355
|
+
┌─────▼─────┐ ┌─────▼─────┐
|
|
356
|
+
│ Adapter │ │ Adapter │
|
|
357
|
+
│ Loki │ │ S3 │
|
|
358
|
+
└───────────┘ └───────────┘
|
|
359
|
+
```
|
|
360
|
+
|
|
361
|
+
### Component Responsibilities
|
|
362
|
+
|
|
363
|
+
| Component | Responsibility |
|
|
364
|
+
|-----------|----------------|
|
|
365
|
+
| **Event::Base** | Declare `retention_period`, calculate `retention_until` |
|
|
366
|
+
| **Configuration** | Define `routing_rules` (lambdas), `default_retention_period` |
|
|
367
|
+
| **Routing Middleware** | Apply rules, select adapters, write events |
|
|
368
|
+
| **Adapters** | Write events to storage (Loki, S3, Sentry, etc.) |
|
|
369
|
+
|
|
370
|
+
---
|
|
371
|
+
|
|
372
|
+
## 🛠️ Implementation Guide
|
|
373
|
+
|
|
374
|
+
### Step 1: Add retention_period to Events
|
|
375
|
+
|
|
376
|
+
```ruby
|
|
377
|
+
# app/events/order_placed_event.rb
|
|
378
|
+
class OrderPlacedEvent < E11y::Event::Base
|
|
379
|
+
retention_period 90.days # ← NEW!
|
|
380
|
+
|
|
381
|
+
schema do
|
|
382
|
+
required(:order_id).filled(:string)
|
|
383
|
+
required(:amount).filled(:integer)
|
|
384
|
+
end
|
|
385
|
+
end
|
|
386
|
+
|
|
387
|
+
# app/events/audit/user_deleted_event.rb
|
|
388
|
+
class Audit::UserDeletedEvent < E11y::Event::Base
|
|
389
|
+
audit_event true
|
|
390
|
+
retention_period 7.years # ← NEW!
|
|
391
|
+
|
|
392
|
+
schema do
|
|
393
|
+
required(:user_id).filled(:integer)
|
|
394
|
+
required(:deleted_by).filled(:integer)
|
|
395
|
+
end
|
|
396
|
+
end
|
|
397
|
+
```
|
|
398
|
+
|
|
399
|
+
### Step 2: Configure Routing Rules
|
|
400
|
+
|
|
401
|
+
```ruby
|
|
402
|
+
# config/initializers/e11y.rb
|
|
403
|
+
E11y.configure do |config|
|
|
404
|
+
# Default retention (fallback)
|
|
405
|
+
config.default_retention_period = 30.days
|
|
406
|
+
|
|
407
|
+
# Routing rules (evaluated in order)
|
|
408
|
+
config.routing_rules = [
|
|
409
|
+
# Priority 1: Audit events → encrypted storage
|
|
410
|
+
->(event) {
|
|
411
|
+
:audit_encrypted if event[:audit_event]
|
|
412
|
+
},
|
|
413
|
+
|
|
414
|
+
# Priority 2: Errors → Sentry + storage
|
|
415
|
+
->(event) {
|
|
416
|
+
[:sentry, :loki] if event[:severity] == :error
|
|
417
|
+
},
|
|
418
|
+
|
|
419
|
+
# Priority 3: Retention-based tiering
|
|
420
|
+
->(event) {
|
|
421
|
+
days = (Time.parse(event[:retention_until]) - Time.now) / 86400
|
|
422
|
+
case days
|
|
423
|
+
when 0..7 then :stdout # Very short → console
|
|
424
|
+
when 7..30 then :loki # Short → hot storage (`days` is a Float; starting at 7 keeps values like 7.5 from falling through to cold storage)
|
|
425
|
+
when 30..90 then :s3_standard # Medium → warm storage (`days` is a Float; starting at 30 keeps values like 30.5 from falling through to cold storage)
|
|
426
|
+
else :s3_glacier # Long → cold storage
|
|
427
|
+
end
|
|
428
|
+
}
|
|
429
|
+
]
|
|
430
|
+
|
|
431
|
+
# Fallback if no rule matches
|
|
432
|
+
config.fallback_adapters = [:stdout]
|
|
433
|
+
|
|
434
|
+
# Register adapters
|
|
435
|
+
config.add_adapter :loki, E11y::Adapters::Loki.new(...)
|
|
436
|
+
config.add_adapter :s3_standard, E11y::Adapters::File.new(path: 's3://bucket/warm/')
|
|
437
|
+
config.add_adapter :s3_glacier, E11y::Adapters::File.new(path: 's3://bucket/cold/')
|
|
438
|
+
config.add_adapter :audit_encrypted, E11y::Adapters::AuditEncrypted.new(...)
|
|
439
|
+
config.add_adapter :sentry, E11y::Adapters::Sentry.new(...)
|
|
440
|
+
end
|
|
441
|
+
```
|
|
442
|
+
|
|
443
|
+
### Step 3: Test Routing
|
|
444
|
+
|
|
445
|
+
```ruby
|
|
446
|
+
# spec/e11y/routing_spec.rb
|
|
447
|
+
RSpec.describe "Retention-based routing" do
|
|
448
|
+
it "routes debug events to stdout" do
|
|
449
|
+
event = DebugEvent.track(query: "SELECT...")
|
|
450
|
+
|
|
451
|
+
expect(event[:retention_until]).to eq(7.days.from_now.iso8601)
|
|
452
|
+
expect(E11y.configuration.adapters[:stdout]).to have_received(:write)
|
|
453
|
+
end
|
|
454
|
+
|
|
455
|
+
it "routes audit events to encrypted storage" do
|
|
456
|
+
event = UserDeletedEvent.track(user_id: 123, deleted_by: 456)
|
|
457
|
+
|
|
458
|
+
expect(event[:retention_until]).to eq(7.years.from_now.iso8601)
|
|
459
|
+
expect(event[:audit_event]).to be true
|
|
460
|
+
expect(E11y.configuration.adapters[:audit_encrypted]).to have_received(:write)
|
|
461
|
+
end
|
|
462
|
+
|
|
463
|
+
it "routes long retention to cold storage" do
|
|
464
|
+
event = BusinessEvent.track(data: "...")
|
|
465
|
+
allow(event).to receive(:[]).with(:retention_until).and_return(365.days.from_now.iso8601)
|
|
466
|
+
|
|
467
|
+
expect(E11y.configuration.adapters[:s3_glacier]).to have_received(:write)
|
|
468
|
+
end
|
|
469
|
+
end
|
|
470
|
+
```
|
|
471
|
+
|
|
472
|
+
---
|
|
473
|
+
|
|
474
|
+
## 📊 Cost Comparison
|
|
475
|
+
|
|
476
|
+
### Before: Manual Adapter Selection
|
|
477
|
+
|
|
478
|
+
```ruby
|
|
479
|
+
class DebugEvent < E11y::Event::Base
|
|
480
|
+
adapters :loki # Expensive!
|
|
481
|
+
end
|
|
482
|
+
|
|
483
|
+
class AuditEvent < E11y::Event::Base
|
|
484
|
+
adapters :audit_encrypted # Manual
|
|
485
|
+
end
|
|
486
|
+
```
|
|
487
|
+
|
|
488
|
+
**Monthly Costs:**
|
|
489
|
+
- Debug logs (7d, but stored 30d in Loki): **$500**
|
|
490
|
+
- Business events (90d in Loki): **$200**
|
|
491
|
+
- Audit logs (7y in Loki): **$5000**
|
|
492
|
+
- **Total: $5,700/month**
|
|
493
|
+
|
|
494
|
+
### After: Retention-Based Routing
|
|
495
|
+
|
|
496
|
+
```ruby
|
|
497
|
+
class DebugEvent < E11y::Event::Base
|
|
498
|
+
retention_period 7.days # Automatic routing
|
|
499
|
+
end
|
|
500
|
+
|
|
501
|
+
class AuditEvent < E11y::Event::Base
|
|
502
|
+
audit_event true
|
|
503
|
+
retention_period 7.years # Automatic routing
|
|
504
|
+
end
|
|
505
|
+
```
|
|
506
|
+
|
|
507
|
+
**Monthly Costs:**
|
|
508
|
+
- Debug logs (7d in stdout): **$0** ✅
|
|
509
|
+
- Business events (30d Loki + 60d S3): **$120** ✅
|
|
510
|
+
- Audit logs (7y S3 Glacier): **$50** ✅
|
|
511
|
+
- **Total: $170/month**
|
|
512
|
+
|
|
513
|
+
**Savings: 97% ($5,530/month)**
|
|
514
|
+
|
|
515
|
+
---
|
|
516
|
+
|
|
517
|
+
## ✅ Benefits
|
|
518
|
+
|
|
519
|
+
| Benefit | Impact |
|
|
520
|
+
|---------|--------|
|
|
521
|
+
| **Cost Optimization** | 80-97% savings via automatic tiered routing |
|
|
522
|
+
| **Compliance** | Audit events guaranteed in encrypted storage |
|
|
523
|
+
| **Developer Experience** | Declare intent (`retention_period`), routing handles the rest |
|
|
524
|
+
| **Flexibility** | Lambda rules allow complex business logic |
|
|
525
|
+
| **Maintainability** | Centralized routing config (not per-event) |
|
|
526
|
+
| **Testing** | Test routing rules once, not per event |
|
|
527
|
+
|
|
528
|
+
---
|
|
529
|
+
|
|
530
|
+
## 🚀 Migration Strategy
|
|
531
|
+
|
|
532
|
+
### Phase 1: Add DSL (Backward Compatible)
|
|
533
|
+
|
|
534
|
+
```ruby
|
|
535
|
+
# Existing events work without changes (use default 30 days)
|
|
536
|
+
class OrderEvent < E11y::Event::Base
|
|
537
|
+
# No changes needed
|
|
538
|
+
end
|
|
539
|
+
|
|
540
|
+
# New events can specify retention
|
|
541
|
+
class AuditEvent < E11y::Event::Base
|
|
542
|
+
retention_period 7.years
|
|
543
|
+
end
|
|
544
|
+
```
|
|
545
|
+
|
|
546
|
+
### Phase 2: Enable Routing
|
|
547
|
+
|
|
548
|
+
```ruby
|
|
549
|
+
# Add routing rules to config
|
|
550
|
+
E11y.configure do |config|
|
|
551
|
+
config.routing_rules = [...]
|
|
552
|
+
end
|
|
553
|
+
|
|
554
|
+
# Explicit adapters still work (bypass routing)
|
|
555
|
+
class LegacyEvent < E11y::Event::Base
|
|
556
|
+
adapters :loki # Still works!
|
|
557
|
+
end
|
|
558
|
+
```
|
|
559
|
+
|
|
560
|
+
### Phase 3: Gradual Migration
|
|
561
|
+
|
|
562
|
+
```ruby
|
|
563
|
+
# Update events one by one
|
|
564
|
+
class OrderEvent < E11y::Event::Base
|
|
565
|
+
retention_period 90.days # Now uses routing!
|
|
566
|
+
# Remove: adapters :loki (no longer needed)
|
|
567
|
+
end
|
|
568
|
+
```
|
|
569
|
+
|
|
570
|
+
---
|
|
571
|
+
|
|
572
|
+
## 🎯 Success Criteria
|
|
573
|
+
|
|
574
|
+
- ✅ **100% of audit events** go to `audit_encrypted` adapter
|
|
575
|
+
- ✅ **Debug logs** (7d retention) → stdout (free)
|
|
576
|
+
- ✅ **Business events** (90d retention) → tiered storage (Loki + S3)
|
|
577
|
+
- ✅ **Cost reduction** of 80%+ compared to manual adapter selection
|
|
578
|
+
- ✅ **Zero manual intervention** (routing is automatic)
|
|
579
|
+
|
|
580
|
+
---
|
|
581
|
+
|
|
582
|
+
**Status:** ✅ Ready for Implementation (2026-01-21)
|
|
583
|
+
**Estimated Effort:** 1 week (Event DSL + Routing Middleware + Tests)
|
|
584
|
+
**Cost Impact:** 80-97% savings on storage costs
|
data/e11y.gemspec
CHANGED
|
@@ -7,24 +7,33 @@ Gem::Specification.new do |spec|
|
|
|
7
7
|
spec.version = E11y::VERSION
|
|
8
8
|
spec.authors = ["Artur Seletskiy"]
|
|
9
9
|
|
|
10
|
-
spec.summary = "E11y - Easy Telemetry:
|
|
10
|
+
spec.summary = "E11y - Easy Telemetry: Observability for Rails developers who hate noise"
|
|
11
11
|
spec.description = <<~DESC
|
|
12
|
-
E11y (Easy Telemetry) -
|
|
12
|
+
E11y (Easy Telemetry) - Observability for Rails developers who hate noise.
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
•
|
|
16
|
-
•
|
|
17
|
-
•
|
|
18
|
-
• 🔒 GDPR/SOC2 Compliance - built-in PII filtering and audit trails
|
|
19
|
-
• 🔌 Pluggable Adapters - send events to Loki, Sentry, OpenTelemetry, Elasticsearch, or custom backends
|
|
20
|
-
• 🚀 High Performance - zero-allocation event tracking, lock-free ring buffers, adaptive memory limits
|
|
21
|
-
• 🧵 Thread-Safe - designed for multi-threaded Rails apps and Sidekiq workers
|
|
22
|
-
• 🎭 Multi-Tenant Ready - trace context propagation across services with OpenTelemetry integration
|
|
23
|
-
• 📝 Type-Safe Events - declarative event schemas with dry-schema validation
|
|
24
|
-
• ⚡ Rate Limiting & Sampling - protect production from metric storms and cost overruns
|
|
14
|
+
UNIQUE FEATURES:
|
|
15
|
+
• Request-scoped debug buffering - buffers debug logs in memory, flushes ONLY on errors
|
|
16
|
+
• Zero-config SLO tracking - automatic Service Level Objectives for HTTP endpoints and jobs
|
|
17
|
+
• Schema-validated events - catch bugs before production with dry-schema
|
|
25
18
|
|
|
26
|
-
|
|
27
|
-
|
|
19
|
+
DEVELOPER EXPERIENCE:
|
|
20
|
+
• 5-minute setup (not 2-week migration)
|
|
21
|
+
• Auto-metrics from events (no manual Yabeda.increment)
|
|
22
|
+
• Rails-first design (follows Rails conventions)
|
|
23
|
+
• Pluggable adapters (Loki, Sentry, OpenTelemetry, custom backends)
|
|
24
|
+
|
|
25
|
+
COST SAVINGS:
|
|
26
|
+
• Reduce log storage costs by 90% (request-scoped buffering)
|
|
27
|
+
• Replace expensive APM SaaS ($500-5k/month → infra costs only)
|
|
28
|
+
• Own your observability data (no vendor lock-in)
|
|
29
|
+
|
|
30
|
+
PRODUCTION-READY:
|
|
31
|
+
• Thread-safe for multi-threaded Rails + Sidekiq
|
|
32
|
+
• Adaptive sampling (error-based, load-based, value-based)
|
|
33
|
+
• PII filtering (GDPR-compliant masking/hashing)
|
|
34
|
+
• Performance optimized (hash-based events, minimal allocations)
|
|
35
|
+
|
|
36
|
+
Perfect for Rails 7.0+ teams who need observability without complexity or high costs.
|
|
28
37
|
DESC
|
|
29
38
|
spec.homepage = "https://github.com/arturseletskiy/e11y"
|
|
30
39
|
spec.license = "MIT"
|
|
@@ -50,14 +59,16 @@ Gem::Specification.new do |spec|
|
|
|
50
59
|
spec.require_paths = ["lib"]
|
|
51
60
|
|
|
52
61
|
# Runtime dependencies
|
|
53
|
-
spec.add_dependency "activesupport", ">= 7.0"
|
|
54
62
|
spec.add_dependency "concurrent-ruby", "~> 1.2" # Thread-safe data structures
|
|
55
63
|
spec.add_dependency "dry-schema", "~> 1.13" # Event schema validation
|
|
56
64
|
spec.add_dependency "dry-types", "~> 1.7"
|
|
65
|
+
spec.add_dependency "rails", ">= 7.0"
|
|
57
66
|
spec.add_dependency "zeitwerk", "~> 2.6"
|
|
58
67
|
|
|
59
68
|
# Development dependencies
|
|
60
|
-
spec.add_development_dependency "
|
|
69
|
+
spec.add_development_dependency "benchmark-ips", "~> 2.13" # For performance benchmarks
|
|
70
|
+
spec.add_development_dependency "memory_profiler", "~> 1.0" # For memory profiling
|
|
71
|
+
spec.add_development_dependency "rack", ">= 2.2.4" # For Rack middleware testing (supports Rails 7.0+)
|
|
61
72
|
spec.add_development_dependency "rake", "~> 13.0"
|
|
62
73
|
spec.add_development_dependency "rspec", "~> 3.12"
|
|
63
74
|
spec.add_development_dependency "rubocop", "~> 1.50"
|
|
@@ -74,6 +74,8 @@ module E11y
|
|
|
74
74
|
#
|
|
75
75
|
# @param event_data [Hash] Event to add to buffer
|
|
76
76
|
# @return [Boolean] true if added successfully
|
|
77
|
+
# rubocop:disable Naming/PredicateMethod
|
|
78
|
+
# This is an action method (add to buffer), not a predicate (is added?)
|
|
77
79
|
def add(event_data)
|
|
78
80
|
return false if @closed
|
|
79
81
|
|
|
@@ -85,6 +87,7 @@ module E11y
|
|
|
85
87
|
|
|
86
88
|
true
|
|
87
89
|
end
|
|
90
|
+
# rubocop:enable Naming/PredicateMethod
|
|
88
91
|
|
|
89
92
|
# Flush buffer immediately
|
|
90
93
|
#
|