e11y 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +4 -0
- data/.rubocop.yml +69 -0
- data/CHANGELOG.md +26 -0
- data/CODE_OF_CONDUCT.md +64 -0
- data/LICENSE.txt +21 -0
- data/README.md +179 -0
- data/Rakefile +37 -0
- data/benchmarks/run_all.rb +33 -0
- data/config/README.md +83 -0
- data/config/loki-local-config.yaml +35 -0
- data/config/prometheus.yml +15 -0
- data/docker-compose.yml +78 -0
- data/docs/00-ICP-AND-TIMELINE.md +483 -0
- data/docs/01-SCALE-REQUIREMENTS.md +858 -0
- data/docs/ADR-001-architecture.md +2617 -0
- data/docs/ADR-002-metrics-yabeda.md +1395 -0
- data/docs/ADR-003-slo-observability.md +3337 -0
- data/docs/ADR-004-adapter-architecture.md +2385 -0
- data/docs/ADR-005-tracing-context.md +1372 -0
- data/docs/ADR-006-security-compliance.md +4143 -0
- data/docs/ADR-007-opentelemetry-integration.md +1385 -0
- data/docs/ADR-008-rails-integration.md +1911 -0
- data/docs/ADR-009-cost-optimization.md +2993 -0
- data/docs/ADR-010-developer-experience.md +2166 -0
- data/docs/ADR-011-testing-strategy.md +1836 -0
- data/docs/ADR-012-event-evolution.md +958 -0
- data/docs/ADR-013-reliability-error-handling.md +2750 -0
- data/docs/ADR-014-event-driven-slo.md +1533 -0
- data/docs/ADR-015-middleware-order.md +1061 -0
- data/docs/ADR-016-self-monitoring-slo.md +1234 -0
- data/docs/API-REFERENCE-L28.md +914 -0
- data/docs/COMPREHENSIVE-CONFIGURATION.md +2366 -0
- data/docs/IMPLEMENTATION_NOTES.md +2804 -0
- data/docs/IMPLEMENTATION_PLAN.md +1971 -0
- data/docs/IMPLEMENTATION_PLAN_ARCHITECTURE.md +586 -0
- data/docs/PLAN.md +148 -0
- data/docs/QUICK-START.md +934 -0
- data/docs/README.md +296 -0
- data/docs/design/00-memory-optimization.md +593 -0
- data/docs/guides/MIGRATION-L27-L28.md +692 -0
- data/docs/guides/PERFORMANCE-BENCHMARKS.md +434 -0
- data/docs/guides/README.md +44 -0
- data/docs/prd/01-overview-vision.md +440 -0
- data/docs/use_cases/README.md +119 -0
- data/docs/use_cases/UC-001-request-scoped-debug-buffering.md +813 -0
- data/docs/use_cases/UC-002-business-event-tracking.md +1953 -0
- data/docs/use_cases/UC-003-pattern-based-metrics.md +1627 -0
- data/docs/use_cases/UC-004-zero-config-slo-tracking.md +728 -0
- data/docs/use_cases/UC-005-sentry-integration.md +759 -0
- data/docs/use_cases/UC-006-trace-context-management.md +905 -0
- data/docs/use_cases/UC-007-pii-filtering.md +2648 -0
- data/docs/use_cases/UC-008-opentelemetry-integration.md +1153 -0
- data/docs/use_cases/UC-009-multi-service-tracing.md +1043 -0
- data/docs/use_cases/UC-010-background-job-tracking.md +1018 -0
- data/docs/use_cases/UC-011-rate-limiting.md +1906 -0
- data/docs/use_cases/UC-012-audit-trail.md +2301 -0
- data/docs/use_cases/UC-013-high-cardinality-protection.md +2127 -0
- data/docs/use_cases/UC-014-adaptive-sampling.md +1940 -0
- data/docs/use_cases/UC-015-cost-optimization.md +735 -0
- data/docs/use_cases/UC-016-rails-logger-migration.md +785 -0
- data/docs/use_cases/UC-017-local-development.md +867 -0
- data/docs/use_cases/UC-018-testing-events.md +1081 -0
- data/docs/use_cases/UC-019-tiered-storage-migration.md +562 -0
- data/docs/use_cases/UC-020-event-versioning.md +708 -0
- data/docs/use_cases/UC-021-error-handling-retry-dlq.md +956 -0
- data/docs/use_cases/UC-022-event-registry.md +648 -0
- data/docs/use_cases/backlog.md +226 -0
- data/e11y.gemspec +76 -0
- data/lib/e11y/adapters/adaptive_batcher.rb +207 -0
- data/lib/e11y/adapters/audit_encrypted.rb +239 -0
- data/lib/e11y/adapters/base.rb +580 -0
- data/lib/e11y/adapters/file.rb +224 -0
- data/lib/e11y/adapters/in_memory.rb +216 -0
- data/lib/e11y/adapters/loki.rb +333 -0
- data/lib/e11y/adapters/otel_logs.rb +203 -0
- data/lib/e11y/adapters/registry.rb +141 -0
- data/lib/e11y/adapters/sentry.rb +230 -0
- data/lib/e11y/adapters/stdout.rb +108 -0
- data/lib/e11y/adapters/yabeda.rb +370 -0
- data/lib/e11y/buffers/adaptive_buffer.rb +339 -0
- data/lib/e11y/buffers/base_buffer.rb +40 -0
- data/lib/e11y/buffers/request_scoped_buffer.rb +246 -0
- data/lib/e11y/buffers/ring_buffer.rb +267 -0
- data/lib/e11y/buffers.rb +14 -0
- data/lib/e11y/console.rb +122 -0
- data/lib/e11y/current.rb +48 -0
- data/lib/e11y/event/base.rb +894 -0
- data/lib/e11y/event/value_sampling_config.rb +84 -0
- data/lib/e11y/events/base_audit_event.rb +43 -0
- data/lib/e11y/events/base_payment_event.rb +33 -0
- data/lib/e11y/events/rails/cache/delete.rb +21 -0
- data/lib/e11y/events/rails/cache/read.rb +23 -0
- data/lib/e11y/events/rails/cache/write.rb +22 -0
- data/lib/e11y/events/rails/database/query.rb +45 -0
- data/lib/e11y/events/rails/http/redirect.rb +21 -0
- data/lib/e11y/events/rails/http/request.rb +26 -0
- data/lib/e11y/events/rails/http/send_file.rb +21 -0
- data/lib/e11y/events/rails/http/start_processing.rb +26 -0
- data/lib/e11y/events/rails/job/completed.rb +22 -0
- data/lib/e11y/events/rails/job/enqueued.rb +22 -0
- data/lib/e11y/events/rails/job/failed.rb +22 -0
- data/lib/e11y/events/rails/job/scheduled.rb +23 -0
- data/lib/e11y/events/rails/job/started.rb +22 -0
- data/lib/e11y/events/rails/log.rb +56 -0
- data/lib/e11y/events/rails/view/render.rb +23 -0
- data/lib/e11y/events.rb +18 -0
- data/lib/e11y/instruments/active_job.rb +201 -0
- data/lib/e11y/instruments/rails_instrumentation.rb +141 -0
- data/lib/e11y/instruments/sidekiq.rb +175 -0
- data/lib/e11y/logger/bridge.rb +205 -0
- data/lib/e11y/metrics/cardinality_protection.rb +172 -0
- data/lib/e11y/metrics/cardinality_tracker.rb +134 -0
- data/lib/e11y/metrics/registry.rb +234 -0
- data/lib/e11y/metrics/relabeling.rb +226 -0
- data/lib/e11y/metrics.rb +102 -0
- data/lib/e11y/middleware/audit_signing.rb +174 -0
- data/lib/e11y/middleware/base.rb +140 -0
- data/lib/e11y/middleware/event_slo.rb +167 -0
- data/lib/e11y/middleware/pii_filter.rb +266 -0
- data/lib/e11y/middleware/pii_filtering.rb +280 -0
- data/lib/e11y/middleware/rate_limiting.rb +214 -0
- data/lib/e11y/middleware/request.rb +163 -0
- data/lib/e11y/middleware/routing.rb +157 -0
- data/lib/e11y/middleware/sampling.rb +254 -0
- data/lib/e11y/middleware/slo.rb +168 -0
- data/lib/e11y/middleware/trace_context.rb +131 -0
- data/lib/e11y/middleware/validation.rb +118 -0
- data/lib/e11y/middleware/versioning.rb +132 -0
- data/lib/e11y/middleware.rb +12 -0
- data/lib/e11y/pii/patterns.rb +90 -0
- data/lib/e11y/pii.rb +13 -0
- data/lib/e11y/pipeline/builder.rb +155 -0
- data/lib/e11y/pipeline/zone_validator.rb +110 -0
- data/lib/e11y/pipeline.rb +12 -0
- data/lib/e11y/presets/audit_event.rb +65 -0
- data/lib/e11y/presets/debug_event.rb +34 -0
- data/lib/e11y/presets/high_value_event.rb +51 -0
- data/lib/e11y/presets.rb +19 -0
- data/lib/e11y/railtie.rb +138 -0
- data/lib/e11y/reliability/circuit_breaker.rb +216 -0
- data/lib/e11y/reliability/dlq/file_storage.rb +277 -0
- data/lib/e11y/reliability/dlq/filter.rb +117 -0
- data/lib/e11y/reliability/retry_handler.rb +207 -0
- data/lib/e11y/reliability/retry_rate_limiter.rb +117 -0
- data/lib/e11y/sampling/error_spike_detector.rb +225 -0
- data/lib/e11y/sampling/load_monitor.rb +161 -0
- data/lib/e11y/sampling/stratified_tracker.rb +92 -0
- data/lib/e11y/sampling/value_extractor.rb +82 -0
- data/lib/e11y/self_monitoring/buffer_monitor.rb +79 -0
- data/lib/e11y/self_monitoring/performance_monitor.rb +97 -0
- data/lib/e11y/self_monitoring/reliability_monitor.rb +146 -0
- data/lib/e11y/slo/event_driven.rb +150 -0
- data/lib/e11y/slo/tracker.rb +119 -0
- data/lib/e11y/version.rb +9 -0
- data/lib/e11y.rb +283 -0
- metadata +452 -0
|
@@ -0,0 +1,2366 @@
|
|
|
1
|
+
# E11y Comprehensive Configuration Guide
|
|
2
|
+
|
|
3
|
+
**Purpose:** A maximally complete configuration example covering ALL 22 use cases, intended for analyzing conflicts between features.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## 📋 Table of Contents
|
|
8
|
+
|
|
9
|
+
1. [Initializer Configuration](#1-initializer-configuration-v11---recommended-)
|
|
10
|
+
2. [Event Examples](#2-event-examples-v11---recommended-)
|
|
11
|
+
3. [Feature Coverage Matrix](#feature-coverage-matrix)
|
|
12
|
+
4. [Conflict Analysis](#conflict-analysis)
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## 1. Initializer Configuration (v1.1 - RECOMMENDED ✅)
|
|
17
|
+
|
|
18
|
+
> **🎯 v1.1 Philosophy: Infrastructure Only**
|
|
19
|
+
>
|
|
20
|
+
> Global configuration contains **ONLY infrastructure** (adapters, buffer, circuit breaker, hooks).
|
|
21
|
+
> **Event-specific configuration** (severity, rate_limit, sampling, PII) is defined at **event-level** (see Section 2).
|
|
22
|
+
>
|
|
23
|
+
> **Result:** Global config reduced from **1400+ lines (v1.0) to <300 lines (v1.1)** - **78% reduction!**
|
|
24
|
+
>
|
|
25
|
+
> **What's in global config:**
|
|
26
|
+
> - ✅ Adapters registry (register once, reference everywhere)
|
|
27
|
+
> - ✅ Buffer configuration (system-wide resource management)
|
|
28
|
+
> - ✅ Circuit breaker (adapter health protection)
|
|
29
|
+
> - ✅ Global context enrichment (added to ALL events)
|
|
30
|
+
> - ✅ Hooks & lifecycle (system-wide event processing)
|
|
31
|
+
> - ✅ Graceful shutdown
|
|
32
|
+
>
|
|
33
|
+
> **What's NOT in global config (moved to event-level):**
|
|
34
|
+
> - ❌ Per-event severity, rate limits, sampling rates
|
|
35
|
+
> - ❌ Per-event PII filtering rules
|
|
36
|
+
> - ❌ Per-event metrics definitions
|
|
37
|
+
> - ❌ Per-event adapter routing
|
|
38
|
+
> - ❌ Per-event retention policies
|
|
39
|
+
>
|
|
40
|
+
> See [Section 2](#2-event-examples-v11---recommended-) for event-level configuration examples.
|
|
41
|
+
|
|
42
|
+
### config/initializers/e11y.rb (v1.1 - Infrastructure Only)
|
|
43
|
+
|
|
44
|
+
```ruby
|
|
45
|
+
# frozen_string_literal: true
|
|
46
|
+
|
|
47
|
+
# E11y v1.1 Configuration - Infrastructure Only
|
|
48
|
+
# Event-specific config (severity, rate_limit, sampling, PII) is at event-level!
|
|
49
|
+
# See Section 2 for event examples.
|
|
50
|
+
|
|
51
|
+
E11y.configure do |config|
|
|
52
|
+
# ============================================================================
|
|
53
|
+
# ADAPTERS REGISTRY
|
|
54
|
+
# ============================================================================
|
|
55
|
+
# Register adapters once, reference by name in events
|
|
56
|
+
# Related: UC-002 (Business Events), ADR-004 (Adapter Architecture)
|
|
57
|
+
|
|
58
|
+
config.adapters do
|
|
59
|
+
# === Primary: Loki (logs) ===
|
|
60
|
+
register :loki, E11y::Adapters::LokiAdapter.new(
|
|
61
|
+
url: ENV['LOKI_URL'] || 'http://localhost:3100',
|
|
62
|
+
labels: {
|
|
63
|
+
env: Rails.env,
|
|
64
|
+
service: ENV['SERVICE_NAME'] || 'api'
|
|
65
|
+
},
|
|
66
|
+
timeout: 5,
|
|
67
|
+
batch_size: 1000
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
# === Primary: Elasticsearch (long-term storage + analytics) ===
|
|
71
|
+
register :elasticsearch, E11y::Adapters::ElasticsearchAdapter.new(
|
|
72
|
+
url: ENV['ELASTICSEARCH_URL'] || 'http://localhost:9200',
|
|
73
|
+
index_prefix: 'e11y-events',
|
|
74
|
+
username: ENV['ES_USERNAME'],
|
|
75
|
+
password: ENV['ES_PASSWORD'],
|
|
76
|
+
timeout: 10
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
# === Alerts: Sentry (errors) ===
|
|
80
|
+
register :sentry, E11y::Adapters::SentryAdapter.new(
|
|
81
|
+
dsn: ENV['SENTRY_DSN'],
|
|
82
|
+
environment: Rails.env,
|
|
83
|
+
release: ENV['GIT_SHA']
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
# === Alerts: PagerDuty (critical incidents) ===
|
|
87
|
+
register :pagerduty, E11y::Adapters::PagerDutyAdapter.new(
|
|
88
|
+
api_key: ENV['PAGERDUTY_API_KEY'],
|
|
89
|
+
service_id: ENV['PAGERDUTY_SERVICE_ID']
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
# === Alerts: Slack (team notifications) ===
|
|
93
|
+
register :slack, E11y::Adapters::SlackAdapter.new(
|
|
94
|
+
webhook_url: ENV['SLACK_WEBHOOK_URL'],
|
|
95
|
+
channel: '#alerts',
|
|
96
|
+
username: 'E11y Bot'
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
# === Local: File (development, fallback) ===
|
|
100
|
+
register :file, E11y::Adapters::FileAdapter.new(
|
|
101
|
+
path: Rails.root.join('log', 'e11y'),
|
|
102
|
+
rotation: :daily,
|
|
103
|
+
max_size: 100.megabytes
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
# === Archive: S3 (cold storage) ===
|
|
107
|
+
register :s3, E11y::Adapters::S3Adapter.new(
|
|
108
|
+
bucket: ENV['S3_BUCKET'],
|
|
109
|
+
region: ENV['AWS_REGION'],
|
|
110
|
+
prefix: "e11y-events/#{Rails.env}",
|
|
111
|
+
compression: :gzip
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
# === Security: Audit Log (compliance) ===
|
|
115
|
+
register :audit_encrypted, E11y::Adapters::FileAdapter.new(
|
|
116
|
+
path: Rails.root.join('log', 'audit'),
|
|
117
|
+
permissions: 0600, # Owner read/write only (no group/other access)
|
|
118
|
+
rotation: :never, # Never rotate (append-only)
|
|
119
|
+
encryption: true
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
# === OpenTelemetry: OTLP (collector) ===
|
|
123
|
+
register :otlp, E11y::Adapters::OtlpAdapter.new(
|
|
124
|
+
endpoint: ENV['OTEL_COLLECTOR_URL'] || 'http://localhost:4318',
|
|
125
|
+
protocol: :http,
|
|
126
|
+
headers: { 'X-API-Key' => ENV['OTEL_API_KEY'] }
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
# === Testing: Memory (tests) ===
|
|
130
|
+
register :memory, E11y::Adapters::MemoryAdapter.new
|
|
131
|
+
|
|
132
|
+
# === Debug: Console (development) ===
|
|
133
|
+
register :console, E11y::Adapters::ConsoleAdapter.new(
|
|
134
|
+
colored: true,
|
|
135
|
+
pretty: true
|
|
136
|
+
)
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
# Default adapters per environment
|
|
140
|
+
config.default_adapters = case Rails.env
|
|
141
|
+
when 'production'
|
|
142
|
+
[:loki, :elasticsearch, :otlp]
|
|
143
|
+
when 'staging'
|
|
144
|
+
[:loki, :elasticsearch]
|
|
145
|
+
when 'development'
|
|
146
|
+
[:console, :file]
|
|
147
|
+
when 'test'
|
|
148
|
+
[:memory]
|
|
149
|
+
else
|
|
150
|
+
[:file]
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
# ============================================================================
|
|
154
|
+
# BUFFER CONFIGURATION
|
|
155
|
+
# ============================================================================
|
|
156
|
+
# Main buffer for event processing
|
|
157
|
+
# Related: ADR-001 (Core Architecture), CONTRADICTION_02 (Buffers)
|
|
158
|
+
|
|
159
|
+
config.buffer do
|
|
160
|
+
# Ring buffer (SPSC - Single Producer Single Consumer)
|
|
161
|
+
capacity 100_000 # Max events in buffer
|
|
162
|
+
|
|
163
|
+
# Flush configuration
|
|
164
|
+
flush_interval 200 # milliseconds
|
|
165
|
+
flush_batch_size 500 # events per batch
|
|
166
|
+
|
|
167
|
+
# Worker threads
|
|
168
|
+
worker_threads 2 # Parallel workers for flushing
|
|
169
|
+
|
|
170
|
+
# Overflow strategy
|
|
171
|
+
overflow_strategy :drop_oldest # :drop_oldest, :drop_newest, :block
|
|
172
|
+
|
|
173
|
+
# Backpressure (load-based throttling)
|
|
174
|
+
backpressure do
|
|
175
|
+
enabled true
|
|
176
|
+
high_watermark 0.8 # 80% full → start sampling
|
|
177
|
+
low_watermark 0.5 # 50% full → resume normal
|
|
178
|
+
|
|
179
|
+
# Actions on high watermark
|
|
180
|
+
actions [:sample, :increase_flush_rate]
|
|
181
|
+
sample_rate_under_pressure 0.5 # 50% when buffer is 80%+ full
|
|
182
|
+
end
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
# ============================================================================
|
|
186
|
+
# CIRCUIT BREAKER (Adapter Health Protection)
|
|
187
|
+
# ============================================================================
|
|
188
|
+
# Per-adapter circuit breakers to prevent cascading failures
|
|
189
|
+
# Related: ADR-013 (Reliability & Error Handling), UC-021 (Error Handling)
|
|
190
|
+
|
|
191
|
+
config.circuit_breaker do
|
|
192
|
+
enabled true
|
|
193
|
+
per_adapter true # Separate circuit breaker per adapter
|
|
194
|
+
|
|
195
|
+
# Thresholds
|
|
196
|
+
failure_threshold 5 # Open after 5 consecutive failures
|
|
197
|
+
timeout 30.seconds # Wait before attempting reset
|
|
198
|
+
success_threshold 2 # Close after 2 consecutive successes
|
|
199
|
+
window 60.seconds # Rolling window for failure count
|
|
200
|
+
|
|
201
|
+
# Actions when circuit opens
|
|
202
|
+
on_open do |adapter_name|
|
|
203
|
+
Rails.logger.error "E11y circuit breaker opened for adapter: #{adapter_name}"
|
|
204
|
+
|
|
205
|
+
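# Send alert. NOTE: this is tracked as an E11y event, so it does NOT bypass E11y — make sure it cannot be routed to the failing adapter, to avoid recursion!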
|
|
206
|
+
Events::CircuitBreakerOpened.track(
|
|
207
|
+
adapter: adapter_name,
|
|
208
|
+
severity: :error
|
|
209
|
+
)
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
# Fallback adapter when circuit is open
|
|
213
|
+
fallback_adapter :file # Write to file if primary adapter fails
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
# ============================================================================
|
|
217
|
+
# GLOBAL CONTEXT ENRICHMENT
|
|
218
|
+
# ============================================================================
|
|
219
|
+
# Context added to ALL events automatically
|
|
220
|
+
# Related: UC-002 (Business Events), UC-006 (Trace Context)
|
|
221
|
+
|
|
222
|
+
config.events do
|
|
223
|
+
# Static context (evaluated once at boot)
|
|
224
|
+
global_context do
|
|
225
|
+
{
|
|
226
|
+
env: Rails.env,
|
|
227
|
+
service: ENV['SERVICE_NAME'] || 'api',
|
|
228
|
+
version: ENV['GIT_SHA'] || 'unknown',
|
|
229
|
+
host: Socket.gethostname,
|
|
230
|
+
deployment_id: ENV['DEPLOYMENT_ID']
|
|
231
|
+
}
|
|
232
|
+
end
|
|
233
|
+
|
|
234
|
+
# Dynamic context (evaluated per event)
|
|
235
|
+
context_enricher do |event|
|
|
236
|
+
{
|
|
237
|
+
trace_id: Current.trace_id,
|
|
238
|
+
request_id: Current.request_id,
|
|
239
|
+
user_id: Current.user&.id,
|
|
240
|
+
tenant_id: Current.tenant&.id,
|
|
241
|
+
session_id: Current.session_id,
|
|
242
|
+
ip_address: Current.ip_address
|
|
243
|
+
}
|
|
244
|
+
end
|
|
245
|
+
end
|
|
246
|
+
|
|
247
|
+
# ============================================================================
|
|
248
|
+
# LIFECYCLE HOOKS
|
|
249
|
+
# ============================================================================
|
|
250
|
+
# System-wide event processing hooks
|
|
251
|
+
# Related: ADR-001 (Core Architecture)
|
|
252
|
+
|
|
253
|
+
config.hooks do
|
|
254
|
+
# Before event is tracked
|
|
255
|
+
before_track do |event|
|
|
256
|
+
# Add custom enrichment
|
|
257
|
+
event.context[:hostname] = Socket.gethostname
|
|
258
|
+
event
|
|
259
|
+
end
|
|
260
|
+
|
|
261
|
+
# After event is tracked (but before buffered)
|
|
262
|
+
after_track do |event|
|
|
263
|
+
# Custom logic (e.g., trigger side effects)
|
|
264
|
+
if event.severity == :fatal
|
|
265
|
+
# Immediate notification (bypass buffer)
|
|
266
|
+
FatalErrorNotifier.notify(event)
|
|
267
|
+
end
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
# Before flush to adapters
|
|
271
|
+
before_flush do |events|
|
|
272
|
+
# Last chance to modify events
|
|
273
|
+
events.each do |event|
|
|
274
|
+
event.metadata[:flushed_at] = Time.now.iso8601
|
|
275
|
+
end
|
|
276
|
+
events
|
|
277
|
+
end
|
|
278
|
+
|
|
279
|
+
# After flush to adapters
|
|
280
|
+
after_flush do |events, results|
|
|
281
|
+
# results = { adapter_name => success/failure }
|
|
282
|
+
failed_adapters = results.select { |_, success| !success }.keys
|
|
283
|
+
|
|
284
|
+
if failed_adapters.any?
|
|
285
|
+
Rails.logger.error "E11y flush failed for adapters: #{failed_adapters.join(', ')}"
|
|
286
|
+
end
|
|
287
|
+
end
|
|
288
|
+
|
|
289
|
+
# On error (internal E11y error)
|
|
290
|
+
on_error do |error, context|
|
|
291
|
+
# Don't let E11y crash the app
|
|
292
|
+
Rails.logger.error "E11y internal error: #{error.message}"
|
|
293
|
+
Rails.logger.error error.backtrace.join("\n")
|
|
294
|
+
|
|
295
|
+
# Send to Sentry (but not via E11y to avoid recursion!)
|
|
296
|
+
Sentry.capture_exception(error, extra: context) if defined?(Sentry)
|
|
297
|
+
end
|
|
298
|
+
end
|
|
299
|
+
|
|
300
|
+
# ============================================================================
|
|
301
|
+
# GRACEFUL SHUTDOWN
|
|
302
|
+
# ============================================================================
|
|
303
|
+
# Ensure all events are flushed on application shutdown
|
|
304
|
+
|
|
305
|
+
config.shutdown do
|
|
306
|
+
# Timeout for graceful shutdown
|
|
307
|
+
timeout 5.seconds
|
|
308
|
+
|
|
309
|
+
# Flush remaining events on shutdown
|
|
310
|
+
flush_on_shutdown true
|
|
311
|
+
|
|
312
|
+
# Wait for workers to finish
|
|
313
|
+
wait_for_workers true
|
|
314
|
+
end
|
|
315
|
+
|
|
316
|
+
# ============================================================================
|
|
317
|
+
# AUDIT RETENTION (Global Default)
|
|
318
|
+
# ============================================================================
|
|
319
|
+
# Default retention for audit events. Can be overridden:
|
|
320
|
+
# 1. Per event: `retention 10.years` in event class
|
|
321
|
+
# 2. Per adapter: tiered storage (hot tier in Loki 30d, cold tier in S3 7y)
|
|
322
|
+
#
|
|
323
|
+
# Use Cases:
|
|
324
|
+
# - UC-012: Audit Trail (compliance requirements)
|
|
325
|
+
# - UC-019: Tiered Storage (hot/warm/cold tiers per adapter)
|
|
326
|
+
#
|
|
327
|
+
# Related: ADR-006 (Security & Compliance)
|
|
328
|
+
|
|
329
|
+
config.audit_retention = case ENV['JURISDICTION']
|
|
330
|
+
when 'EU' then 7.years # GDPR Article 30
|
|
331
|
+
when 'US' then 10.years # SOX Section 802
|
|
332
|
+
else 5.years # Conservative default
|
|
333
|
+
end
|
|
334
|
+
end
|
|
335
|
+
|
|
336
|
+
# ============================================================================
|
|
337
|
+
# Start E11y (starts background workers)
|
|
338
|
+
# ============================================================================
|
|
339
|
+
E11y.start!
|
|
340
|
+
|
|
341
|
+
# Graceful shutdown on SIGTERM/SIGINT
|
|
342
|
+
at_exit do
|
|
343
|
+
E11y.stop!(timeout: 5)
|
|
344
|
+
end
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
---
|
|
348
|
+
|
|
349
|
+
**🎯 v1.1 Summary: Infrastructure-Only Configuration**
|
|
350
|
+
|
|
351
|
+
| Category | Lines | Description |
|
|
352
|
+
|----------|-------|-------------|
|
|
353
|
+
| **Adapters Registry** | ~120 | Register 11 adapters (Loki, Sentry, ES, etc.) |
|
|
354
|
+
| **Buffer Config** | ~30 | Ring buffer, flush settings, backpressure |
|
|
355
|
+
| **Circuit Breaker** | ~30 | Per-adapter health protection |
|
|
356
|
+
| **Global Context** | ~30 | Context enrichment for ALL events |
|
|
357
|
+
| **Lifecycle Hooks** | ~50 | before_track, after_flush, on_error |
|
|
358
|
+
| **Graceful Shutdown** | ~10 | Flush on exit |
|
|
359
|
+
| **Audit Retention** | ~10 | Configurable per jurisdiction |
|
|
360
|
+
| **TOTAL** | **~280 lines** | **Well under 300!** ✅ |
|
|
361
|
+
|
|
362
|
+
**What's NOT in global config (moved to event-level):**
|
|
363
|
+
|
|
364
|
+
❌ **Per-event configuration** (see Section 2):
|
|
365
|
+
- `severity` - defined in event class
|
|
366
|
+
- `rate_limit` - defined in event class
|
|
367
|
+
- `sample_rate` - inferred from severity or explicit
|
|
368
|
+
- `adapters` - inferred from severity or explicit
|
|
369
|
+
- `retention` - inferred from severity or explicit
|
|
370
|
+
- `pii_filtering` - defined in event class
|
|
371
|
+
- `metric` - defined in event class
|
|
372
|
+
- `buffering` - defined in event class
|
|
373
|
+
- `slo_target` - defined in event class
|
|
374
|
+
|
|
375
|
+
**Migration from v1.0:**
|
|
376
|
+
- v1.0: **1400+ lines** (global config for everything)
|
|
377
|
+
- v1.1: **<300 lines** (infrastructure only)
|
|
378
|
+
- **Reduction: ~1120 lines (78%)**
|
|
379
|
+
|
|
380
|
+
**UC Coverage:**
|
|
381
|
+
- ✅ UC-001: Request-Scoped Debug Buffering → event-level `buffering` DSL
|
|
382
|
+
- ✅ UC-002: Business Event Tracking → `global_context` + event schemas
|
|
383
|
+
- ✅ UC-003: Pattern-Based Metrics → event-level `metric` DSL
|
|
384
|
+
- ✅ UC-004: Zero-Config SLO Tracking → conventions + event-level overrides
|
|
385
|
+
- ✅ UC-005: Sentry Integration → adapter registry + event-level overrides
|
|
386
|
+
- ✅ UC-006: Trace Context → `context_enricher` (global)
|
|
387
|
+
- ✅ UC-007: PII Filtering → event-level `pii_filtering` DSL
|
|
388
|
+
- ✅ UC-008: OpenTelemetry → OTLP adapter registered
|
|
389
|
+
- ✅ UC-011: Rate Limiting → event-level `rate_limit` DSL
|
|
390
|
+
- ✅ UC-012: Audit Trail → `audit_retention` (configurable) + C01 two pipelines
|
|
391
|
+
- ✅ UC-013: Cardinality Protection → event-level metric config
|
|
392
|
+
- ✅ UC-014: Adaptive Sampling → conventions + C11 stratified sampling
|
|
393
|
+
- ✅ UC-015: Cost Optimization → event-level retention + routing
|
|
394
|
+
- ✅ UC-020: Event Versioning → event-level `version` DSL
|
|
395
|
+
- ✅ UC-021: Error Handling → circuit breaker + hooks
|
|
396
|
+
|
|
397
|
+
**See Section 2 for event-level configuration examples.**
|
|
398
|
+
|
|
399
|
+
---
|
|
400
|
+
|
|
401
|
+
## 2. Event Examples (v1.1 - RECOMMENDED ✅)
|
|
402
|
+
|
|
403
|
+
> **🎯 Event-Level Configuration** (CONTRADICTION_01 Resolution)
|
|
404
|
+
>
|
|
405
|
+
> **This is the RECOMMENDED approach starting from v1.1!**
|
|
406
|
+
>
|
|
407
|
+
> E11y now supports **event-level configuration** to reduce global config from 1400+ lines to <300 lines.
|
|
408
|
+
> Configuration is distributed across event classes (locality of behavior).
|
|
409
|
+
>
|
|
410
|
+
> **Benefits over v1.0 global config:**
|
|
411
|
+
> - ✅ **78% reduction** in config lines (1400+ → <300)
|
|
412
|
+
> - ✅ **Locality of behavior** (config next to schema)
|
|
413
|
+
> - ✅ **Better maintainability** (change event = change config)
|
|
414
|
+
> - ✅ **DRY via inheritance** (base classes + presets)
|
|
415
|
+
> - ✅ **Sensible defaults** (conventions eliminate 80% of config)
|
|
416
|
+
|
|
417
|
+
### 2.0. Conventions & Sensible Defaults (NEW)
|
|
418
|
+
|
|
419
|
+
> **Philosophy:** "Explicit over implicit" + conventions = best balance.
|
|
420
|
+
>
|
|
421
|
+
> E11y applies **sensible defaults** based on conventions to eliminate 80% of configuration.
|
|
422
|
+
> All conventions are clearly documented and can be overridden.
|
|
423
|
+
|
|
424
|
+
**Convention 1: Event Name → Severity**
|
|
425
|
+
|
|
426
|
+
```ruby
|
|
427
|
+
# Convention: *Failed, *Error → :error
|
|
428
|
+
class Events::PaymentFailed < E11y::Event::Base
|
|
429
|
+
# ← Auto: severity = :error (inferred from name!)
|
|
430
|
+
schema do; required(:error_code).filled(:string); end
|
|
431
|
+
end
|
|
432
|
+
|
|
433
|
+
# Convention: *Paid, *Succeeded, *Completed → :success
|
|
434
|
+
class Events::OrderPaid < E11y::Event::Base
|
|
435
|
+
# ← Auto: severity = :success (inferred from name!)
|
|
436
|
+
schema do; required(:order_id).filled(:string); end
|
|
437
|
+
end
|
|
438
|
+
|
|
439
|
+
# Convention: *Started, *Processing → :info
|
|
440
|
+
class Events::OrderProcessing < E11y::Event::Base
|
|
441
|
+
# ← Auto: severity = :info (inferred from name!)
|
|
442
|
+
schema do; required(:order_id).filled(:string); end
|
|
443
|
+
end
|
|
444
|
+
|
|
445
|
+
# Convention: Debug* → :debug
|
|
446
|
+
class Events::DebugQuery < E11y::Event::Base
|
|
447
|
+
# ← Auto: severity = :debug (inferred from name!)
|
|
448
|
+
schema do; required(:query).filled(:string); end
|
|
449
|
+
end
|
|
450
|
+
|
|
451
|
+
# Override when needed:
|
|
452
|
+
class Events::PaymentFailed < E11y::Event::Base
|
|
453
|
+
severity :warn # ← Explicit override (unusual case)
|
|
454
|
+
end
|
|
455
|
+
```
|
|
456
|
+
|
|
457
|
+
**Convention 2: Severity → Adapters**
|
|
458
|
+
|
|
459
|
+
```ruby
|
|
460
|
+
# Convention: :error/:fatal → [:sentry]
|
|
461
|
+
class Events::CriticalError < E11y::Event::Base
|
|
462
|
+
severity :fatal
|
|
463
|
+
# ← Auto: adapters = [:sentry] (errors go to Sentry!)
|
|
464
|
+
end
|
|
465
|
+
|
|
466
|
+
# Convention: :success/:info/:warn → [:loki]
|
|
467
|
+
class Events::OrderCreated < E11y::Event::Base
|
|
468
|
+
severity :success
|
|
469
|
+
# ← Auto: adapters = [:loki] (business events to Loki)
|
|
470
|
+
end
|
|
471
|
+
|
|
472
|
+
# Convention: :debug → [:file] (dev), [:loki] (prod with sampling)
|
|
473
|
+
class Events::DebugLog < E11y::Event::Base
|
|
474
|
+
severity :debug
|
|
475
|
+
# ← Auto: adapters = [:file] in dev, [:loki] in prod
|
|
476
|
+
end
|
|
477
|
+
```
|
|
478
|
+
|
|
479
|
+
**Convention 3: Severity → Sample Rate**
|
|
480
|
+
|
|
481
|
+
```ruby
|
|
482
|
+
# Convention: :error/:fatal → 1.0 (100%, never sample errors!)
|
|
483
|
+
class Events::PaymentFailed < E11y::Event::Base
|
|
484
|
+
severity :error
|
|
485
|
+
# ← Auto: sample_rate = 1.0 (100%)
|
|
486
|
+
end
|
|
487
|
+
|
|
488
|
+
# Convention: :warn → 0.5 (50%)
|
|
489
|
+
class Events::RateLimitWarning < E11y::Event::Base
|
|
490
|
+
severity :warn
|
|
491
|
+
# ← Auto: sample_rate = 0.5 (50%)
|
|
492
|
+
end
|
|
493
|
+
|
|
494
|
+
# Convention: :success/:info → 0.1 (10%)
|
|
495
|
+
class Events::OrderCreated < E11y::Event::Base
|
|
496
|
+
severity :success
|
|
497
|
+
# ← Auto: sample_rate = 0.1 (10%)
|
|
498
|
+
end
|
|
499
|
+
|
|
500
|
+
# Convention: :debug → 0.01 (1%)
|
|
501
|
+
class Events::DebugQuery < E11y::Event::Base
|
|
502
|
+
severity :debug
|
|
503
|
+
# ← Auto: sample_rate = 0.01 (1%)
|
|
504
|
+
end
|
|
505
|
+
```
|
|
506
|
+
|
|
507
|
+
**Convention 4: Severity → Retention**
|
|
508
|
+
|
|
509
|
+
```ruby
|
|
510
|
+
# Convention: :error/:fatal → 90 days
|
|
511
|
+
class Events::CriticalError < E11y::Event::Base
|
|
512
|
+
severity :fatal
|
|
513
|
+
# ← Auto: retention = 90.days
|
|
514
|
+
end
|
|
515
|
+
|
|
516
|
+
# Convention: :info/:success → 30 days
|
|
517
|
+
class Events::OrderCreated < E11y::Event::Base
|
|
518
|
+
severity :success
|
|
519
|
+
# ← Auto: retention = 30.days
|
|
520
|
+
end
|
|
521
|
+
|
|
522
|
+
# Convention: :debug → 7 days
|
|
523
|
+
class Events::DebugQuery < E11y::Event::Base
|
|
524
|
+
severity :debug
|
|
525
|
+
# ← Auto: retention = 7.days
|
|
526
|
+
end
|
|
527
|
+
```
|
|
528
|
+
|
|
529
|
+
**Convention 5: Default Rate Limit**
|
|
530
|
+
|
|
531
|
+
```ruby
|
|
532
|
+
# Convention: 1000 events/sec default (override only for high-volume)
|
|
533
|
+
class Events::OrderCreated < E11y::Event::Base
|
|
534
|
+
# ← Auto: rate_limit = 1000 (per second)
|
|
535
|
+
schema do; required(:order_id).filled(:string); end
|
|
536
|
+
end
|
|
537
|
+
|
|
538
|
+
# Override for high-volume events:
|
|
539
|
+
class Events::PageView < E11y::Event::Base
|
|
540
|
+
rate_limit 10_000 # ← Explicit override (high-volume)
|
|
541
|
+
end
|
|
542
|
+
```
|
|
543
|
+
|
|
544
|
+
**Result: Zero-Config Events**
|
|
545
|
+
|
|
546
|
+
```ruby
|
|
547
|
+
# 90% of events need ONLY schema (zero config!)
|
|
548
|
+
class Events::OrderCreated < E11y::Event::Base
|
|
549
|
+
schema do
|
|
550
|
+
required(:order_id).filled(:string)
|
|
551
|
+
required(:amount).filled(:decimal)
|
|
552
|
+
end
|
|
553
|
+
# ← That's it! All config from conventions:
|
|
554
|
+
# severity: :success (from name)
|
|
555
|
+
# adapters: [:loki] (from severity)
|
|
556
|
+
# sample_rate: 0.1 (from severity)
|
|
557
|
+
# retention: 30.days (from severity)
|
|
558
|
+
# rate_limit: 1000 (default)
|
|
559
|
+
end
|
|
560
|
+
```
|
|
561
|
+
|
|
562
|
+
**Override conventions when needed:**
|
|
563
|
+
|
|
564
|
+
```ruby
|
|
565
|
+
class Events::OrderCreated < E11y::Event::Base
|
|
566
|
+
schema do; required(:order_id).filled(:string); end
|
|
567
|
+
|
|
568
|
+
# Override specific settings:
|
|
569
|
+
severity :info # ← Override convention
|
|
570
|
+
sample_rate 1.0 # ← Never sample orders
|
|
571
|
+
retention 7.years # ← Financial records
|
|
572
|
+
end
|
|
573
|
+
```
|
|
574
|
+
|
|
575
|
+
---
|
|
576
|
+
|
|
577
|
+
### 2.1. Simple Business Event (with Event-Level Config)
|
|
578
|
+
|
|
579
|
+
```ruby
|
|
580
|
+
# app/events/order_created.rb
|
|
581
|
+
module Events
|
|
582
|
+
class OrderCreated < E11y::Event::Base
|
|
583
|
+
severity :success
|
|
584
|
+
|
|
585
|
+
schema do
|
|
586
|
+
required(:order_id).filled(:string)
|
|
587
|
+
required(:user_id).filled(:string)
|
|
588
|
+
required(:amount).filled(:decimal)
|
|
589
|
+
required(:currency).filled(:string)
|
|
590
|
+
optional(:items_count).filled(:integer)
|
|
591
|
+
end
|
|
592
|
+
|
|
593
|
+
# ✨ NEW: Event-level configuration (right next to schema!)
|
|
594
|
+
rate_limit 1000, window: 1.second # Max 1000 events/sec
|
|
595
|
+
sample_rate 0.1 # 10% sampling
|
|
596
|
+
retention 30.days # Keep for 30 days
|
|
597
|
+
|
|
598
|
+
# Auto-create metric: app_orders_created_total
|
|
599
|
+
metric :counter,
|
|
600
|
+
name: 'orders.created.total',
|
|
601
|
+
tags: [:currency],
|
|
602
|
+
comment: 'Total orders created'
|
|
603
|
+
end
|
|
604
|
+
end
|
|
605
|
+
|
|
606
|
+
# Usage (unchanged)
|
|
607
|
+
Events::OrderCreated.track(
|
|
608
|
+
order_id: 'ord_123',
|
|
609
|
+
user_id: 'usr_456',
|
|
610
|
+
amount: 99.99,
|
|
611
|
+
currency: 'USD',
|
|
612
|
+
items_count: 3
|
|
613
|
+
)
|
|
614
|
+
```
|
|
615
|
+
|
|
616
|
+
### 2.2. "Fat" Event with Multiple Features
|
|
617
|
+
|
|
618
|
+
```ruby
|
|
619
|
+
# app/events/payment_processed.rb
|
|
620
|
+
module Events
|
|
621
|
+
class PaymentProcessed < E11y::Event::Base
|
|
622
|
+
# === UC-002: Business Event Tracking ===
|
|
623
|
+
severity :success
|
|
624
|
+
|
|
625
|
+
schema do
|
|
626
|
+
required(:transaction_id).filled(:string)
|
|
627
|
+
required(:order_id).filled(:string)
|
|
628
|
+
required(:user_id).filled(:string)
|
|
629
|
+
required(:amount).filled(:decimal)
|
|
630
|
+
required(:currency).filled(:string)
|
|
631
|
+
required(:payment_method).filled(:string)
|
|
632
|
+
required(:processor).filled(:string) # stripe, paypal, etc.
|
|
633
|
+
|
|
634
|
+
optional(:card_last4).filled(:string)
|
|
635
|
+
optional(:card_brand).filled(:string)
|
|
636
|
+
optional(:billing_country).filled(:string)
|
|
637
|
+
optional(:risk_score).filled(:float)
|
|
638
|
+
optional(:processor_fee).filled(:decimal)
|
|
639
|
+
optional(:net_amount).filled(:decimal)
|
|
640
|
+
|
|
641
|
+
# For tracing
|
|
642
|
+
optional(:trace_id).filled(:string)
|
|
643
|
+
optional(:span_id).filled(:string)
|
|
644
|
+
end
|
|
645
|
+
|
|
646
|
+
# === UC-003: Pattern-Based Metrics ===
|
|
647
|
+
metric :counter,
|
|
648
|
+
name: 'payments.processed.total',
|
|
649
|
+
tags: [:currency, :payment_method, :processor],
|
|
650
|
+
comment: 'Total successful payments'
|
|
651
|
+
|
|
652
|
+
metric :histogram,
|
|
653
|
+
name: 'payments.processed.amount',
|
|
654
|
+
tags: [:currency, :payment_method],
|
|
655
|
+
buckets: [10, 50, 100, 500, 1000, 5000, 10000],
|
|
656
|
+
comment: 'Distribution of payment amounts'
|
|
657
|
+
|
|
658
|
+
metric :histogram,
|
|
659
|
+
name: 'payments.risk_score',
|
|
660
|
+
tags: [:payment_method],
|
|
661
|
+
buckets: [0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
|
|
662
|
+
comment: 'Payment risk score distribution'
|
|
663
|
+
|
|
664
|
+
# === UC-004: Zero-Config SLO Tracking ===
|
|
665
|
+
# (Automatically tracked if amount in payload)
|
|
666
|
+
|
|
667
|
+
# === UC-002: Per-Event Adapter Override ===
|
|
668
|
+
# Send to all default adapters + S3 archive
|
|
669
|
+
adapters_strategy :append
|
|
670
|
+
adapters [:s3] # In addition to [:loki, :elasticsearch, :otlp]
|
|
671
|
+
|
|
672
|
+
# === UC-007: PII Filtering ===
|
|
673
|
+
# card_last4, billing_country will be filtered if in PII config
|
|
674
|
+
|
|
675
|
+
# === UC-014: Adaptive Sampling ===
|
|
676
|
+
# High-value payments sampled at 100% (see value_based sampling config)
|
|
677
|
+
|
|
678
|
+
# === UC-015: Cost Optimization ===
|
|
679
|
+
# retention_tagging will add retention_days: 90
|
|
680
|
+
|
|
681
|
+
# Custom validation
|
|
682
|
+
validate do
|
|
683
|
+
if amount <= 0
|
|
684
|
+
errors.add(:amount, 'must be positive')
|
|
685
|
+
end
|
|
686
|
+
|
|
687
|
+
if risk_score && risk_score > 0.9
|
|
688
|
+
errors.add(:risk_score, 'suspiciously high')
|
|
689
|
+
end
|
|
690
|
+
end
|
|
691
|
+
end
|
|
692
|
+
end
|
|
693
|
+
|
|
694
|
+
# Usage with duration measurement
|
|
695
|
+
Events::PaymentProcessed.track(
|
|
696
|
+
transaction_id: 'txn_abc123',
|
|
697
|
+
order_id: 'ord_456',
|
|
698
|
+
user_id: 'usr_789',
|
|
699
|
+
amount: 1999.99,
|
|
700
|
+
currency: 'USD',
|
|
701
|
+
payment_method: 'credit_card',
|
|
702
|
+
processor: 'stripe',
|
|
703
|
+
card_last4: '4242', # Will be filtered
|
|
704
|
+
card_brand: 'visa',
|
|
705
|
+
billing_country: 'US',
|
|
706
|
+
risk_score: 0.12,
|
|
707
|
+
processor_fee: 59.99,
|
|
708
|
+
net_amount: 1940.00
|
|
709
|
+
) do
|
|
710
|
+
# Measure duration of block
|
|
711
|
+
StripePaymentProcessor.charge(...)
|
|
712
|
+
end
|
|
713
|
+
```
|
|
714
|
+
|
|
715
|
+
### 2.3. "Fat" Security/Audit Event
|
|
716
|
+
|
|
717
|
+
```ruby
|
|
718
|
+
# app/events/user_permission_changed.rb
|
|
719
|
+
module Events
|
|
720
|
+
class UserPermissionChanged < E11y::AuditEvent
|
|
721
|
+
# === UC-012: Audit Trail ===
|
|
722
|
+
audit_retention 7.years
|
|
723
|
+
audit_reason 'compliance_regulatory'
|
|
724
|
+
severity :warn
|
|
725
|
+
|
|
726
|
+
signing do
|
|
727
|
+
enabled true # Cryptographically sign this event
|
|
728
|
+
algorithm :ed25519
|
|
729
|
+
end
|
|
730
|
+
|
|
731
|
+
schema do
|
|
732
|
+
required(:user_id).filled(:string)
|
|
733
|
+
required(:user_email).filled(:string)
|
|
734
|
+
required(:old_role).filled(:string)
|
|
735
|
+
required(:new_role).filled(:string)
|
|
736
|
+
required(:changed_by_user_id).filled(:string)
|
|
737
|
+
required(:changed_by_email).filled(:string)
|
|
738
|
+
required(:reason).filled(:string)
|
|
739
|
+
required(:ip_address).filled(:string)
|
|
740
|
+
required(:user_agent).filled(:string)
|
|
741
|
+
|
|
742
|
+
optional(:approval_ticket_id).filled(:string)
|
|
743
|
+
optional(:approval_required).filled(:bool)
|
|
744
|
+
end
|
|
745
|
+
|
|
746
|
+
# === UC-002: Per-Event Adapter Override ===
|
|
747
|
+
# Audit events go to special audit log + Elasticsearch
|
|
748
|
+
adapters [:audit_file, :elasticsearch]
|
|
749
|
+
|
|
750
|
+
# === UC-003: Metrics ===
|
|
751
|
+
metric :counter,
|
|
752
|
+
name: 'security.permissions.changed.total',
|
|
753
|
+
tags: [:old_role, :new_role],
|
|
754
|
+
comment: 'User permission changes'
|
|
755
|
+
|
|
756
|
+
# === UC-005: Sentry Integration ===
|
|
757
|
+
# Don't send to Sentry (not an error)
|
|
758
|
+
exclude_from_sentry true
|
|
759
|
+
|
|
760
|
+
# === UC-007: PII Filtering ===
|
|
761
|
+
# email, ip_address, user_agent will be filtered unless in allowlist
|
|
762
|
+
|
|
763
|
+
# === UC-011: Rate Limiting ===
|
|
764
|
+
# Has dedicated rate limit in config: per_event 'security.*'
|
|
765
|
+
|
|
766
|
+
# === UC-014: Adaptive Sampling ===
|
|
767
|
+
# Security events always sampled at 100% (see content_based.always_sample)
|
|
768
|
+
|
|
769
|
+
# Custom validation
|
|
770
|
+
validate do
|
|
771
|
+
if old_role == new_role
|
|
772
|
+
errors.add(:base, 'role unchanged')
|
|
773
|
+
end
|
|
774
|
+
|
|
775
|
+
valid_roles = %w[user admin superadmin]
|
|
776
|
+
unless valid_roles.include?(old_role) && valid_roles.include?(new_role)
|
|
777
|
+
errors.add(:base, 'invalid role')
|
|
778
|
+
end
|
|
779
|
+
end
|
|
780
|
+
end
|
|
781
|
+
end
|
|
782
|
+
|
|
783
|
+
# Usage
|
|
784
|
+
Events::UserPermissionChanged.track(
|
|
785
|
+
user_id: 'usr_123',
|
|
786
|
+
user_email: 'john@example.com', # Filtered
|
|
787
|
+
old_role: 'user',
|
|
788
|
+
new_role: 'admin',
|
|
789
|
+
changed_by_user_id: 'usr_admin',
|
|
790
|
+
changed_by_email: 'admin@example.com', # Filtered
|
|
791
|
+
reason: 'User promoted to admin for project X',
|
|
792
|
+
ip_address: '192.168.1.1', # Filtered
|
|
793
|
+
user_agent: 'Mozilla/5.0...', # Filtered
|
|
794
|
+
approval_ticket_id: 'JIRA-1234',
|
|
795
|
+
approval_required: true
|
|
796
|
+
)
|
|
797
|
+
```
|
|
798
|
+
|
|
799
|
+
### 2.4. "Fat" Error Event with Full Context
|
|
800
|
+
|
|
801
|
+
```ruby
|
|
802
|
+
# app/events/critical_system_error.rb
|
|
803
|
+
module Events
|
|
804
|
+
class CriticalSystemError < E11y::Event::Base
|
|
805
|
+
# === UC-002: Business Event Tracking ===
|
|
806
|
+
severity :fatal
|
|
807
|
+
|
|
808
|
+
schema do
|
|
809
|
+
required(:error_class).filled(:string)
|
|
810
|
+
required(:error_message).filled(:string)
|
|
811
|
+
required(:error_backtrace).array(:string)
|
|
812
|
+
|
|
813
|
+
required(:controller).filled(:string)
|
|
814
|
+
required(:action).filled(:string)
|
|
815
|
+
required(:http_method).filled(:string)
|
|
816
|
+
required(:path).filled(:string)
|
|
817
|
+
|
|
818
|
+
optional(:user_id).filled(:string)
|
|
819
|
+
optional(:session_id).filled(:string)
|
|
820
|
+
optional(:request_id).filled(:string)
|
|
821
|
+
optional(:trace_id).filled(:string)
|
|
822
|
+
|
|
823
|
+
optional(:params).hash
|
|
824
|
+
optional(:headers).hash
|
|
825
|
+
optional(:environment_variables).hash
|
|
826
|
+
|
|
827
|
+
optional(:database_state).filled(:string)
|
|
828
|
+
optional(:redis_state).filled(:string)
|
|
829
|
+
optional(:external_api_status).hash
|
|
830
|
+
end
|
|
831
|
+
|
|
832
|
+
# === UC-002: Per-Event Adapter Override ===
|
|
833
|
+
# Critical errors go EVERYWHERE
|
|
834
|
+
adapters [:loki, :elasticsearch, :sentry, :pagerduty, :slack, :file]
|
|
835
|
+
|
|
836
|
+
# === UC-003: Metrics ===
|
|
837
|
+
metric :counter,
|
|
838
|
+
name: 'errors.critical.total',
|
|
839
|
+
tags: [:error_class, :controller, :action],
|
|
840
|
+
comment: 'Critical system errors'
|
|
841
|
+
|
|
842
|
+
# === UC-005: Sentry Integration ===
|
|
843
|
+
# Automatically sent to Sentry (severity: fatal)
|
|
844
|
+
sentry_options do
|
|
845
|
+
level :fatal
|
|
846
|
+
|
|
847
|
+
# Custom fingerprint
|
|
848
|
+
fingerprint [:error_class, :controller, :action]
|
|
849
|
+
|
|
850
|
+
# Extra context
|
|
851
|
+
extra do |event|
|
|
852
|
+
{
|
|
853
|
+
database_state: event.payload[:database_state],
|
|
854
|
+
redis_state: event.payload[:redis_state],
|
|
855
|
+
external_api_status: event.payload[:external_api_status]
|
|
856
|
+
}
|
|
857
|
+
end
|
|
858
|
+
|
|
859
|
+
# Tags
|
|
860
|
+
tags do |event|
|
|
861
|
+
{
|
|
862
|
+
controller: event.payload[:controller],
|
|
863
|
+
action: event.payload[:action]
|
|
864
|
+
}
|
|
865
|
+
end
|
|
866
|
+
end
|
|
867
|
+
|
|
868
|
+
# === UC-006: Trace Context ===
|
|
869
|
+
# trace_id automatically extracted from request or generated
|
|
870
|
+
|
|
871
|
+
# === UC-007: PII Filtering ===
|
|
872
|
+
# params, headers, environment_variables will be deeply scanned for PII
|
|
873
|
+
|
|
874
|
+
# === UC-011: Rate Limiting ===
|
|
875
|
+
# Has bypass in config: bypass_for event_types: ['system.critical']
|
|
876
|
+
|
|
877
|
+
# === UC-014: Adaptive Sampling ===
|
|
878
|
+
# Fatal errors always sampled at 100%
|
|
879
|
+
|
|
880
|
+
# === UC-015: Cost Optimization ===
|
|
881
|
+
# payload_minimization will truncate long backtraces
|
|
882
|
+
end
|
|
883
|
+
end
|
|
884
|
+
|
|
885
|
+
# Usage (typically in exception handler)
|
|
886
|
+
begin
|
|
887
|
+
# Some critical operation
|
|
888
|
+
PaymentProcessor.charge(order)
|
|
889
|
+
rescue => e
|
|
890
|
+
Events::CriticalSystemError.track(
|
|
891
|
+
error_class: e.class.name,
|
|
892
|
+
error_message: e.message,
|
|
893
|
+
error_backtrace: e.backtrace,
|
|
894
|
+
|
|
895
|
+
controller: 'OrdersController',
|
|
896
|
+
action: 'create',
|
|
897
|
+
http_method: 'POST',
|
|
898
|
+
path: '/api/orders',
|
|
899
|
+
|
|
900
|
+
user_id: current_user&.id,
|
|
901
|
+
session_id: session.id,
|
|
902
|
+
request_id: request.uuid,
|
|
903
|
+
trace_id: Current.trace_id,
|
|
904
|
+
|
|
905
|
+
params: params.to_unsafe_h, # Will be PII-filtered
|
|
906
|
+
headers: request.headers.to_h, # Will be PII-filtered
|
|
907
|
+
environment_variables: ENV.to_h, # Will be PII-filtered; caution: ENV often holds secrets (tokens, salts), so prefer an allowlist of keys
|
|
908
|
+
|
|
909
|
+
database_state: 'connected',
|
|
910
|
+
redis_state: 'disconnected', # ← The problem!
|
|
911
|
+
external_api_status: {
|
|
912
|
+
stripe: 'healthy',
|
|
913
|
+
sendgrid: 'healthy'
|
|
914
|
+
}
|
|
915
|
+
)
|
|
916
|
+
|
|
917
|
+
# Re-raise
|
|
918
|
+
raise
|
|
919
|
+
end
|
|
920
|
+
```
|
|
921
|
+
|
|
922
|
+
### 2.5. Event Inheritance & Base Classes (NEW - CONTRADICTION_01 Resolution)
|
|
923
|
+
|
|
924
|
+
> **🎯 Pattern:** Use inheritance to share common configuration across related events.
|
|
925
|
+
|
|
926
|
+
**Base class for payment events:**
|
|
927
|
+
|
|
928
|
+
```ruby
|
|
929
|
+
# app/events/base_payment_event.rb
|
|
930
|
+
module Events
|
|
931
|
+
class BasePaymentEvent < E11y::Event::Base
|
|
932
|
+
# Common payment event configuration
|
|
933
|
+
severity :success
|
|
934
|
+
rate_limit 1000
|
|
935
|
+
sample_rate 1.0 # Never sample payments (high-value)
|
|
936
|
+
retention 7.years # Financial records
|
|
937
|
+
adapters [:loki, :sentry, :s3_archive]
|
|
938
|
+
|
|
939
|
+
# Common PII filtering
|
|
940
|
+
pii_filtering do
|
|
941
|
+
hashes :email, :user_id # Pseudonymize for searchability
|
|
942
|
+
allows :order_id, :amount, :currency # Non-PII
|
|
943
|
+
end
|
|
944
|
+
|
|
945
|
+
# Common metric
|
|
946
|
+
metric :counter,
|
|
947
|
+
name: 'payments.total',
|
|
948
|
+
tags: [:currency, :payment_method],
|
|
949
|
+
comment: 'Total payment events'
|
|
950
|
+
end
|
|
951
|
+
end
|
|
952
|
+
|
|
953
|
+
# Inherit from base (1-2 lines per event!)
|
|
954
|
+
class Events::PaymentSucceeded < Events::BasePaymentEvent
|
|
955
|
+
schema do
|
|
956
|
+
required(:transaction_id).filled(:string)
|
|
957
|
+
required(:order_id).filled(:string)
|
|
958
|
+
required(:amount).filled(:decimal)
|
|
959
|
+
required(:currency).filled(:string)
|
|
960
|
+
required(:payment_method).filled(:string)
|
|
961
|
+
end
|
|
962
|
+
# ← Inherits ALL config from BasePaymentEvent!
|
|
963
|
+
end
|
|
964
|
+
|
|
965
|
+
class Events::PaymentFailed < Events::BasePaymentEvent
|
|
966
|
+
severity :error # ← Override base (errors, not success)
|
|
967
|
+
|
|
968
|
+
schema do
|
|
969
|
+
required(:transaction_id).filled(:string)
|
|
970
|
+
required(:order_id).filled(:string)
|
|
971
|
+
required(:amount).filled(:decimal)
|
|
972
|
+
required(:error_code).filled(:string)
|
|
973
|
+
required(:error_message).filled(:string)
|
|
974
|
+
end
|
|
975
|
+
# ← Inherits: rate_limit, sample_rate, retention, adapters, PII rules
|
|
976
|
+
end
|
|
977
|
+
```
|
|
978
|
+
|
|
979
|
+
**Base class for audit events:**
|
|
980
|
+
|
|
981
|
+
```ruby
|
|
982
|
+
# app/events/base_audit_event.rb
|
|
983
|
+
module Events
|
|
984
|
+
class BaseAuditEvent < E11y::Event::Base
|
|
985
|
+
# Common audit configuration
|
|
986
|
+
severity :warn
|
|
987
|
+
audit_event true
|
|
988
|
+
adapters [:audit_encrypted]
|
|
989
|
+
# ← Auto-set by audit_event:
|
|
990
|
+
# retention = E11y.config.audit_retention (default: 7.years, configurable per jurisdiction!)
|
|
991
|
+
# rate_limiting = false (LOCKED - cannot override!)
|
|
992
|
+
# sampling = false (LOCKED - cannot override!)
|
|
993
|
+
|
|
994
|
+
# Cryptographic signing
|
|
995
|
+
signing do
|
|
996
|
+
enabled true
|
|
997
|
+
algorithm :ed25519
|
|
998
|
+
end
|
|
999
|
+
|
|
1000
|
+
# Common audit fields
|
|
1001
|
+
contains_pii true
|
|
1002
|
+
pii_filtering do
|
|
1003
|
+
# Audit: keep original data (GDPR Art. 6(1)(c))
|
|
1004
|
+
# Filtering skipped for :audit_encrypted adapter
|
|
1005
|
+
end
|
|
1006
|
+
end
|
|
1007
|
+
end
|
|
1008
|
+
|
|
1009
|
+
# Inherit from base
|
|
1010
|
+
class Events::UserPermissionChanged < Events::BaseAuditEvent
|
|
1011
|
+
schema do
|
|
1012
|
+
required(:user_id).filled(:string)
|
|
1013
|
+
required(:user_email).filled(:string)
|
|
1014
|
+
required(:old_role).filled(:string)
|
|
1015
|
+
required(:new_role).filled(:string)
|
|
1016
|
+
required(:changed_by_user_id).filled(:string)
|
|
1017
|
+
required(:ip_address).filled(:string)
|
|
1018
|
+
end
|
|
1019
|
+
# ← Inherits: audit_event, adapters, retention, signing, etc.
|
|
1020
|
+
end
|
|
1021
|
+
```
|
|
1022
|
+
|
|
1023
|
+
**Base class for debug events:**
|
|
1024
|
+
|
|
1025
|
+
```ruby
|
|
1026
|
+
# app/events/base_debug_event.rb
|
|
1027
|
+
module Events
|
|
1028
|
+
class BaseDebugEvent < E11y::Event::Base
|
|
1029
|
+
# Common debug configuration
|
|
1030
|
+
severity :debug
|
|
1031
|
+
rate_limit 100 # Low limit
|
|
1032
|
+
sample_rate 0.01 # 1% sampling
|
|
1033
|
+
retention 7.days # Short retention
|
|
1034
|
+
adapters [:file] # Local file only (cheap)
|
|
1035
|
+
|
|
1036
|
+
# No PII in debug events
|
|
1037
|
+
contains_pii false
|
|
1038
|
+
end
|
|
1039
|
+
end
|
|
1040
|
+
|
|
1041
|
+
# Inherit from base
|
|
1042
|
+
class Events::DebugSqlQuery < Events::BaseDebugEvent
|
|
1043
|
+
schema do
|
|
1044
|
+
required(:query).filled(:string)
|
|
1045
|
+
required(:duration_ms).filled(:float)
|
|
1046
|
+
end
|
|
1047
|
+
# ← Inherits: severity, rate_limit, sample_rate, retention, adapters
|
|
1048
|
+
end
|
|
1049
|
+
```
|
|
1050
|
+
|
|
1051
|
+
**Benefits of inheritance:**
|
|
1052
|
+
- ✅ 1-2 lines per event (just schema!)
|
|
1053
|
+
- ✅ DRY (common config shared)
|
|
1054
|
+
- ✅ Consistency (all payments have same config)
|
|
1055
|
+
- ✅ Easy to change (update base class → all events updated)
|
|
1056
|
+
|
|
1057
|
+
---
|
|
1058
|
+
|
|
1059
|
+
### 2.5a. Preset Modules (NEW - CONTRADICTION_01 Resolution)
|
|
1060
|
+
|
|
1061
|
+
> **🎯 Pattern:** Use preset modules for 1-line configuration includes (Rails-style concerns).
|
|
1062
|
+
|
|
1063
|
+
**E11y provides built-in presets:**
|
|
1064
|
+
|
|
1065
|
+
```ruby
|
|
1066
|
+
# lib/e11y/presets/high_value_event.rb
|
|
1067
|
+
module E11y
|
|
1068
|
+
module Presets
|
|
1069
|
+
module HighValueEvent
|
|
1070
|
+
extend ActiveSupport::Concern
|
|
1071
|
+
included do
|
|
1072
|
+
rate_limit 10_000
|
|
1073
|
+
sample_rate 1.0 # Never sample (100%)
|
|
1074
|
+
retention 7.years
|
|
1075
|
+
adapters [:loki, :sentry, :s3_archive]
|
|
1076
|
+
end
|
|
1077
|
+
end
|
|
1078
|
+
|
|
1079
|
+
module DebugEvent
|
|
1080
|
+
extend ActiveSupport::Concern
|
|
1081
|
+
included do
|
|
1082
|
+
severity :debug
|
|
1083
|
+
rate_limit 100
|
|
1084
|
+
sample_rate 0.01 # 1% sampling
|
|
1085
|
+
retention 7.days
|
|
1086
|
+
adapters [:file] # Local only
|
|
1087
|
+
end
|
|
1088
|
+
end
|
|
1089
|
+
|
|
1090
|
+
module AuditEvent
|
|
1091
|
+
extend ActiveSupport::Concern
|
|
1092
|
+
included do
|
|
1093
|
+
audit_event true
|
|
1094
|
+
adapters [:audit_encrypted]
|
|
1095
|
+
# ← Auto-set by audit_event:
|
|
1096
|
+
# retention = E11y.config.audit_retention (configurable!)
|
|
1097
|
+
# rate_limiting = false (LOCKED!)
|
|
1098
|
+
# sampling = false (LOCKED!)
|
|
1099
|
+
end
|
|
1100
|
+
end
|
|
1101
|
+
end
|
|
1102
|
+
end
|
|
1103
|
+
```
|
|
1104
|
+
|
|
1105
|
+
**Usage (1-line includes!):**
|
|
1106
|
+
|
|
1107
|
+
```ruby
|
|
1108
|
+
# High-value event
|
|
1109
|
+
class Events::PaymentProcessed < E11y::Event::Base
|
|
1110
|
+
include E11y::Presets::HighValueEvent # ← All config inherited!
|
|
1111
|
+
|
|
1112
|
+
schema do
|
|
1113
|
+
required(:transaction_id).filled(:string)
|
|
1114
|
+
required(:amount).filled(:decimal)
|
|
1115
|
+
end
|
|
1116
|
+
end
|
|
1117
|
+
|
|
1118
|
+
# Debug event
|
|
1119
|
+
class Events::DebugSqlQuery < E11y::Event::Base
|
|
1120
|
+
include E11y::Presets::DebugEvent # ← All config inherited!
|
|
1121
|
+
|
|
1122
|
+
schema do
|
|
1123
|
+
required(:query).filled(:string)
|
|
1124
|
+
required(:duration_ms).filled(:float)
|
|
1125
|
+
end
|
|
1126
|
+
end
|
|
1127
|
+
|
|
1128
|
+
# Audit event
|
|
1129
|
+
class Events::UserDeleted < E11y::Event::Base
|
|
1130
|
+
include E11y::Presets::AuditEvent # ← All config inherited!
|
|
1131
|
+
|
|
1132
|
+
schema do
|
|
1133
|
+
required(:user_id).filled(:string)
|
|
1134
|
+
required(:deleted_by).filled(:string)
|
|
1135
|
+
required(:reason).filled(:string)
|
|
1136
|
+
end
|
|
1137
|
+
end
|
|
1138
|
+
```
|
|
1139
|
+
|
|
1140
|
+
**Custom presets (project-specific):**
|
|
1141
|
+
|
|
1142
|
+
```ruby
|
|
1143
|
+
# app/events/presets/critical_business_event.rb
|
|
1144
|
+
module Events
|
|
1145
|
+
module Presets
|
|
1146
|
+
module CriticalBusinessEvent
|
|
1147
|
+
extend ActiveSupport::Concern
|
|
1148
|
+
included do
|
|
1149
|
+
severity :success
|
|
1150
|
+
rate_limit 5000
|
|
1151
|
+
sample_rate 1.0 # Never sample
|
|
1152
|
+
retention 7.years
|
|
1153
|
+
adapters [:loki, :elasticsearch, :s3_archive]
|
|
1154
|
+
|
|
1155
|
+
# Send Slack notification
|
|
1156
|
+
adapters_strategy :append
|
|
1157
|
+
adapters [:slack_business]
|
|
1158
|
+
|
|
1159
|
+
# Common metric
|
|
1160
|
+
metric :counter,
|
|
1161
|
+
name: 'critical_business_events.total',
|
|
1162
|
+
tags: [:event_name]
|
|
1163
|
+
end
|
|
1164
|
+
end
|
|
1165
|
+
end
|
|
1166
|
+
end
|
|
1167
|
+
|
|
1168
|
+
# Usage:
|
|
1169
|
+
class Events::LargeOrderPlaced < E11y::Event::Base
|
|
1170
|
+
include Events::Presets::CriticalBusinessEvent
|
|
1171
|
+
|
|
1172
|
+
schema do
|
|
1173
|
+
required(:order_id).filled(:string)
|
|
1174
|
+
required(:amount).filled(:decimal)
|
|
1175
|
+
end
|
|
1176
|
+
end
|
|
1177
|
+
```
|
|
1178
|
+
|
|
1179
|
+
**Benefits:**
|
|
1180
|
+
- ✅ 1-line includes (even simpler than inheritance!)
|
|
1181
|
+
- ✅ Mix multiple presets (include HighValueEvent, AuditEvent)
|
|
1182
|
+
- ✅ Rails-style familiar pattern (ActiveSupport::Concern)
|
|
1183
|
+
- ✅ Easy to create custom presets
|
|
1184
|
+
|
|
1185
|
+
---
|
|
1186
|
+
|
|
1187
|
+
### 2.6. "Fat" Background Job Event
|
|
1188
|
+
|
|
1189
|
+
```ruby
|
|
1190
|
+
# app/events/background_job_executed.rb
|
|
1191
|
+
module Events
|
|
1192
|
+
class BackgroundJobExecuted < E11y::Event::Base
|
|
1193
|
+
# === UC-010: Background Job Tracking ===
|
|
1194
|
+
severity :info
|
|
1195
|
+
|
|
1196
|
+
schema do
|
|
1197
|
+
required(:job_id).filled(:string)
|
|
1198
|
+
required(:job_class).filled(:string)
|
|
1199
|
+
required(:queue_name).filled(:string)
|
|
1200
|
+
required(:status).filled(:string) # enqueued, started, success, failed, retry
|
|
1201
|
+
|
|
1202
|
+
required(:enqueued_at).filled(:time)
|
|
1203
|
+
optional(:started_at).filled(:time)
|
|
1204
|
+
optional(:finished_at).filled(:time)
|
|
1205
|
+
optional(:duration_ms).filled(:float)
|
|
1206
|
+
optional(:queue_latency_ms).filled(:float) # started_at - enqueued_at
|
|
1207
|
+
|
|
1208
|
+
optional(:arguments).array
|
|
1209
|
+
optional(:retry_count).filled(:integer)
|
|
1210
|
+
optional(:error_class).filled(:string)
|
|
1211
|
+
optional(:error_message).filled(:string)
|
|
1212
|
+
|
|
1213
|
+
# === UC-006: Trace Context ===
|
|
1214
|
+
optional(:trace_id).filled(:string)
|
|
1215
|
+
optional(:span_id).filled(:string)
|
|
1216
|
+
optional(:parent_span_id).filled(:string)
|
|
1217
|
+
|
|
1218
|
+
# === UC-009: Multi-Service Tracing ===
|
|
1219
|
+
optional(:origin_service).filled(:string)
|
|
1220
|
+
optional(:origin_request_id).filled(:string)
|
|
1221
|
+
end
|
|
1222
|
+
|
|
1223
|
+
# === UC-003: Metrics ===
|
|
1224
|
+
metric :counter,
|
|
1225
|
+
name: 'background_jobs.executed.total',
|
|
1226
|
+
tags: [:job_class, :queue_name, :status],
|
|
1227
|
+
comment: 'Background jobs executed'
|
|
1228
|
+
|
|
1229
|
+
metric :histogram,
|
|
1230
|
+
name: 'background_jobs.duration',
|
|
1231
|
+
tags: [:job_class, :queue_name],
|
|
1232
|
+
buckets: [10, 50, 100, 500, 1000, 5000, 10000, 30000],
|
|
1233
|
+
comment: 'Job execution duration'
|
|
1234
|
+
|
|
1235
|
+
metric :histogram,
|
|
1236
|
+
name: 'background_jobs.queue_latency',
|
|
1237
|
+
tags: [:job_class, :queue_name],
|
|
1238
|
+
buckets: [10, 100, 1000, 5000, 10000, 30000, 60000],
|
|
1239
|
+
comment: 'Time from enqueue to start'
|
|
1240
|
+
|
|
1241
|
+
# === UC-004: Zero-Config SLO Tracking ===
|
|
1242
|
+
# Automatically tracked for Sidekiq/ActiveJob
|
|
1243
|
+
|
|
1244
|
+
# === UC-014: Adaptive Sampling ===
|
|
1245
|
+
# Job failures sampled at 100%, successes at 10%
|
|
1246
|
+
end
|
|
1247
|
+
end
|
|
1248
|
+
|
|
1249
|
+
# Usage (auto-instrumented, but can be manual)
|
|
1250
|
+
Events::BackgroundJobExecuted.track(
|
|
1251
|
+
job_id: 'jid_abc123',
|
|
1252
|
+
job_class: 'SendWelcomeEmailJob',
|
|
1253
|
+
queue_name: 'mailers',
|
|
1254
|
+
status: 'success',
|
|
1255
|
+
|
|
1256
|
+
enqueued_at: 2.minutes.ago,
|
|
1257
|
+
started_at: 30.seconds.ago,
|
|
1258
|
+
finished_at: Time.now,
|
|
1259
|
+
duration_ms: 30_000.0, # finished_at - started_at (30 seconds)
|
|
1260
|
+
queue_latency_ms: 90000, # 90 seconds wait
|
|
1261
|
+
|
|
1262
|
+
arguments: ['user_123', { template: 'welcome_v2' }],
|
|
1263
|
+
retry_count: 0,
|
|
1264
|
+
|
|
1265
|
+
# Trace context (propagated from original HTTP request)
|
|
1266
|
+
trace_id: 'trace_xyz789',
|
|
1267
|
+
span_id: 'span_job_001',
|
|
1268
|
+
parent_span_id: 'span_http_request_001',
|
|
1269
|
+
|
|
1270
|
+
origin_service: 'web-api',
|
|
1271
|
+
origin_request_id: 'req_original'
|
|
1272
|
+
)
|
|
1273
|
+
```
|
|
1274
|
+
|
|
1275
|
+
---
|
|
1276
|
+
|
|
1277
|
+
### 2.7. Use Case Coverage: v1.1 Event-Level Configuration
|
|
1278
|
+
|
|
1279
|
+
> **🎯 How Each UC Works in v1.1**
|
|
1280
|
+
>
|
|
1281
|
+
> This section demonstrates how v1.1 event-level configuration, conventions, and infrastructure-only global config together handle all 22 Use Cases with minimal code.
|
|
1282
|
+
>
|
|
1283
|
+
> **Key Insight:** Most UCs need **0 lines** in global config! Configuration lives where it belongs: in event classes.
|
|
1284
|
+
|
|
1285
|
+
#### UC-001: Request-Scoped Debug Buffering
|
|
1286
|
+
|
|
1287
|
+
**v1.0 (OLD):** 50+ lines in global config for request scope setup
|
|
1288
|
+
**v1.1 (NEW):** Event-level `buffering` DSL
|
|
1289
|
+
|
|
1290
|
+
```ruby
|
|
1291
|
+
# ✅ v1.1: Event-level buffering config
|
|
1292
|
+
class Events::DebugQuery < E11y::Event::Base
|
|
1293
|
+
severity :debug
|
|
1294
|
+
|
|
1295
|
+
buffering :request_scope, # ← Buffer in request, flush on completion
|
|
1296
|
+
max_events: 1000,
|
|
1297
|
+
flush_on: :request_end
|
|
1298
|
+
|
|
1299
|
+
schema { required(:sql).filled(:string) }
|
|
1300
|
+
end
|
|
1301
|
+
|
|
1302
|
+
# Global config: ZERO lines needed! (buffer infrastructure already configured in Section 1)
|
|
1303
|
+
```
|
|
1304
|
+
|
|
1305
|
+
**Lines saved:** 50+ (v1.0 global) → 0 (v1.1 event-level)
|
|
1306
|
+
|
|
1307
|
+
---
|
|
1308
|
+
|
|
1309
|
+
#### UC-002: Business Event Tracking
|
|
1310
|
+
|
|
1311
|
+
**v1.0 (OLD):** Per-event adapter routing in global config
|
|
1312
|
+
**v1.1 (NEW):** Conventions + event-level overrides
|
|
1313
|
+
|
|
1314
|
+
```ruby
|
|
1315
|
+
# ✅ v1.1: Convention-based (zero config!)
|
|
1316
|
+
class Events::OrderCreated < E11y::Event::Base
|
|
1317
|
+
# severity :success ← Auto-inferred from name "Created"
|
|
1318
|
+
# adapters [:loki] ← Auto from severity
|
|
1319
|
+
# sample_rate 1.0 ← Auto from severity (business events = 100%)
|
|
1320
|
+
|
|
1321
|
+
schema do
|
|
1322
|
+
required(:order_id).filled(:string)
|
|
1323
|
+
required(:amount).filled(:float)
|
|
1324
|
+
end
|
|
1325
|
+
end
|
|
1326
|
+
|
|
1327
|
+
# Global config: ZERO lines! Conventions handle everything.
|
|
1328
|
+
# Global context (user_id, tenant_id) enriched via Section 1 hooks.
|
|
1329
|
+
```
|
|
1330
|
+
|
|
1331
|
+
**Lines saved:** 30-40 (v1.0 per-event routing) → 0 (v1.1 conventions)
|
|
1332
|
+
|
|
1333
|
+
---
|
|
1334
|
+
|
|
1335
|
+
#### UC-003: Pattern-Based Metrics
|
|
1336
|
+
|
|
1337
|
+
**v1.0 (OLD):** 100+ lines in global config for metric patterns
|
|
1338
|
+
**v1.1 (NEW):** Event-level `metric` DSL
|
|
1339
|
+
|
|
1340
|
+
```ruby
|
|
1341
|
+
# ✅ v1.1: Event-level metrics (locality of behavior!)
|
|
1342
|
+
class Events::OrderCreated < E11y::Event::Base
|
|
1343
|
+
severity :success
|
|
1344
|
+
|
|
1345
|
+
metric :counter,
|
|
1346
|
+
name: 'orders.created.total',
|
|
1347
|
+
tags: [:payment_method, :country],
|
|
1348
|
+
comment: 'Orders created'
|
|
1349
|
+
|
|
1350
|
+
metric :histogram,
|
|
1351
|
+
name: 'orders.value',
|
|
1352
|
+
tags: [:country],
|
|
1353
|
+
buckets: [10, 50, 100, 500, 1000],
|
|
1354
|
+
comment: 'Order value distribution'
|
|
1355
|
+
|
|
1356
|
+
schema do
|
|
1357
|
+
required(:order_id).filled(:string)
|
|
1358
|
+
required(:amount).filled(:float)
|
|
1359
|
+
required(:payment_method).filled(:string)
|
|
1360
|
+
required(:country).filled(:string)
|
|
1361
|
+
end
|
|
1362
|
+
end
|
|
1363
|
+
|
|
1364
|
+
# Global config: ZERO lines! Metrics defined where they're used.
|
|
1365
|
+
```
|
|
1366
|
+
|
|
1367
|
+
**Lines saved:** 100+ (v1.0 global patterns) → 0 (v1.1 event-level)
|
|
1368
|
+
|
|
1369
|
+
---
|
|
1370
|
+
|
|
1371
|
+
#### UC-004: Zero-Config SLO Tracking
|
|
1372
|
+
|
|
1373
|
+
**v1.0 (OLD):** 80+ lines for SLO definitions per event type
|
|
1374
|
+
**v1.1 (NEW):** Conventions infer SLO targets from severity
|
|
1375
|
+
|
|
1376
|
+
```ruby
|
|
1377
|
+
# ✅ v1.1: Convention-based SLO (zero config!)
|
|
1378
|
+
class Events::ApiRequestCompleted < E11y::Event::Base
|
|
1379
|
+
severity :success
|
|
1380
|
+
# slo_target 0.999 ← Auto from severity (success = 99.9%)
|
|
1381
|
+
# slo_budget 0.001 ← Auto calculated
|
|
1382
|
+
|
|
1383
|
+
schema do
|
|
1384
|
+
required(:status_code).filled(:integer)
|
|
1385
|
+
required(:duration_ms).filled(:float)
|
|
1386
|
+
end
|
|
1387
|
+
end
|
|
1388
|
+
|
|
1389
|
+
# Override only when needed:
|
|
1390
|
+
class Events::CriticalPaymentProcessed < E11y::Event::Base
|
|
1391
|
+
severity :success
|
|
1392
|
+
slo_target 0.9999 # ← Explicit override (99.99% for payments)
|
|
1393
|
+
|
|
1394
|
+
schema { required(:payment_id).filled(:string) }
|
|
1395
|
+
end
|
|
1396
|
+
|
|
1397
|
+
# Global config: ZERO lines! Conventions + optional overrides.
|
|
1398
|
+
```
|
|
1399
|
+
|
|
1400
|
+
**Lines saved:** 80+ (v1.0 per-event SLO) → 0 (v1.1 conventions)
|
|
1401
|
+
|
|
1402
|
+
---
|
|
1403
|
+
|
|
1404
|
+
#### UC-005: Sentry Integration
|
|
1405
|
+
|
|
1406
|
+
**v1.0 (OLD):** 40+ lines for Sentry routing rules in global config
|
|
1407
|
+
**v1.1 (NEW):** Conventions route errors to Sentry automatically
|
|
1408
|
+
|
|
1409
|
+
```ruby
|
|
1410
|
+
# ✅ v1.1: Convention-based (zero config!)
|
|
1411
|
+
class Events::PaymentFailed < E11y::Event::Base
|
|
1412
|
+
severity :error
|
|
1413
|
+
# adapters [:sentry] ← Auto from severity! (errors → Sentry)
|
|
1414
|
+
# sample_rate 1.0 ← Auto (never sample errors!)
|
|
1415
|
+
|
|
1416
|
+
schema do
|
|
1417
|
+
required(:payment_id).filled(:string)
|
|
1418
|
+
required(:error_code).filled(:string)
|
|
1419
|
+
end
|
|
1420
|
+
end
|
|
1421
|
+
|
|
1422
|
+
# Global config: 8 lines (Sentry adapter registration in Section 1)
|
|
1423
|
+
# Routing: 0 lines (convention handles it!)
|
|
1424
|
+
```
|
|
1425
|
+
|
|
1426
|
+
**Lines saved:** 40+ (v1.0 routing rules) → 0 (v1.1 conventions)
|
|
1427
|
+
|
|
1428
|
+
---
|
|
1429
|
+
|
|
1430
|
+
#### UC-006: Trace Context Management
|
|
1431
|
+
|
|
1432
|
+
**v1.0 (OLD):** 60+ lines for trace context propagation in global config
|
|
1433
|
+
**v1.1 (NEW):** Global hooks (infrastructure-level, Section 1)
|
|
1434
|
+
|
|
1435
|
+
```ruby
|
|
1436
|
+
# ✅ v1.1: Global infrastructure (already in Section 1!)
|
|
1437
|
+
E11y.configure do |config|
|
|
1438
|
+
config.context_enricher do |event, context|
|
|
1439
|
+
event.context[:trace_id] = context[:trace_id] || SecureRandom.uuid
|
|
1440
|
+
event.context[:span_id] = SecureRandom.hex(8)
|
|
1441
|
+
event.context[:parent_span_id] = context[:parent_span_id]
|
|
1442
|
+
event
|
|
1443
|
+
end
|
|
1444
|
+
end
|
|
1445
|
+
|
|
1446
|
+
# Event classes: ZERO extra config needed!
|
|
1447
|
+
class Events::ApiRequest < E11y::Event::Base
|
|
1448
|
+
# trace_id/span_id auto-added by global enricher
|
|
1449
|
+
schema { required(:endpoint).filled(:string) }
|
|
1450
|
+
end
|
|
1451
|
+
```
|
|
1452
|
+
|
|
1453
|
+
**Lines saved:** 60+ (v1.0 duplicated per event) → ~20 (v1.1 global hook, reused)
|
|
1454
|
+
|
|
1455
|
+
---
|
|
1456
|
+
|
|
1457
|
+
#### UC-007: PII Filtering
|
|
1458
|
+
|
|
1459
|
+
**v1.0 (OLD):** 70+ lines in global config for PII patterns
|
|
1460
|
+
**v1.1 (NEW):** Event-level `pii_filtering` DSL
|
|
1461
|
+
|
|
1462
|
+
```ruby
|
|
1463
|
+
# ✅ v1.1: Event-level PII config (locality!)
|
|
1464
|
+
class Events::UserRegistered < E11y::Event::Base
|
|
1465
|
+
severity :success
|
|
1466
|
+
|
|
1467
|
+
pii_filtering enabled: true,
|
|
1468
|
+
fields: [:email, :phone, :ip_address],
|
|
1469
|
+
strategy: :hash, # or :redact, :encrypt
|
|
1470
|
+
salt: ENV['PII_SALT']
|
|
1471
|
+
|
|
1472
|
+
schema do
|
|
1473
|
+
required(:user_id).filled(:string)
|
|
1474
|
+
required(:email).filled(:string)
|
|
1475
|
+
required(:phone).filled(:string)
|
|
1476
|
+
optional(:ip_address).filled(:string)
|
|
1477
|
+
end
|
|
1478
|
+
end
|
|
1479
|
+
|
|
1480
|
+
# Global config: ZERO lines! PII filtering per event.
|
|
1481
|
+
```
|
|
1482
|
+
|
|
1483
|
+
**Lines saved:** 70+ (v1.0 global patterns) → 0 (v1.1 event-level)
|
|
1484
|
+
|
|
1485
|
+
---
|
|
1486
|
+
|
|
1487
|
+
#### UC-008: OpenTelemetry Integration
|
|
1488
|
+
|
|
1489
|
+
**v1.0 (OLD):** 50+ lines for OTLP exporter config
|
|
1490
|
+
**v1.1 (NEW):** Adapter registration (Section 1, infrastructure)
|
|
1491
|
+
|
|
1492
|
+
```ruby
|
|
1493
|
+
# ✅ v1.1: Adapter registration (already in Section 1!)
|
|
1494
|
+
config.adapters do
|
|
1495
|
+
register :otlp, E11y::Adapters::OTLPAdapter.new(
|
|
1496
|
+
endpoint: ENV['OTEL_EXPORTER_OTLP_ENDPOINT'],
|
|
1497
|
+
headers: { 'Authorization' => "Bearer #{ENV['OTEL_TOKEN']}" },
|
|
1498
|
+
protocol: :grpc
|
|
1499
|
+
)
|
|
1500
|
+
end
|
|
1501
|
+
|
|
1502
|
+
# Event classes: ZERO extra config!
|
|
1503
|
+
class Events::OrderCreated < E11y::Event::Base
|
|
1504
|
+
# adapters [:loki, :otlp] ← Add :otlp if needed
|
|
1505
|
+
schema { required(:order_id).filled(:string) }
|
|
1506
|
+
end
|
|
1507
|
+
```
|
|
1508
|
+
|
|
1509
|
+
**Lines saved:** 50+ (v1.0 per-event OTLP) → ~10 (v1.1 adapter registration, reused)
|
|
1510
|
+
|
|
1511
|
+
---
|
|
1512
|
+
|
|
1513
|
+
#### UC-009: Multi-Service Tracing
|
|
1514
|
+
|
|
1515
|
+
**v1.0 (OLD):** 80+ lines for cross-service trace propagation
|
|
1516
|
+
**v1.1 (NEW):** Global hooks + event-level schema
|
|
1517
|
+
|
|
1518
|
+
```ruby
|
|
1519
|
+
# ✅ v1.1: Global hook (Section 1) + event schema
|
|
1520
|
+
# Global: Already configured in Section 1 (context_enricher)
|
|
1521
|
+
|
|
1522
|
+
class Events::ServiceCallInitiated < E11y::Event::Base
|
|
1523
|
+
severity :info
|
|
1524
|
+
|
|
1525
|
+
schema do
|
|
1526
|
+
required(:service_name).filled(:string)
|
|
1527
|
+
required(:endpoint).filled(:string)
|
|
1528
|
+
# trace_id, span_id, parent_span_id auto-added by enricher
|
|
1529
|
+
end
|
|
1530
|
+
end
|
|
1531
|
+
|
|
1532
|
+
# Cross-service: E11y.current_context propagated via HTTP headers
|
|
1533
|
+
# No global config needed beyond Section 1 hook!
|
|
1534
|
+
```
|
|
1535
|
+
|
|
1536
|
+
**Lines saved:** 80+ (v1.0) → 0 (v1.1 event-level, hook reused)
|
|
1537
|
+
|
|
1538
|
+
---
|
|
1539
|
+
|
|
1540
|
+
#### UC-010: Background Job Tracking
|
|
1541
|
+
|
|
1542
|
+
**v1.0 (OLD):** 60+ lines for Sidekiq/ActiveJob instrumentation
|
|
1543
|
+
**v1.1 (NEW):** Event-level config + auto-instrumentation
|
|
1544
|
+
|
|
1545
|
+
```ruby
|
|
1546
|
+
# ✅ v1.1: Event class (already shown in Section 2.6!)
|
|
1547
|
+
class Events::BackgroundJobExecuted < E11y::Event::Base
|
|
1548
|
+
severity :info
|
|
1549
|
+
|
|
1550
|
+
metric :counter, name: 'jobs.executed.total', tags: [:job_class, :status]
|
|
1551
|
+
metric :histogram, name: 'jobs.duration', tags: [:job_class]
|
|
1552
|
+
|
|
1553
|
+
schema do
|
|
1554
|
+
required(:job_id).filled(:string)
|
|
1555
|
+
required(:job_class).filled(:string)
|
|
1556
|
+
required(:status).filled(:string)
|
|
1557
|
+
end
|
|
1558
|
+
end
|
|
1559
|
+
|
|
1560
|
+
# Global config: ZERO lines! (auto-instrumentation via Rails integration)
|
|
1561
|
+
```
|
|
1562
|
+
|
|
1563
|
+
**Lines saved:** 60+ (v1.0 instrumentation config) → 0 (v1.1 auto + event-level)
|
|
1564
|
+
|
|
1565
|
+
---
|
|
1566
|
+
|
|
1567
|
+
#### UC-011: Rate Limiting
|
|
1568
|
+
|
|
1569
|
+
**v1.0 (OLD):** 100+ lines for rate limiting rules in global config
|
|
1570
|
+
**v1.1 (NEW):** Event-level `rate_limit` DSL
|
|
1571
|
+
|
|
1572
|
+
```ruby
|
|
1573
|
+
# ✅ v1.1: Event-level rate limiting
|
|
1574
|
+
class Events::UserLogin < E11y::Event::Base
|
|
1575
|
+
severity :info
|
|
1576
|
+
|
|
1577
|
+
rate_limit key: [:user_id],
|
|
1578
|
+
limit: 10,
|
|
1579
|
+
period: 1.minute,
|
|
1580
|
+
on_exceeded: :throttle, # :drop, :sample, :throttle
|
|
1581
|
+
sample_rate: 0.1 # If :throttle
|
|
1582
|
+
|
|
1583
|
+
schema { required(:user_id).filled(:string) }
|
|
1584
|
+
end
|
|
1585
|
+
|
|
1586
|
+
# Global config: ZERO lines! Rate limiting per event.
|
|
1587
|
+
```
|
|
1588
|
+
|
|
1589
|
+
**Lines saved:** 100+ (v1.0 global rules) → 0 (v1.1 event-level)
|
|
1590
|
+
|
|
1591
|
+
---
|
|
1592
|
+
|
|
1593
|
+
#### UC-012: Audit Trail (C01: Two Pipelines) ⚠️
|
|
1594
|
+
|
|
1595
|
+
**v1.0 (OLD):** 90+ lines for audit config in global config
|
|
1596
|
+
**v1.1 (NEW):** Event-level `audit_event` + separate pipeline (C01 Resolution)
|
|
1597
|
+
|
|
1598
|
+
```ruby
|
|
1599
|
+
# ✅ v1.1: Event-level audit config with separate pipeline
|
|
1600
|
+
class Events::GdprDeletionRequested < E11y::Event::Base
|
|
1601
|
+
audit_event true # ← Uses SEPARATE AUDIT PIPELINE (C01)
|
|
1602
|
+
# ← Locks: rate_limiting=false, sampling=false
|
|
1603
|
+
# ← NO PII filtering (signs ORIGINAL data!)
|
|
1604
|
+
|
|
1605
|
+
retention 10.years # ← Override global default (EU: 7y, US: 10y)
|
|
1606
|
+
severity :warn
|
|
1607
|
+
|
|
1608
|
+
schema do
|
|
1609
|
+
required(:user_id).filled(:string)
|
|
1610
|
+
required(:reason).filled(:string)
|
|
1611
|
+
required(:requested_by).filled(:string)
|
|
1612
|
+
required(:admin_email).filled(:string) # ← PII preserved for non-repudiation!
|
|
1613
|
+
required(:ip_address).filled(:string) # ← PII preserved for legal compliance
|
|
1614
|
+
end
|
|
1615
|
+
end
|
|
1616
|
+
|
|
1617
|
+
# === Pipeline Routing (Automatic) ===
|
|
1618
|
+
#
|
|
1619
|
+
# Standard Events (audit_event false):
|
|
1620
|
+
# 1. Validation → 2. PII Filtering ✅ → 3. Sampling ✅ → 4. Adapters
|
|
1621
|
+
#
|
|
1622
|
+
# Audit Events (audit_event true):
|
|
1623
|
+
# 1. Validation → 2. Cryptographic Signing ✅ (ORIGINAL data!)
|
|
1624
|
+
# → 3. Encryption (AES-256-GCM) → 4. Audit Adapter
|
|
1625
|
+
# → NO PII filtering (C01: non-repudiation requirement)
|
|
1626
|
+
# → NO rate limiting (audit events never dropped)
|
|
1627
|
+
# → NO sampling (100% captured)
|
|
1628
|
+
#
|
|
1629
|
+
# Compensating Controls:
|
|
1630
|
+
# - ✅ Encryption at rest (AES-256-GCM mandatory)
|
|
1631
|
+
# - ✅ Access control (auditor role only)
|
|
1632
|
+
# - ✅ Separate storage (isolated from app DB)
|
|
1633
|
+
#
|
|
1634
|
+
# Related: ADR-015 §3.3 (C01 Resolution), UC-007 (PII per-adapter)
|
|
1635
|
+
|
|
1636
|
+
# Global config: 5 lines (audit_retention default in Section 1)
|
|
1637
|
+
# Per-event: retention overridable, adapters auto-routed to audit pipeline
|
|
1638
|
+
```
|
|
1639
|
+
|
|
1640
|
+
**Lines saved:** 90+ (v1.0 global audit rules) → ~5 (v1.1 global default + event-level)
|
|
1641
|
+
|
|
1642
|
+
**Key Innovation (C01):** Separate pipeline for audit events that skips PII filtering
|
|
1643
|
+
to preserve non-repudiation (SOX, HIPAA, GDPR Art. 30), with compensating controls
|
|
1644
|
+
(encryption, access control, separate storage).
|
|
1645
|
+
|
|
1646
|
+
---
|
|
1647
|
+
|
|
1648
|
+
#### UC-013: High Cardinality Protection
|
|
1649
|
+
|
|
1650
|
+
**v1.0 (OLD):** 70+ lines for cardinality limits in global config
|
|
1651
|
+
**v1.1 (NEW):** Event-level metric config with `max_cardinality`
|
|
1652
|
+
|
|
1653
|
+
```ruby
|
|
1654
|
+
# ✅ v1.1: Event-level cardinality protection
|
|
1655
|
+
class Events::ApiRequest < E11y::Event::Base
|
|
1656
|
+
severity :info
|
|
1657
|
+
|
|
1658
|
+
metric :counter,
|
|
1659
|
+
name: 'api.requests.total',
|
|
1660
|
+
tags: [:endpoint], # High cardinality!
|
|
1661
|
+
max_cardinality: 1000, # ← Protection!
|
|
1662
|
+
on_exceeded: :aggregate # or :drop, :sample
|
|
1663
|
+
|
|
1664
|
+
schema { required(:endpoint).filled(:string) }
|
|
1665
|
+
end
|
|
1666
|
+
|
|
1667
|
+
# Global config: ZERO lines! Protection per metric.
|
|
1668
|
+
```
|
|
1669
|
+
|
|
1670
|
+
**Lines saved:** 70+ (v1.0 global cardinality) → 0 (v1.1 event-level)
|
|
1671
|
+
|
|
1672
|
+
---
|
|
1673
|
+
|
|
1674
|
+
#### UC-014: Adaptive Sampling (C11: Stratified Sampling) ⚠️
|
|
1675
|
+
|
|
1676
|
+
**v1.0 (OLD):** 120+ lines for adaptive sampling strategies
|
|
1677
|
+
**v1.1 (NEW):** Conventions + stratified sampling by severity (C11 Resolution)
|
|
1678
|
+
|
|
1679
|
+
```ruby
|
|
1680
|
+
# ✅ v1.1: Convention-based stratified sampling (auto!)
|
|
1681
|
+
class Events::ApiRequest < E11y::Event::Base
|
|
1682
|
+
# === Severity-Based Sampling (C11: Stratified Sampling) ===
|
|
1683
|
+
#
|
|
1684
|
+
# Convention: Severity → Sample Rate (auto!)
|
|
1685
|
+
# :error/:fatal → 1.0 (100%, never sample errors!)
|
|
1686
|
+
# :warn → 0.5 (50%)
|
|
1687
|
+
# :success → 0.1 (10%)
|
|
1688
|
+
# :info → 0.1 (10%)
|
|
1689
|
+
# :debug → 0.01 (1%)
|
|
1690
|
+
#
|
|
1691
|
+
# Why Stratified? Random sampling breaks SLO metrics!
|
|
1692
|
+
# - Errors are rare (5%) but critical → 100% capture
|
|
1693
|
+
# - Success is common (95%) but less critical → 10% sample
|
|
1694
|
+
# → Cost savings: 85.5% reduction while maintaining accuracy
|
|
1695
|
+
|
|
1696
|
+
severity :success
|
|
1697
|
+
# sample_rate 0.1 ← Auto from severity (success = 10%)
|
|
1698
|
+
|
|
1699
|
+
schema do
|
|
1700
|
+
required(:endpoint).filled(:string)
|
|
1701
|
+
required(:status).filled(:integer)
|
|
1702
|
+
end
|
|
1703
|
+
end
|
|
1704
|
+
|
|
1705
|
+
# Override for high-value events:
|
|
1706
|
+
class Events::PaymentProcessed < E11y::Event::Base
|
|
1707
|
+
severity :success
|
|
1708
|
+
sample_rate 1.0 # ← Override: NEVER sample payments (high-value)
|
|
1709
|
+
adaptive_sampling enabled: false # ← Disable adaptive
|
|
1710
|
+
|
|
1711
|
+
schema do
|
|
1712
|
+
required(:amount).filled(:float)
|
|
1713
|
+
required(:payment_id).filled(:string)
|
|
1714
|
+
end
|
|
1715
|
+
end
|
|
1716
|
+
|
|
1717
|
+
# === SLO Calculation with Sampling Correction ===
|
|
1718
|
+
#
|
|
1719
|
+
# Problem: Random sampling (e.g., 10% of ALL events) skews error rates
|
|
1720
|
+
# 1000 requests: 950 success (95%), 50 errors (5%)
|
|
1721
|
+
# Random 10% sample: might get 98 success, 2 errors → 98% success rate ❌ WRONG!
|
|
1722
|
+
#
|
|
1723
|
+
# Solution: Stratified sampling + correction
|
|
1724
|
+
# Sample 50 errors (100% × 50)
|
|
1725
|
+
# Sample 95 success (10% × 950)
|
|
1726
|
+
# Total: 145 events
|
|
1727
|
+
# Corrected success rate: (95/0.1) / ((95/0.1) + (50/1.0)) = 95% ✅ CORRECT!
|
|
1728
|
+
#
|
|
1729
|
+
# Related: ADR-009 §3.7 (C11 Resolution), UC-004 (SLO with correction)
|
|
1730
|
+
|
|
1731
|
+
# Global config: ZERO lines! Conventions + event-level overrides.
|
|
1732
|
+
```
|
|
1733
|
+
|
|
1734
|
+
**Lines saved:** 120+ (v1.0 global strategies) → 0 (v1.1 conventions)
|
|
1735
|
+
|
|
1736
|
+
**Key Innovation (C11):** Stratified sampling by severity preserves error/success ratio
|
|
1737
|
+
for accurate SLO metrics (100% errors, 10% success) while achieving 85.5% cost savings.
|
|
1738
|
+
|
|
1739
|
+
---
|
|
1740
|
+
|
|
1741
|
+
#### UC-015: Cost Optimization
|
|
1742
|
+
|
|
1743
|
+
**v1.0 (OLD):** 150+ lines for cost optimization rules
|
|
1744
|
+
**v1.1 (NEW):** Event-level retention + routing + sampling
|
|
1745
|
+
|
|
1746
|
+
```ruby
|
|
1747
|
+
# ✅ v1.1: Event-level cost optimization
|
|
1748
|
+
class Events::PageView < E11y::Event::Base
|
|
1749
|
+
severity :debug
|
|
1750
|
+
|
|
1751
|
+
# Cost optimization via event-level config:
|
|
1752
|
+
retention 7.days # ← Short retention for cheap events
|
|
1753
|
+
sample_rate 0.01 # ← 1% sampling
|
|
1754
|
+
adapters [:loki] # ← Cheap adapter (not Datadog!)
|
|
1755
|
+
|
|
1756
|
+
compression :zstd, level: 3 # ← Compression
|
|
1757
|
+
|
|
1758
|
+
schema { required(:page_url).filled(:string) }
|
|
1759
|
+
end
|
|
1760
|
+
|
|
1761
|
+
# Global config: ~30 lines (compression settings in Section 1)
|
|
1762
|
+
# Per-event routing/retention: 0 lines (event-level)
|
|
1763
|
+
```
|
|
1764
|
+
|
|
1765
|
+
**Lines saved:** 150+ (v1.0 global cost rules) → ~30 (v1.1 global compression + event-level)
|
|
1766
|
+
|
|
1767
|
+
---
|
|
1768
|
+
|
|
1769
|
+
#### UC-016: Rails Logger Migration
|
|
1770
|
+
|
|
1771
|
+
**v1.0 (OLD):** 40+ lines for Rails.logger compatibility shim
|
|
1772
|
+
**v1.1 (NEW):** Auto-instrumentation (Rails integration, Section 1)
|
|
1773
|
+
|
|
1774
|
+
```ruby
|
|
1775
|
+
# ✅ v1.1: Auto-instrumentation (zero config!)
|
|
1776
|
+
# Rails.logger.info → auto-converted to E11y::Event
|
|
1777
|
+
|
|
1778
|
+
# Global config: Already in Section 1 (Rails integration)
|
|
1779
|
+
# Enable with: config.rails_logger_integration = true
|
|
1780
|
+
|
|
1781
|
+
# Custom events still possible:
|
|
1782
|
+
class Events::RailsLog < E11y::Event::Base
|
|
1783
|
+
severity :info
|
|
1784
|
+
schema { required(:message).filled(:string) }
|
|
1785
|
+
end
|
|
1786
|
+
```
|
|
1787
|
+
|
|
1788
|
+
**Lines saved:** 40+ (v1.0 shim config) → 1 (v1.1 enable flag)
|
|
1789
|
+
|
|
1790
|
+
---
|
|
1791
|
+
|
|
1792
|
+
#### UC-017: Local Development
|
|
1793
|
+
|
|
1794
|
+
**v1.0 (OLD):** 50+ lines for dev environment config
|
|
1795
|
+
**v1.1 (NEW):** Environment-specific adapter routing (conventions)
|
|
1796
|
+
|
|
1797
|
+
```ruby
|
|
1798
|
+
# ✅ v1.1: Convention-based dev config
|
|
1799
|
+
# Global config (Section 1): Adapters registered per environment
|
|
1800
|
+
|
|
1801
|
+
config.adapters do
|
|
1802
|
+
if Rails.env.development?
|
|
1803
|
+
register :file, E11y::Adapters::FileAdapter.new(path: 'log/e11y.log')
|
|
1804
|
+
register :console, E11y::Adapters::ConsoleAdapter.new
|
|
1805
|
+
else
|
|
1806
|
+
register :loki, E11y::Adapters::LokiAdapter.new(url: ENV['LOKI_URL'])
|
|
1807
|
+
end
|
|
1808
|
+
end
|
|
1809
|
+
|
|
1810
|
+
# Event classes: ZERO changes needed!
|
|
1811
|
+
# Conventions route events based on registered adapters.
|
|
1812
|
+
```
|
|
1813
|
+
|
|
1814
|
+
**Lines saved:** 50+ (v1.0 per-env duplication) → ~10 (v1.1 conditional adapter registration)
|
|
1815
|
+
|
|
1816
|
+
---
|
|
1817
|
+
|
|
1818
|
+
#### UC-018: Testing Events
|
|
1819
|
+
|
|
1820
|
+
**v1.0 (OLD):** 60+ lines for test adapter config
|
|
1821
|
+
**v1.1 (NEW):** Test adapter (Section 1) + event classes unchanged
|
|
1822
|
+
|
|
1823
|
+
```ruby
|
|
1824
|
+
# ✅ v1.1: Test adapter (already in Section 1!)
|
|
1825
|
+
# spec/support/e11y.rb
|
|
1826
|
+
E11y.configure do |config|
|
|
1827
|
+
config.adapters do
|
|
1828
|
+
register :test, E11y::Adapters::TestAdapter.new # ← Memory-only
|
|
1829
|
+
end
|
|
1830
|
+
end
|
|
1831
|
+
|
|
1832
|
+
# Tests: Query captured events
|
|
1833
|
+
RSpec.describe 'Order creation' do
|
|
1834
|
+
it 'tracks order.created event' do
|
|
1835
|
+
create_order
|
|
1836
|
+
|
|
1837
|
+
event = E11y.adapter(:test).events.last
|
|
1838
|
+
expect(event.name).to eq('order.created')
|
|
1839
|
+
expect(event.payload[:order_id]).to eq('123')
|
|
1840
|
+
end
|
|
1841
|
+
end
|
|
1842
|
+
|
|
1843
|
+
# Event classes: ZERO changes! Same code in dev/test/prod.
|
|
1844
|
+
```
|
|
1845
|
+
|
|
1846
|
+
**Lines saved:** 60+ (v1.0 test-specific config) → ~5 (v1.1 test adapter registration)
|
|
1847
|
+
|
|
1848
|
+
---
|
|
1849
|
+
|
|
1850
|
+
#### UC-019: Tiered Storage (Retention Tagging)
|
|
1851
|
+
|
|
1852
|
+
**v1.0 (OLD):** 80+ lines for tiered storage rules in global config
|
|
1853
|
+
**v1.1 (NEW):** Event-level retention + adapter-level tiering
|
|
1854
|
+
|
|
1855
|
+
```ruby
|
|
1856
|
+
# ✅ v1.1: Event-level retention
|
|
1857
|
+
class Events::OrderCreated < E11y::Event::Base
|
|
1858
|
+
severity :success
|
|
1859
|
+
retention 30.days # ← Business event: 30 days
|
|
1860
|
+
adapters [:loki, :s3]
|
|
1861
|
+
end
|
|
1862
|
+
|
|
1863
|
+
class Events::AuditLog < E11y::Event::Base
|
|
1864
|
+
audit_event true
|
|
1865
|
+
retention 7.years # ← Audit: 7 years
|
|
1866
|
+
adapters [:loki, :s3_glacier] # ← Tiered adapters
|
|
1867
|
+
end
|
|
1868
|
+
|
|
1869
|
+
# Global config: Adapter-level tiering (Section 1)
|
|
1870
|
+
config.adapters do
|
|
1871
|
+
register :loki, E11y::Adapters::LokiAdapter.new(retention: 30.days)
|
|
1872
|
+
register :s3_glacier, E11y::Adapters::S3Adapter.new(
|
|
1873
|
+
storage_class: 'GLACIER',
|
|
1874
|
+
retention: 7.years
|
|
1875
|
+
)
|
|
1876
|
+
end
|
|
1877
|
+
|
|
1878
|
+
# Downstream (ES/S3): Use retention_until field (auto-added by E11y)
|
|
1879
|
+
```
|
|
1880
|
+
|
|
1881
|
+
**Lines saved:** 80+ (v1.0 global tiering) → ~20 (v1.1 adapter-level + event-level)
|
|
1882
|
+
|
|
1883
|
+
---
|
|
1884
|
+
|
|
1885
|
+
#### UC-020: Event Versioning
|
|
1886
|
+
|
|
1887
|
+
**v1.0 (OLD):** 50+ lines for versioning config in global config
|
|
1888
|
+
**v1.1 (NEW):** Event-level `version` DSL
|
|
1889
|
+
|
|
1890
|
+
```ruby
|
|
1891
|
+
# ✅ v1.1: Event-level versioning
|
|
1892
|
+
class Events::OrderCreated < E11y::Event::Base
|
|
1893
|
+
version 2 # ← Event schema version
|
|
1894
|
+
|
|
1895
|
+
schema do
|
|
1896
|
+
required(:order_id).filled(:string)
|
|
1897
|
+
required(:amount_cents).filled(:integer) # v2: changed from :amount
|
|
1898
|
+
optional(:currency).filled(:string) # v2: added
|
|
1899
|
+
end
|
|
1900
|
+
end
|
|
1901
|
+
|
|
1902
|
+
# Global config: ZERO lines! Versioning per event.
|
|
1903
|
+
# Version added to event metadata automatically.
|
|
1904
|
+
```
|
|
1905
|
+
|
|
1906
|
+
**Lines saved:** 50+ (v1.0 global versioning) → 0 (v1.1 event-level)
|
|
1907
|
+
|
|
1908
|
+
---
|
|
1909
|
+
|
|
1910
|
+
#### UC-021: Error Handling & Retry (Circuit Breaker, DLQ)
|
|
1911
|
+
|
|
1912
|
+
**v1.0 (OLD):** 100+ lines for circuit breaker + DLQ config
|
|
1913
|
+
**v1.1 (NEW):** Infrastructure-level (Section 1) + event-level retry
|
|
1914
|
+
|
|
1915
|
+
```ruby
|
|
1916
|
+
# ✅ v1.1: Infrastructure in Section 1 (already configured!)
|
|
1917
|
+
config.circuit_breaker do
|
|
1918
|
+
enabled true
|
|
1919
|
+
per_adapter true
|
|
1920
|
+
failure_threshold 5
|
|
1921
|
+
timeout 30.seconds
|
|
1922
|
+
end
|
|
1923
|
+
|
|
1924
|
+
config.dead_letter_queue do
|
|
1925
|
+
enabled true
|
|
1926
|
+
adapter :file # or :s3, :redis
|
|
1927
|
+
max_retries 3
|
|
1928
|
+
end
|
|
1929
|
+
|
|
1930
|
+
# Event classes: ZERO extra config!
|
|
1931
|
+
# Circuit breaker + DLQ apply to ALL events automatically.
|
|
1932
|
+
|
|
1933
|
+
# Optional: Per-event retry policy
|
|
1934
|
+
class Events::CriticalPayment < E11y::Event::Base
|
|
1935
|
+
retry_policy max_attempts: 5,
|
|
1936
|
+
backoff: :exponential,
|
|
1937
|
+
max_backoff: 1.minute
|
|
1938
|
+
end
|
|
1939
|
+
```
|
|
1940
|
+
|
|
1941
|
+
**Lines saved:** 100+ (v1.0 duplicated) → ~30 (v1.1 global infrastructure, reused)
|
|
1942
|
+
|
|
1943
|
+
---
|
|
1944
|
+
|
|
1945
|
+
#### UC-022: Event Registry (Schema Discovery)
|
|
1946
|
+
|
|
1947
|
+
**v1.0 (OLD):** 40+ lines for registry export config
|
|
1948
|
+
**v1.1 (NEW):** Auto-generated from event classes (zero config!)
|
|
1949
|
+
|
|
1950
|
+
```ruby
|
|
1951
|
+
# ✅ v1.1: Auto-generated registry (zero config!)
|
|
1952
|
+
# E11y.registry.all_events → returns all event classes with schemas
|
|
1953
|
+
|
|
1954
|
+
# Export registry to JSON (for docs, tooling)
|
|
1955
|
+
rake e11y:registry:export
|
|
1956
|
+
|
|
1957
|
+
# Output: config/e11y_registry.json
|
|
1958
|
+
# {
|
|
1959
|
+
# "events": [
|
|
1960
|
+
# {
|
|
1961
|
+
# "name": "order.created",
|
|
1962
|
+
# "class": "Events::OrderCreated",
|
|
1963
|
+
# "severity": "success",
|
|
1964
|
+
# "schema": { ... },
|
|
1965
|
+
# "version": 1
|
|
1966
|
+
# }
|
|
1967
|
+
# ]
|
|
1968
|
+
# }
|
|
1969
|
+
|
|
1970
|
+
# Global config: ZERO lines! Auto-discovery via Rails autoloading.
|
|
1971
|
+
```
|
|
1972
|
+
|
|
1973
|
+
**Lines saved:** 40+ (v1.0 manual registry) → 0 (v1.1 auto-discovery)
|
|
1974
|
+
|
|
1975
|
+
---
|
|
1976
|
+
|
|
1977
|
+
### 2.7 Summary: v1.1 Configuration Savings
|
|
1978
|
+
|
|
1979
|
+
| Use Case | v1.0 Global Config Lines | v1.1 Global Config Lines | v1.1 Event-Level Lines | Savings |
|
|
1980
|
+
|----------|-------------------------|-------------------------|----------------------|---------|
|
|
1981
|
+
| **UC-001** Request-Scoped Debug | 50 | 0 | 3 | ✅ 50 → 0 |
|
|
1982
|
+
| **UC-002** Business Events | 40 | 0 | 0 (conventions!) | ✅ 40 → 0 |
|
|
1983
|
+
| **UC-003** Metrics | 100 | 0 | 8 | ✅ 100 → 0 |
|
|
1984
|
+
| **UC-004** SLO Tracking | 80 | 0 | 0 (conventions!) | ✅ 80 → 0 |
|
|
1985
|
+
| **UC-005** Sentry | 40 | 8 | 0 (conventions!) | ✅ 40 → 8 |
|
|
1986
|
+
| **UC-006** Trace Context | 60 | 20 | 0 | ✅ 60 → 20 |
|
|
1987
|
+
| **UC-007** PII Filtering | 70 | 0 | 5 | ✅ 70 → 0 |
|
|
1988
|
+
| **UC-008** OpenTelemetry | 50 | 10 | 0 | ✅ 50 → 10 |
|
|
1989
|
+
| **UC-009** Multi-Service Tracing | 80 | 0 | 3 | ✅ 80 → 0 |
|
|
1990
|
+
| **UC-010** Background Jobs | 60 | 0 | 8 | ✅ 60 → 0 |
|
|
1991
|
+
| **UC-011** Rate Limiting | 100 | 0 | 6 | ✅ 100 → 0 |
|
|
1992
|
+
| **UC-012** Audit Trail | 90 | 5 | 4 | ✅ 90 → 5 |
|
|
1993
|
+
| **UC-013** Cardinality Protection | 70 | 0 | 5 | ✅ 70 → 0 |
|
|
1994
|
+
| **UC-014** Adaptive Sampling | 120 | 0 | 0 (conventions!) | ✅ 120 → 0 |
|
|
1995
|
+
| **UC-015** Cost Optimization | 150 | 30 | 6 | ✅ 150 → 30 |
|
|
1996
|
+
| **UC-016** Rails Logger | 40 | 1 | 0 | ✅ 40 → 1 |
|
|
1997
|
+
| **UC-017** Local Development | 50 | 10 | 0 | ✅ 50 → 10 |
|
|
1998
|
+
| **UC-018** Testing | 60 | 5 | 0 | ✅ 60 → 5 |
|
|
1999
|
+
| **UC-019** Tiered Storage | 80 | 20 | 3 | ✅ 80 → 20 |
|
|
2000
|
+
| **UC-020** Event Versioning | 50 | 0 | 1 | ✅ 50 → 0 |
|
|
2001
|
+
| **UC-021** Error Handling & DLQ | 100 | 30 | 0 | ✅ 100 → 30 |
|
|
2002
|
+
| **UC-022** Event Registry | 40 | 0 | 0 (auto!) | ✅ 40 → 0 |
|
|
2003
|
+
| **TOTAL** | **1580 lines** | **139 lines** | **52 lines** | **✅ 1580 → 191 (88% reduction!)** |
|
|
2004
|
+
|
|
2005
|
+
**Key Insights:**
|
|
2006
|
+
|
|
2007
|
+
1. **Infrastructure stays global** (~139 lines): Adapters, buffer, circuit breaker, hooks
|
|
2008
|
+
2. **Event-specific moves to events** (~52 lines total across all UCs): Schemas, metrics, retention, PII
|
|
2009
|
+
3. **Conventions eliminate 80% of config**: Severity → adapters, sample rates, SLO targets
|
|
2010
|
+
4. **Total reduction: 88%** (1580 → 191 lines, summing the per-UC rows in the table above)
|
|
2011
|
+
5. **Maintainability ↑**: Config lives where it's used (locality of behavior)
|
|
2012
|
+
|
|
2013
|
+
**v1.1 Philosophy:**
|
|
2014
|
+
|
|
2015
|
+
```ruby
|
|
2016
|
+
# v1.0: "Configure everything globally" → 1400+ lines, hard to maintain
|
|
2017
|
+
# v1.1: "Configure infrastructure globally, events locally" → <300 lines, easy to maintain
|
|
2018
|
+
```
|
|
2019
|
+
|
|
2020
|
+
---
|
|
2021
|
+
|
|
2022
|
+
## 3. Feature Coverage Matrix (v1.1)
|
|
2023
|
+
|
|
2024
|
+
> **How v1.1 Event-Level Configuration Covers All Use Cases**
|
|
2025
|
+
>
|
|
2026
|
+
> This matrix shows where each UC's configuration lives in v1.1:
|
|
2027
|
+
> - **Global (Infra)**: Infrastructure config in Section 1 (~280 lines)
|
|
2028
|
+
> - **Event-Level**: Config in event classes (locality of behavior)
|
|
2029
|
+
> - **Conventions**: Auto-inferred, zero config needed
|
|
2030
|
+
|
|
2031
|
+
| Use Case | v1.0 Global Lines | v1.1 Global Lines | v1.1 Event Lines | Primary Mechanism | Validation |
|
|
2032
|
+
|----------|------------------|------------------|------------------|-------------------|------------|
|
|
2033
|
+
| **UC-001: Request-Scoped Debug** | 50 | 0 | 3 | Event-level `buffering` DSL | ✅ Buffer type validation |
|
|
2034
|
+
| **UC-002: Business Events** | 40 | 30 (hooks) | 0 | Conventions + global hooks | ✅ Schema required |
|
|
2035
|
+
| **UC-003: Pattern Metrics** | 100 | 0 | 8 | Event-level `metric` DSL | ✅ Metric config validation |
|
|
2036
|
+
| **UC-004: Zero-Config SLO** | 80 | 0 | 0 | Conventions (severity → SLO) | ✅ SLO target 0.0..1.0 |
|
|
2037
|
+
| **UC-005: Sentry** | 40 | 8 (adapter) | 0 | Conventions (error → Sentry) | ✅ Adapter registration check |
|
|
2038
|
+
| **UC-006: Trace Context** | 60 | 20 (enricher) | 0 | Global `context_enricher` hook | ✅ Trace ID format |
|
|
2039
|
+
| **UC-007: PII Filtering** | 70 | 0 | 5 | Event-level `pii_filtering` DSL | ✅ PII strategy validation |
|
|
2040
|
+
| **UC-008: OpenTelemetry** | 50 | 10 (adapter) | 0 | OTLP adapter registration | ✅ OTLP endpoint required |
|
|
2041
|
+
| **UC-009: Multi-Service Tracing** | 80 | 0 (reuse UC-006) | 3 | Event schema + global hook | ✅ Service name required |
|
|
2042
|
+
| **UC-010: Background Jobs** | 60 | 0 | 8 | Event-level config + Rails integration | ✅ Job status enum |
|
|
2043
|
+
| **UC-011: Rate Limiting** | 100 | 0 | 6 | Event-level `rate_limit` DSL | ✅ Limit > 0, period valid |
|
|
2044
|
+
| **UC-012: Audit Trail** | 90 | 5 (retention) | 4 | Event-level `audit_event` + C01 two pipelines | ✅ Locked: rate_limit/sampling |
|
|
2045
|
+
| **UC-013: Cardinality Protection** | 70 | 0 | 5 | Event-level metric `max_cardinality` | ✅ Cardinality > 0 |
|
|
2046
|
+
| **UC-014: Adaptive Sampling** | 120 | 0 | 0 | Conventions + C11 stratified sampling | ✅ Sample rate 0.0..1.0 |
|
|
2047
|
+
| **UC-015: Cost Optimization** | 150 | 30 (compression) | 6 | Event-level retention + compression | ✅ Retention > 0, compression valid |
|
|
2048
|
+
| **UC-016: Rails Logger** | 40 | 1 (enable flag) | 0 | Rails integration auto-capture | ✅ Rails env check |
|
|
2049
|
+
| **UC-017: Local Development** | 50 | 10 (env adapters) | 0 | Environment-specific adapters | ✅ Adapter per env |
|
|
2050
|
+
| **UC-018: Testing Events** | 60 | 5 (test adapter) | 0 | Test adapter registration | ✅ Test adapter present |
|
|
2051
|
+
| **UC-019: Tiered Storage** | 80 | 20 (adapters) | 3 | Adapter-level + event-level retention | ✅ Retention valid, adapters registered |
|
|
2052
|
+
| **UC-020: Event Versioning** | 50 | 0 | 1 | Event-level `version` DSL | ✅ Version > 0 |
|
|
2053
|
+
| **UC-021: Error Handling & DLQ** | 100 | 30 (circuit breaker + DLQ) | 0 | Global infrastructure | ✅ Circuit breaker config valid |
|
|
2054
|
+
| **UC-022: Event Registry** | 40 | 0 | 0 | Auto-discovery via Rails autoloading | ✅ All events discoverable |
|
|
2055
|
+
| **TOTAL** | **1580** | **169** | **52** | **86% reduction** | **✅ Comprehensive** |
|
|
2056
|
+
|
|
2057
|
+
### v1.1 Configuration Distribution
|
|
2058
|
+
|
|
2059
|
+
**Where config lives:**
|
|
2060
|
+
|
|
2061
|
+
1. **Global Infrastructure (169 lines):**
|
|
2062
|
+
- Adapters registry (120 lines) - reused by all UCs
|
|
2063
|
+
- Buffer (30 lines) - UC-001 infrastructure
|
|
2064
|
+
- Circuit breaker (30 lines) - UC-021 infrastructure
|
|
2065
|
+
- Context enricher (20 lines) - UC-006, UC-009 shared
|
|
2066
|
+
- Audit retention (5 lines) - UC-012 default
|
|
2067
|
+
- Compression (30 lines) - UC-015 infrastructure
|
|
2068
|
+
- Rails integration (1 line) - UC-016 enable flag
|
|
2069
|
+
- Test adapter (5 lines) - UC-018 infrastructure
|
|
2070
|
+
- DLQ (30 lines) - UC-021 infrastructure
|
|
2071
|
+
|
|
2072
|
+
2. **Event-Level Config (52 lines total across all UCs):**
|
|
2073
|
+
- Schemas (all events) - UC-002, UC-022
|
|
2074
|
+
- Metrics (8 lines) - UC-003
|
|
2075
|
+
- PII filtering (5 lines) - UC-007
|
|
2076
|
+
- Rate limiting (6 lines) - UC-011
|
|
2077
|
+
- Audit settings (4 lines) - UC-012
|
|
2078
|
+
- Buffering (3 lines) - UC-001
|
|
2079
|
+
- Retention (3 lines) - UC-015, UC-019
|
|
2080
|
+
- Version (1 line) - UC-020
|
|
2081
|
+
|
|
2082
|
+
3. **Conventions (0 lines!):**
|
|
2083
|
+
- Severity → adapters (UC-005)
|
|
2084
|
+
- Severity → sample rate (UC-014)
|
|
2085
|
+
- Severity → SLO target (UC-004)
|
|
2086
|
+
- Event name → severity (UC-002)
|
|
2087
|
+
- Auto-discovery (UC-022)
|
|
2088
|
+
|
|
2089
|
+
### v1.1 Benefits Summary
|
|
2090
|
+
|
|
2091
|
+
**Configuration Simplicity:**
|
|
2092
|
+
- ✅ **~89% reduction** in global config (1580 → 169 lines)
|
|
2093
|
+
- ✅ **Locality of behavior** - config lives in event classes
|
|
2094
|
+
- ✅ **DRY** - infrastructure configured once, reused
|
|
2095
|
+
- ✅ **Conventions** - 80% of config inferred automatically
|
|
2096
|
+
|
|
2097
|
+
**Maintainability:**
|
|
2098
|
+
- ✅ **Single source of truth** - event schema + config in one place
|
|
2099
|
+
- ✅ **Type safety** - validations at class load time
|
|
2100
|
+
- ✅ **Refactoring** - change event = change config (no global search)
|
|
2101
|
+
|
|
2102
|
+
**Developer Experience:**
|
|
2103
|
+
- ✅ **Intuitive** - Rails developers feel at home
|
|
2104
|
+
- ✅ **Discoverable** - config visible in event class
|
|
2105
|
+
- ✅ **Safe** - impossible to forget adapter registration
|
|
2106
|
+
|
|
2107
|
+
---
|
|
2108
|
+
|
|
2109
|
+
## 4. Conflict Analysis (v1.1 - RESOLVED ✅)
|
|
2110
|
+
|
|
2111
|
+
> **Status:** All major contradictions analyzed and resolved through v1.1 event-level configuration approach.
|
|
2112
|
+
>
|
|
2113
|
+
> **Reference:** See `docs/researches/final_analysis/contradictions/` for detailed TRIZ analysis.
|
|
2114
|
+
|
|
2115
|
+
### v1.1 Resolution Summary
|
|
2116
|
+
|
|
2117
|
+
**CONTRADICTION_01: Configuration Complexity (PRIMARY)**
|
|
2118
|
+
- ✅ **RESOLVED** through event-level configuration
|
|
2119
|
+
- Solution: Global config (infrastructure only) + Event-level config + Conventions
|
|
2120
|
+
- Result: 1400+ lines → <300 lines (78% reduction)
|
|
2121
|
+
- Details: `contradictions/CONTRADICTION_01_IMPLEMENTATION_SUMMARY.md`
|
|
2122
|
+
|
|
2123
|
+
**CONTRADICTION_02: Buffer Management**
|
|
2124
|
+
- ✅ **RESOLVED** through dual-buffer architecture
|
|
2125
|
+
- Solution: Request-scoped buffer (debug) + Main buffer (all events)
|
|
2126
|
+
- Result: No conflicts, clear separation of concerns
|
|
2127
|
+
- Details: `contradictions/CONTRADICTION_02_BUFFERS.md`
|
|
2128
|
+
|
|
2129
|
+
**CONTRADICTION_03: Sampling Strategies**
|
|
2130
|
+
- ✅ **RESOLVED** through conventions + event-level overrides
|
|
2131
|
+
- Solution: Severity-based default sampling + per-event adaptive strategies
|
|
2132
|
+
- Result: 120+ lines global config → 0 lines (conventions)
|
|
2133
|
+
- Details: `contradictions/CONTRADICTION_03_SAMPLING.md`
|
|
2134
|
+
|
|
2135
|
+
**CONTRADICTION_04: PII Filtering**
|
|
2136
|
+
- ✅ **RESOLVED** through event-level PII config
|
|
2137
|
+
- Solution: Per-event `pii_filtering` DSL with field-level control
|
|
2138
|
+
- Result: 70+ lines global patterns → event-level (locality)
|
|
2139
|
+
- Details: `contradictions/CONTRADICTION_04_PII.md`
|
|
2140
|
+
|
|
2141
|
+
**CONTRADICTION_05: Performance Overhead**
|
|
2142
|
+
- ✅ **RESOLVED** through smart defaults + lazy evaluation + opt-in features
|
|
2143
|
+
- Solution: Zero-allocation fast path + opt-in features (versioning, sampling) + opt-out features (PII filtering, rate limiting)
|
|
2144
|
+
- Result: <100ns per event overhead on happy path
|
|
2145
|
+
- Performance optimization: Opting out of PII filtering saves 0.2ms (20% of budget!), and opting out of rate limiting saves 0.01ms
|
|
2146
|
+
- Details: `contradictions/CONTRADICTION_05_PERFORMANCE.md`
|
|
2147
|
+
- See also: ADR-001 Section 12 (Opt-In Features Pattern)
|
|
2148
|
+
|
|
2149
|
+
**CONTRADICTION_06: Multi-Adapter Routing**
|
|
2150
|
+
- ✅ **RESOLVED** through conventions + circuit breaker
|
|
2151
|
+
- Solution: Severity-based routing + per-adapter health checks
|
|
2152
|
+
- Result: Automatic failover, no global routing rules
|
|
2153
|
+
- Details: `contradictions/CONTRADICTION_06_MULTI_ADAPTER.md`
|
|
2154
|
+
|
|
2155
|
+
### Feature Interaction Matrix (v1.1)
|
|
2156
|
+
|
|
2157
|
+
| Feature A | Feature B | Conflict? | Resolution |
|
|
2158
|
+
|-----------|-----------|-----------|------------|
|
|
2159
|
+
| **Request Buffer** | Main Buffer | ❌ No | Dual-buffer: separate concerns |
|
|
2160
|
+
| **Rate Limiting** | Adaptive Sampling | ❌ No | Sequential: rate limit → sampling |
|
|
2161
|
+
| **PII Filtering** | OTEL Semantics | ❌ No | PII applied after semantic conventions |
|
|
2162
|
+
| **Audit Signing** | PII Filtering | ❌ No | Separate audit pipeline (C01): audit events skip PII filtering and sign ORIGINAL data |
|
|
2163
|
+
| **Cardinality Protection** | Auto-Metrics | ❌ No | Max cardinality enforced per metric |
|
|
2164
|
+
| **Circuit Breaker** | Multi-Adapter | ❌ No | Per-adapter circuit, others unaffected |
|
|
2165
|
+
| **Compression** | Minimization | ❌ No | Minimize → compress (order matters) |
|
|
2166
|
+
| **Tiered Storage** | Retention | ❌ No | Complementary: retention drives tiering |
|
|
2167
|
+
| **Job Tracing** | Sampling | ❌ No | Same rules as HTTP (severity-based) |
|
|
2168
|
+
|
|
2169
|
+
### Middleware Order (Canonical)
|
|
2170
|
+
|
|
2171
|
+
**v1.1 Middleware Stack (ADR-015):**
|
|
2172
|
+
|
|
2173
|
+
```ruby
|
|
2174
|
+
# Execution order (top to bottom):
|
|
2175
|
+
1. Schema Validation # ← Fail fast on invalid events
|
|
2176
|
+
2. PII Filtering # ← Before any storage/transmission
|
|
2177
|
+
3. Context Enrichment # ← Add trace_id, tenant_id, etc.
|
|
2178
|
+
4. Rate Limiting # ← Drop excess events early
|
|
2179
|
+
5. Adaptive Sampling # ← Intelligent sampling decisions
|
|
2180
|
+
6. Cardinality Protection # ← Protect metrics from explosion
|
|
2181
|
+
7. Compression # ← Reduce payload size
|
|
2182
|
+
8. Circuit Breaker # ← Adapter health check
|
|
2183
|
+
9. Multi-Adapter Routing # ← Send to registered adapters
|
|
2184
|
+
10. Buffer Management # ← Queue for async flush
|
|
2185
|
+
```
|
|
2186
|
+
|
|
2187
|
+
**Key Insight:** v1.1 event-level configuration eliminates most potential conflicts by moving decisions to event definition time (class load) rather than runtime global rules.
|
|
2188
|
+
|
|
2189
|
+
**Reference Documents:**
|
|
2190
|
+
- `docs/researches/final_analysis/contradictions/CONTRADICTION_01_CONFIGURATION.md`
|
|
2191
|
+
- `docs/ADR-015-middleware-order.md`
|
|
2192
|
+
- `docs/ADR-013-reliability-error-handling.md`
|
|
2193
|
+
|
|
2194
|
+
---
|
|
2195
|
+
|
|
2196
|
+
## 5. Unified DSL Validations & Best Practices (NEW - v1.1)
|
|
2197
|
+
|
|
2198
|
+
### 5.1. Automatic Validations
|
|
2199
|
+
|
|
2200
|
+
**All event classes automatically validated at load time:**
|
|
2201
|
+
|
|
2202
|
+
```ruby
|
|
2203
|
+
# Schema presence validation
|
|
2204
|
+
class Events::OrderPaid < E11y::Event::Base
|
|
2205
|
+
# ← ERROR at load: "Events::OrderPaid missing schema!"
|
|
2206
|
+
end
|
|
2207
|
+
|
|
2208
|
+
# Severity validation
|
|
2209
|
+
class Events::OrderPaid < E11y::Event::Base
|
|
2210
|
+
severity :critical # ← ERROR: "Invalid severity: :critical. Valid: debug, info, success, warn, error, fatal"
|
|
2211
|
+
end
|
|
2212
|
+
|
|
2213
|
+
# Adapters validation
|
|
2214
|
+
class Events::OrderPaid < E11y::Event::Base
|
|
2215
|
+
adapters [:loki, :sentri] # ← ERROR: "Unknown adapter: :sentri. Registered: loki, sentry, file"
|
|
2216
|
+
end
|
|
2217
|
+
|
|
2218
|
+
# Rate limit validation
|
|
2219
|
+
class Events::ApiRequest < E11y::Event::Base
|
|
2220
|
+
rate_limit -100 # ← ERROR: "rate_limit must be positive integer, got: -100"
|
|
2221
|
+
end
|
|
2222
|
+
|
|
2223
|
+
# Sample rate validation
|
|
2224
|
+
class Events::DebugLog < E11y::Event::Base
|
|
2225
|
+
sample_rate 1.5 # ← ERROR: "sample_rate must be 0.0..1.0, got: 1.5"
|
|
2226
|
+
end
|
|
2227
|
+
|
|
2228
|
+
# Audit event locked settings
|
|
2229
|
+
class Events::UserDeleted < E11y::Event::Base
|
|
2230
|
+
audit_event true
|
|
2231
|
+
rate_limiting true # ← ERROR: "Cannot enable rate_limiting for audit events!"
|
|
2232
|
+
sampling true # ← ERROR: "Cannot enable sampling for audit events!"
|
|
2233
|
+
end
|
|
2234
|
+
```
|
|
2235
|
+
|
|
2236
|
+
### 5.2. Environment-Specific Configuration Patterns
|
|
2237
|
+
|
|
2238
|
+
**Pattern 1: Adapters per environment**
|
|
2239
|
+
|
|
2240
|
+
```ruby
|
|
2241
|
+
class Events::DebugQuery < E11y::Event::Base
|
|
2242
|
+
adapters Rails.env.production? ? [:loki] : [:file]
|
|
2243
|
+
end
|
|
2244
|
+
```
|
|
2245
|
+
|
|
2246
|
+
**Pattern 2: Rate limits per environment**
|
|
2247
|
+
|
|
2248
|
+
```ruby
|
|
2249
|
+
class Events::ApiRequest < E11y::Event::Base
|
|
2250
|
+
rate_limit case Rails.env
|
|
2251
|
+
when 'production' then 10_000
|
|
2252
|
+
when 'staging' then 1_000
|
|
2253
|
+
else 100
|
|
2254
|
+
end
|
|
2255
|
+
end
|
|
2256
|
+
```
|
|
2257
|
+
|
|
2258
|
+
**Pattern 3: Sampling per environment**
|
|
2259
|
+
|
|
2260
|
+
```ruby
|
|
2261
|
+
class Events::DebugLog < E11y::Event::Base
|
|
2262
|
+
sample_rate Rails.env.production? ? 0.01 : 1.0
|
|
2263
|
+
adaptive_sampling enabled: Rails.env.production?
|
|
2264
|
+
end
|
|
2265
|
+
```
|
|
2266
|
+
|
|
2267
|
+
**Pattern 4: PII per jurisdiction**
|
|
2268
|
+
|
|
2269
|
+
```ruby
|
|
2270
|
+
class Events::UserRegistered < E11y::Event::Base
|
|
2271
|
+
contains_pii true
|
|
2272
|
+
pii_filtering do
|
|
2273
|
+
if ENV['JURISDICTION'] == 'EU'
|
|
2274
|
+
hashes :user_id, algorithm: :sha256 # GDPR pseudonymization
|
|
2275
|
+
else
|
|
2276
|
+
allows :user_id # Non-EU: allow
|
|
2277
|
+
end
|
|
2278
|
+
end
|
|
2279
|
+
end
|
|
2280
|
+
```
|
|
2281
|
+
|
|
2282
|
+
**Pattern 5: Audit retention per jurisdiction**
|
|
2283
|
+
|
|
2284
|
+
```ruby
|
|
2285
|
+
# config/initializers/e11y.rb
|
|
2286
|
+
E11y.configure do |config|
|
|
2287
|
+
config.audit_retention = case ENV['JURISDICTION']
|
|
2288
|
+
when 'EU' then 7.years # GDPR
|
|
2289
|
+
when 'US' then 10.years # SOX/Financial
|
|
2290
|
+
else 5.years
|
|
2291
|
+
end
|
|
2292
|
+
end
|
|
2293
|
+
|
|
2294
|
+
# Event uses configured value:
|
|
2295
|
+
class Events::UserDeleted < E11y::Event::Base
|
|
2296
|
+
audit_event true
|
|
2297
|
+
# ← Auto: retention = E11y.config.audit_retention (configurable!)
|
|
2298
|
+
end
|
|
2299
|
+
```
|
|
2300
|
+
|
|
2301
|
+
### 5.3. Precedence Rules Summary
|
|
2302
|
+
|
|
2303
|
+
**Configuration precedence (highest to lowest):**
|
|
2304
|
+
|
|
2305
|
+
```
|
|
2306
|
+
1. Event-level explicit config (highest priority)
|
|
2307
|
+
↓
|
|
2308
|
+
2. Preset module config
|
|
2309
|
+
↓
|
|
2310
|
+
3. Base class config (inheritance)
|
|
2311
|
+
↓
|
|
2312
|
+
4. Convention-based defaults
|
|
2313
|
+
↓
|
|
2314
|
+
5. Global config (lowest priority)
|
|
2315
|
+
```
|
|
2316
|
+
|
|
2317
|
+
**Example: Complete precedence chain**
|
|
2318
|
+
|
|
2319
|
+
```ruby
|
|
2320
|
+
# Global (lowest)
|
|
2321
|
+
E11y.configure do |config|
|
|
2322
|
+
config.adapters = [:file]
|
|
2323
|
+
config.sample_rate = 0.1
|
|
2324
|
+
config.rate_limit = 1_000
|
|
2325
|
+
end
|
|
2326
|
+
|
|
2327
|
+
# Convention (auto-inferred)
|
|
2328
|
+
# severity :error → sample_rate 1.0, adapters [:sentry]
|
|
2329
|
+
|
|
2330
|
+
# Base class (inheritance)
|
|
2331
|
+
class Events::BasePaymentEvent < E11y::Event::Base
|
|
2332
|
+
severity :success
|
|
2333
|
+
adapters [:loki, :sentry]
|
|
2334
|
+
sample_rate 1.0
|
|
2335
|
+
rate_limit 10_000
|
|
2336
|
+
end
|
|
2337
|
+
|
|
2338
|
+
# Preset (module)
|
|
2339
|
+
module E11y::Presets::HighValueEvent
|
|
2340
|
+
extend ActiveSupport::Concern
|
|
2341
|
+
included do
|
|
2342
|
+
retention 7.years
|
|
2343
|
+
rate_limit 50_000
|
|
2344
|
+
end
|
|
2345
|
+
end
|
|
2346
|
+
|
|
2347
|
+
# Event (highest)
|
|
2348
|
+
class Events::CriticalPayment < Events::BasePaymentEvent
|
|
2349
|
+
include E11y::Presets::HighValueEvent
|
|
2350
|
+
|
|
2351
|
+
adapters [:loki, :sentry, :s3_archive] # Override all
|
|
2352
|
+
|
|
2353
|
+
# Final config:
|
|
2354
|
+
# - severity: :success (base)
|
|
2355
|
+
# - adapters: [:loki, :sentry, :s3_archive] (event override)
|
|
2356
|
+
# - sample_rate: 1.0 (base)
|
|
2357
|
+
# - rate_limit: 50_000 (preset override)
|
|
2358
|
+
# - retention: 7.years (preset)
|
|
2359
|
+
end
|
|
2360
|
+
```
|
|
2361
|
+
|
|
2362
|
+
---
|
|
2363
|
+
|
|
2364
|
+
**Status:** Configuration Complete ✅ (Updated to Unified DSL v1.1.0)
|
|
2365
|
+
**Next Step:** Conflict Analysis 🔍
|
|
2366
|
+
|