e11y 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +4 -0
  3. data/.rubocop.yml +69 -0
  4. data/CHANGELOG.md +26 -0
  5. data/CODE_OF_CONDUCT.md +64 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +179 -0
  8. data/Rakefile +37 -0
  9. data/benchmarks/run_all.rb +33 -0
  10. data/config/README.md +83 -0
  11. data/config/loki-local-config.yaml +35 -0
  12. data/config/prometheus.yml +15 -0
  13. data/docker-compose.yml +78 -0
  14. data/docs/00-ICP-AND-TIMELINE.md +483 -0
  15. data/docs/01-SCALE-REQUIREMENTS.md +858 -0
  16. data/docs/ADR-001-architecture.md +2617 -0
  17. data/docs/ADR-002-metrics-yabeda.md +1395 -0
  18. data/docs/ADR-003-slo-observability.md +3337 -0
  19. data/docs/ADR-004-adapter-architecture.md +2385 -0
  20. data/docs/ADR-005-tracing-context.md +1372 -0
  21. data/docs/ADR-006-security-compliance.md +4143 -0
  22. data/docs/ADR-007-opentelemetry-integration.md +1385 -0
  23. data/docs/ADR-008-rails-integration.md +1911 -0
  24. data/docs/ADR-009-cost-optimization.md +2993 -0
  25. data/docs/ADR-010-developer-experience.md +2166 -0
  26. data/docs/ADR-011-testing-strategy.md +1836 -0
  27. data/docs/ADR-012-event-evolution.md +958 -0
  28. data/docs/ADR-013-reliability-error-handling.md +2750 -0
  29. data/docs/ADR-014-event-driven-slo.md +1533 -0
  30. data/docs/ADR-015-middleware-order.md +1061 -0
  31. data/docs/ADR-016-self-monitoring-slo.md +1234 -0
  32. data/docs/API-REFERENCE-L28.md +914 -0
  33. data/docs/COMPREHENSIVE-CONFIGURATION.md +2366 -0
  34. data/docs/IMPLEMENTATION_NOTES.md +2804 -0
  35. data/docs/IMPLEMENTATION_PLAN.md +1971 -0
  36. data/docs/IMPLEMENTATION_PLAN_ARCHITECTURE.md +586 -0
  37. data/docs/PLAN.md +148 -0
  38. data/docs/QUICK-START.md +934 -0
  39. data/docs/README.md +296 -0
  40. data/docs/design/00-memory-optimization.md +593 -0
  41. data/docs/guides/MIGRATION-L27-L28.md +692 -0
  42. data/docs/guides/PERFORMANCE-BENCHMARKS.md +434 -0
  43. data/docs/guides/README.md +44 -0
  44. data/docs/prd/01-overview-vision.md +440 -0
  45. data/docs/use_cases/README.md +119 -0
  46. data/docs/use_cases/UC-001-request-scoped-debug-buffering.md +813 -0
  47. data/docs/use_cases/UC-002-business-event-tracking.md +1953 -0
  48. data/docs/use_cases/UC-003-pattern-based-metrics.md +1627 -0
  49. data/docs/use_cases/UC-004-zero-config-slo-tracking.md +728 -0
  50. data/docs/use_cases/UC-005-sentry-integration.md +759 -0
  51. data/docs/use_cases/UC-006-trace-context-management.md +905 -0
  52. data/docs/use_cases/UC-007-pii-filtering.md +2648 -0
  53. data/docs/use_cases/UC-008-opentelemetry-integration.md +1153 -0
  54. data/docs/use_cases/UC-009-multi-service-tracing.md +1043 -0
  55. data/docs/use_cases/UC-010-background-job-tracking.md +1018 -0
  56. data/docs/use_cases/UC-011-rate-limiting.md +1906 -0
  57. data/docs/use_cases/UC-012-audit-trail.md +2301 -0
  58. data/docs/use_cases/UC-013-high-cardinality-protection.md +2127 -0
  59. data/docs/use_cases/UC-014-adaptive-sampling.md +1940 -0
  60. data/docs/use_cases/UC-015-cost-optimization.md +735 -0
  61. data/docs/use_cases/UC-016-rails-logger-migration.md +785 -0
  62. data/docs/use_cases/UC-017-local-development.md +867 -0
  63. data/docs/use_cases/UC-018-testing-events.md +1081 -0
  64. data/docs/use_cases/UC-019-tiered-storage-migration.md +562 -0
  65. data/docs/use_cases/UC-020-event-versioning.md +708 -0
  66. data/docs/use_cases/UC-021-error-handling-retry-dlq.md +956 -0
  67. data/docs/use_cases/UC-022-event-registry.md +648 -0
  68. data/docs/use_cases/backlog.md +226 -0
  69. data/e11y.gemspec +76 -0
  70. data/lib/e11y/adapters/adaptive_batcher.rb +207 -0
  71. data/lib/e11y/adapters/audit_encrypted.rb +239 -0
  72. data/lib/e11y/adapters/base.rb +580 -0
  73. data/lib/e11y/adapters/file.rb +224 -0
  74. data/lib/e11y/adapters/in_memory.rb +216 -0
  75. data/lib/e11y/adapters/loki.rb +333 -0
  76. data/lib/e11y/adapters/otel_logs.rb +203 -0
  77. data/lib/e11y/adapters/registry.rb +141 -0
  78. data/lib/e11y/adapters/sentry.rb +230 -0
  79. data/lib/e11y/adapters/stdout.rb +108 -0
  80. data/lib/e11y/adapters/yabeda.rb +370 -0
  81. data/lib/e11y/buffers/adaptive_buffer.rb +339 -0
  82. data/lib/e11y/buffers/base_buffer.rb +40 -0
  83. data/lib/e11y/buffers/request_scoped_buffer.rb +246 -0
  84. data/lib/e11y/buffers/ring_buffer.rb +267 -0
  85. data/lib/e11y/buffers.rb +14 -0
  86. data/lib/e11y/console.rb +122 -0
  87. data/lib/e11y/current.rb +48 -0
  88. data/lib/e11y/event/base.rb +894 -0
  89. data/lib/e11y/event/value_sampling_config.rb +84 -0
  90. data/lib/e11y/events/base_audit_event.rb +43 -0
  91. data/lib/e11y/events/base_payment_event.rb +33 -0
  92. data/lib/e11y/events/rails/cache/delete.rb +21 -0
  93. data/lib/e11y/events/rails/cache/read.rb +23 -0
  94. data/lib/e11y/events/rails/cache/write.rb +22 -0
  95. data/lib/e11y/events/rails/database/query.rb +45 -0
  96. data/lib/e11y/events/rails/http/redirect.rb +21 -0
  97. data/lib/e11y/events/rails/http/request.rb +26 -0
  98. data/lib/e11y/events/rails/http/send_file.rb +21 -0
  99. data/lib/e11y/events/rails/http/start_processing.rb +26 -0
  100. data/lib/e11y/events/rails/job/completed.rb +22 -0
  101. data/lib/e11y/events/rails/job/enqueued.rb +22 -0
  102. data/lib/e11y/events/rails/job/failed.rb +22 -0
  103. data/lib/e11y/events/rails/job/scheduled.rb +23 -0
  104. data/lib/e11y/events/rails/job/started.rb +22 -0
  105. data/lib/e11y/events/rails/log.rb +56 -0
  106. data/lib/e11y/events/rails/view/render.rb +23 -0
  107. data/lib/e11y/events.rb +18 -0
  108. data/lib/e11y/instruments/active_job.rb +201 -0
  109. data/lib/e11y/instruments/rails_instrumentation.rb +141 -0
  110. data/lib/e11y/instruments/sidekiq.rb +175 -0
  111. data/lib/e11y/logger/bridge.rb +205 -0
  112. data/lib/e11y/metrics/cardinality_protection.rb +172 -0
  113. data/lib/e11y/metrics/cardinality_tracker.rb +134 -0
  114. data/lib/e11y/metrics/registry.rb +234 -0
  115. data/lib/e11y/metrics/relabeling.rb +226 -0
  116. data/lib/e11y/metrics.rb +102 -0
  117. data/lib/e11y/middleware/audit_signing.rb +174 -0
  118. data/lib/e11y/middleware/base.rb +140 -0
  119. data/lib/e11y/middleware/event_slo.rb +167 -0
  120. data/lib/e11y/middleware/pii_filter.rb +266 -0
  121. data/lib/e11y/middleware/pii_filtering.rb +280 -0
  122. data/lib/e11y/middleware/rate_limiting.rb +214 -0
  123. data/lib/e11y/middleware/request.rb +163 -0
  124. data/lib/e11y/middleware/routing.rb +157 -0
  125. data/lib/e11y/middleware/sampling.rb +254 -0
  126. data/lib/e11y/middleware/slo.rb +168 -0
  127. data/lib/e11y/middleware/trace_context.rb +131 -0
  128. data/lib/e11y/middleware/validation.rb +118 -0
  129. data/lib/e11y/middleware/versioning.rb +132 -0
  130. data/lib/e11y/middleware.rb +12 -0
  131. data/lib/e11y/pii/patterns.rb +90 -0
  132. data/lib/e11y/pii.rb +13 -0
  133. data/lib/e11y/pipeline/builder.rb +155 -0
  134. data/lib/e11y/pipeline/zone_validator.rb +110 -0
  135. data/lib/e11y/pipeline.rb +12 -0
  136. data/lib/e11y/presets/audit_event.rb +65 -0
  137. data/lib/e11y/presets/debug_event.rb +34 -0
  138. data/lib/e11y/presets/high_value_event.rb +51 -0
  139. data/lib/e11y/presets.rb +19 -0
  140. data/lib/e11y/railtie.rb +138 -0
  141. data/lib/e11y/reliability/circuit_breaker.rb +216 -0
  142. data/lib/e11y/reliability/dlq/file_storage.rb +277 -0
  143. data/lib/e11y/reliability/dlq/filter.rb +117 -0
  144. data/lib/e11y/reliability/retry_handler.rb +207 -0
  145. data/lib/e11y/reliability/retry_rate_limiter.rb +117 -0
  146. data/lib/e11y/sampling/error_spike_detector.rb +225 -0
  147. data/lib/e11y/sampling/load_monitor.rb +161 -0
  148. data/lib/e11y/sampling/stratified_tracker.rb +92 -0
  149. data/lib/e11y/sampling/value_extractor.rb +82 -0
  150. data/lib/e11y/self_monitoring/buffer_monitor.rb +79 -0
  151. data/lib/e11y/self_monitoring/performance_monitor.rb +97 -0
  152. data/lib/e11y/self_monitoring/reliability_monitor.rb +146 -0
  153. data/lib/e11y/slo/event_driven.rb +150 -0
  154. data/lib/e11y/slo/tracker.rb +119 -0
  155. data/lib/e11y/version.rb +9 -0
  156. data/lib/e11y.rb +283 -0
  157. metadata +452 -0
@@ -0,0 +1,1395 @@
1
+ # ADR-002: Metrics & Yabeda Integration
2
+
3
+ **Status:** Implemented
4
+ **Date:** January 12, 2026 (Updated: January 20, 2026)
5
+ **Covers:** UC-003 (Pattern-Based Metrics), UC-013 (High Cardinality Protection)
6
+ **Depends On:** ADR-001 (Core Architecture)
7
+
8
+ **Implementation Notes:** Refactored to "Rails Way" architecture (January 20, 2026) - see [IMPLEMENTATION_NOTES.md](./IMPLEMENTATION_NOTES.md#2026-01-20-metrics-architecture-refactoring---rails-way-)
9
+
10
+ ---
11
+
12
+ ## 📋 Table of Contents
13
+
14
+ 1. [Context & Problem](#1-context--problem)
15
+ 2. [Architecture Overview](#2-architecture-overview)
16
+ 3. [Pattern-Based Metrics](#3-pattern-based-metrics)
17
+ 4. [Cardinality Protection](#4-cardinality-protection)
18
+ - 4.1. [Three-Layer Defense](#41-three-layer-defense)
19
+ - 4.2. [Layer 1: Universal Denylist](#42-layer-1-universal-denylist)
20
+ - 4.3. [Layer 2: Safe Allowlist](#43-layer-2-safe-allowlist)
21
+ - 4.4. [Layer 3: Per-Metric Limits](#44-layer-3-per-metric-cardinality-limits)
22
+ - 4.5. [Layer 4: Dynamic Actions](#45-layer-4-dynamic-actions)
23
+ - 4.6. [Relabeling Rules](#46-relabeling-rules)
24
+ 5. [Yabeda Integration](#5-yabeda-integration)
25
+ 6. [Self-Monitoring](#6-self-monitoring)
26
+ 7. [Configuration](#7-configuration)
27
+ 8. [Performance](#8-performance)
28
+ 9. [Testing](#9-testing)
29
+ 10. [Trade-offs](#10-trade-offs)
30
+ 11. [FAQ & Critical Clarifications](#11-faq--critical-clarifications)
31
+
32
+ ---
33
+
34
+ ## 0. Rails Way Implementation (2026-01-20)
35
+
36
+ > **🎯 Quick Start:** This section describes the implemented "Rails Way" architecture. For historical context and detailed design decisions, see sections below.
37
+
38
+ ### 0.1. Metrics DSL in Event::Base
39
+
40
+ **Define metrics directly in event classes:**
41
+
42
+ ```ruby
43
+ class Events::OrderCreated < E11y::Event::Base
44
+ schema do
45
+ required(:order_id).filled(:string)
46
+ required(:currency).filled(:string)
47
+ required(:status).filled(:string)
48
+ required(:amount).filled(:float)
49
+ end
50
+
51
+ # Define metrics for this event
52
+ metrics do
53
+ counter :orders_total, tags: [:currency, :status]
54
+ histogram :order_amount, value: :amount, tags: [:currency]
55
+ end
56
+ end
57
+
58
+ # Track event - metrics automatically updated
59
+ Events::OrderCreated.track(
60
+ order_id: "123",
61
+ currency: "USD",
62
+ status: "pending",
63
+ amount: 99.99
64
+ )
65
+
66
+ # Prometheus metrics:
67
+ # orders_total{currency="USD",status="pending"} 1
68
+ # order_amount_bucket{currency="USD",le="100"} 1
69
+ ```
70
+
71
+ ### 0.2. Singleton Registry with Boot-Time Validation
72
+
73
+ **All metrics registered in singleton Registry:**
74
+
75
+ ```ruby
76
+ # Automatic registration from Event::Base DSL
77
+ registry = E11y::Metrics::Registry.instance
78
+
79
+ # Find metrics for event
80
+ metrics = registry.find_matching("Events::OrderCreated")
81
+ # => [{ type: :counter, name: :orders_total, tags: [:currency, :status] }, ...]
82
+
83
+ # Boot-time validation catches conflicts:
84
+ class Events::OrderPaid < E11y::Event::Base
85
+ metrics do
86
+ counter :orders_total, tags: [:currency] # ❌ LabelConflictError!
87
+ # Different labels than OrderCreated - caught at boot time
88
+ end
89
+ end
90
+ ```
91
+
92
+ **Rails Integration (Automatic Validation):**
93
+
94
+ ```ruby
95
+ # lib/e11y/railtie.rb - Automatic validation on Rails boot
96
+ class Railtie < Rails::Railtie
97
+ initializer "e11y.validate_metrics", after: :load_config_initializers do
98
+ Rails.application.config.after_initialize do
99
+ E11y::Metrics::Registry.instance.validate_all!
100
+ Rails.logger.info "E11y: Metrics validated successfully (#{E11y::Metrics::Registry.instance.size} metrics)"
101
+ end
102
+ end
103
+ end
104
+
105
+ # Result on boot:
106
+ # E11y: Metrics validated successfully (42 metrics)
107
+ #
108
+ # Or if conflict:
109
+ # E11y::Metrics::Registry::LabelConflictError:
110
+ # Metric "orders_total" label conflict!
111
+ #
112
+ # Existing: [:currency, :status] (from Events::OrderCreated.metrics)
113
+ # New: [:currency] (from Events::OrderPaid.metrics)
114
+ #
115
+ # Fix: Use the same labels everywhere or rename the metric.
116
+ ```
117
+
118
+ **Non-Rails Projects (Manual Validation):**
119
+
120
+ ```ruby
121
+ # config/boot.rb or similar
122
+ require 'e11y'
123
+
124
+ # After loading all event classes
125
+ E11y::Metrics::Registry.instance.validate_all!
126
+ ```
127
+
128
+ ### 0.3. Yabeda Adapter with Integrated Cardinality Protection
129
+
130
+ **Replaces middleware, integrates protection:**
131
+
132
+ ```ruby
133
+ # config/initializers/e11y.rb
134
+ E11y.configure do |config|
135
+ # Yabeda adapter with cardinality protection
136
+ config.adapters[:metrics] = E11y::Adapters::Yabeda.new(
137
+ cardinality_limit: 1000,
138
+ forbidden_labels: [:custom_id]
139
+ )
140
+ end
141
+
142
+ # Adapter automatically:
143
+ # 1. Finds matching metrics from Registry
144
+ # 2. Extracts labels from event data
145
+ # 3. Applies 3-layer cardinality protection
146
+ # 4. Updates Yabeda metrics
147
+ ```
148
+
149
+ ### 0.4. Metric Inheritance and Composition
150
+
151
+ **Base classes for shared metrics:**
152
+
153
+ ```ruby
154
+ class BaseOrderEvent < E11y::Event::Base
155
+ schema do
156
+ required(:order_id).filled(:string)
157
+ required(:currency).filled(:string)
158
+ required(:status).filled(:string)
159
+ end
160
+
161
+ # Shared metric for all order events
162
+ metrics do
163
+ counter :orders_total, tags: [:currency, :status]
164
+ end
165
+ end
166
+
167
+ class Events::OrderCreated < BaseOrderEvent
168
+ # Inherits orders_total metric
169
+ end
170
+
171
+ class Events::OrderPaid < BaseOrderEvent
172
+ # Inherits orders_total + adds own metric
173
+ metrics do
174
+ histogram :order_amount, value: :amount, tags: [:currency]
175
+ end
176
+ end
177
+ ```
178
+
179
+ ### 0.5. Global Metrics via Registry
180
+
181
+ **Pattern-based metrics for multiple events:**
182
+
183
+ ```ruby
184
+ # config/initializers/e11y.rb
185
+ E11y.configure do |config|
186
+ # Global metric for all order.* events
187
+ E11y::Metrics::Registry.instance.register(
188
+ type: :counter,
189
+ pattern: 'order.*', # Matches order.created, order.paid, etc.
190
+ name: :orders_total,
191
+ tags: [:currency, :status],
192
+ source: 'config/initializers/e11y.rb'
193
+ )
194
+ end
195
+ ```
196
+
197
+ ### 0.6. Key Benefits
198
+
199
+ | Feature | Old (Middleware) | New (Rails Way) |
200
+ |---------|------------------|-----------------|
201
+ | **Metric Definition** | Config file | Event class DSL |
202
+ | **Validation** | Runtime | Boot time |
203
+ | **Cardinality Protection** | Separate class | Yabeda adapter |
204
+ | **Inheritance** | Not supported | Full support |
205
+ | **Conflict Detection** | Runtime errors | Boot-time errors |
206
+ | **Complexity** | 4 layers | 3 layers |
207
+
208
+ ---
209
+
210
+ ## 1. Context & Problem
211
+
212
+ ### 1.1. Problem Statement
213
+
214
+ **Current Pain Points:**
215
+
216
+ 1. **Manual Metric Definition:**
217
+ ```ruby
218
+ # ❌ Manual duplication for every event
219
+ Yabeda.orders.increment({}, by: 1)
220
+ Events::OrderCreated.track(...)
221
+ ```
222
+
223
+ 2. **Cardinality Explosions:**
224
+ ```ruby
225
+ # ❌ High-cardinality label → metrics explosion
226
+ Yabeda.api.increment({ user_id: '123456' }) # 1M users = 1M time series!
227
+ ```
228
+
229
+ 3. **Cost & Performance:**
230
+ - Prometheus can't handle high cardinality
231
+ - Query performance degrades
232
+ - Storage costs explode ($$$)
233
+
234
+ ### 1.2. Goals
235
+
236
+ > **⚠️ NOTE (C03 Resolution):** Yabeda is the **default metrics backend** for E11y. OpenTelemetry metrics are **optional** (see ADR-007). Choose ONE backend to avoid double overhead. See [CONFLICT-ANALYSIS.md C03](../researches/CONFLICT-ANALYSIS.md#c03-dual-metrics-collection-overhead) for details.
237
+
238
+ **Primary Goals:**
239
+ - ✅ Auto-create metrics from events (pattern-based)
240
+ - ✅ Prevent cardinality explosions (3-layer defense, simplified from the original 4-layer design)
241
+ - ✅ Zero manual metric definitions
242
+ - ✅ Prometheus-friendly
243
+ - ✅ Cost-effective (<10k time series per metric)
244
+
245
+ **Non-Goals:**
246
+ - ❌ Replace Yabeda (we integrate with it)
247
+ - ❌ Custom metrics backend (use Prometheus)
248
+ - ❌ Real-time aggregation (Prometheus handles it)
249
+
250
+ ### 1.3. Success Metrics
251
+
252
+ | Metric | Target | Critical? |
253
+ |--------|--------|-----------|
254
+ | **Auto-metrics** | 100% coverage | ✅ Yes |
255
+ | **Cardinality** | <10k per metric | ✅ Yes |
256
+ | **Overhead** | <0.1ms per event | ✅ Yes |
257
+ | **Cost savings** | $45k/year | ⚠️ Important |
258
+
259
+ ---
260
+
261
+ ## 2. Architecture Overview
262
+
263
+ > **🔄 Architecture Update (2026-01-20):** Refactored to "Rails Way" - metrics defined in Event::Base DSL, singleton Registry, Yabeda adapter replaces middleware.
264
+
265
+ ### 2.1. System Context
266
+
267
+ ```mermaid
268
+ C4Context
269
+ title Metrics System Context (Rails Way)
270
+
271
+ Person(dev, "Developer", "Defines metrics in Event classes")
272
+
273
+ System(e11y, "E11y Gem", "Event tracking with DSL-based metrics")
274
+ System(yabeda, "Yabeda", "Metrics DSL & collection")
275
+ System_Ext(prometheus, "Prometheus", "Metrics storage & queries")
276
+ System_Ext(grafana, "Grafana", "Visualization")
277
+
278
+ Rel(dev, e11y, "Defines metrics", "class Event::Base { metrics { counter :orders_total } }")
279
+ Rel(e11y, yabeda, "Auto-updates metrics", "Yabeda.e11y.orders_total.increment(...)")
280
+ Rel(yabeda, prometheus, "Exposes metrics", "/metrics endpoint")
281
+ Rel(grafana, prometheus, "Queries", "PromQL")
282
+ Rel(prometheus, grafana, "Returns data", "JSON")
283
+ ```
284
+
285
+ ### 2.2. Component Architecture (Rails Way)
286
+
287
+ ```mermaid
288
+ graph TB
289
+ subgraph "Event Definition (Boot Time)"
290
+ EventClass[Event::Base Class]
291
+ MetricsDSL[metrics DSL Block]
292
+ Registry[Singleton Registry]
293
+ end
294
+
295
+ subgraph "Runtime (Event Tracking)"
296
+ Event[Event.track]
297
+ YabedaAdapter[Yabeda Adapter]
298
+ Matcher[Pattern Matcher]
299
+ Cardinality[Cardinality Protection]
300
+ end
301
+
302
+ subgraph "Yabeda"
303
+ Counter[Yabeda Counters]
304
+ Histogram[Yabeda Histograms]
305
+ Gauge[Yabeda Gauges]
306
+ end
307
+
308
+ subgraph "Protection (3-Layer)"
309
+ L1[Layer 1: Denylist]
310
+ L2[Layer 2: Per-Metric Limits]
311
+ L3[Layer 3: Monitoring]
312
+ end
313
+
314
+ EventClass -->|defines| MetricsDSL
315
+ MetricsDSL -->|registers| Registry
316
+ Registry -->|validates conflicts| Registry
317
+
318
+ Event -->|writes to| YabedaAdapter
319
+ YabedaAdapter --> Matcher
320
+ Matcher -->|finds metrics| Registry
321
+ Matcher --> Cardinality
322
+
323
+ Cardinality --> L1
324
+ L1 -->|pass| L2
325
+ L2 -->|pass| L3
326
+ L3 -->|safe labels| Counter
327
+ L3 -->|safe labels| Histogram
328
+ L3 -->|safe labels| Gauge
329
+
330
+ style Registry fill:#d1ecf1
331
+ style Cardinality fill:#f8d7da
332
+ style L1 fill:#fff3cd
333
+ style L2 fill:#fff3cd
334
+ style L3 fill:#d4edda
335
+ ```
336
+
337
+ ### 2.3. Data Flow
338
+
339
+ ```mermaid
340
+ sequenceDiagram
341
+ participant App as Application
342
+ participant Event as Events::OrderPaid
343
+ participant Middleware as Metrics Middleware
344
+ participant Matcher as Pattern Matcher
345
+ participant Protection as Cardinality Protection
346
+ participant Yabeda as Yabeda
347
+ participant Prom as Prometheus
348
+
349
+ App->>Event: .track(order_id: '123', amount: 99.99, status: 'paid')
350
+ Event->>Middleware: process(event_data)
351
+
352
+ Middleware->>Matcher: match_patterns(event_name: 'order.paid')
353
+ Matcher-->>Middleware: [counter: orders_total, histogram: order_amount]
354
+
355
+ loop For each metric
356
+ Middleware->>Protection: extract_labels(event_data)
357
+ Protection->>Protection: Check denylist (order_id? ❌)
358
+ Protection->>Protection: Check allowlist (status? ✅)
359
+ Protection->>Protection: Check cardinality (3 unique values ✅)
360
+ Protection-->>Middleware: safe_labels: {status: 'paid'}
361
+
362
+ Middleware->>Yabeda: counter.increment(safe_labels)
363
+ Middleware->>Yabeda: histogram.observe(99.99, safe_labels)
364
+ end
365
+
366
+ Note over Prom: Scrape /metrics
367
+ Prom->>Yabeda: GET /metrics
368
+ Yabeda-->>Prom: orders_total{status="paid"} 1542
369
+ ```
370
+
371
+ ---
372
+
373
+ ## 3. Pattern-Based Metrics
374
+
375
+ ### 3.1. Pattern Matching
376
+
377
+ **Design Decision:** Glob-pattern matching for event names.
378
+
379
+ ```ruby
380
+ module E11y
381
+ module Metrics
382
+ class PatternMatcher
383
+ def initialize(patterns)
384
+ @patterns = patterns.map { |p| compile_pattern(p) }
385
+ end
386
+
387
+ def match(event_name)
388
+ @patterns.select { |pattern| pattern.match?(event_name) }
389
+ end
390
+
391
+ private
392
+
393
+ def compile_pattern(pattern_string)
394
+ # Convert glob pattern to regex
395
+ # 'order.*' → /^order\..+$/
396
+ # 'payment.{processed,failed}' → /^payment\.(processed|failed)$/
397
+
398
+ regex_pattern = pattern_string
399
+ .gsub('.', '\\.')
400
+ .gsub('*', '.+')
401
+ .gsub('{', '(')
402
+ .gsub('}', ')')
403
+ .gsub(',', '|')
404
+
405
+ /^#{regex_pattern}$/
406
+ end
407
+ end
408
+ end
409
+ end
410
+ ```
411
+
412
+ **Examples:**
413
+
414
+ ```ruby
415
+ matcher = PatternMatcher.new(['order.*', 'payment.{processed,failed}'])
416
+
417
+ matcher.match('order.created') # ✅ Matches 'order.*'
418
+ matcher.match('order.paid') # ✅ Matches 'order.*'
419
+ matcher.match('payment.processed') # ✅ Matches 'payment.{processed,failed}'
420
+ matcher.match('user.signup') # ❌ No match
421
+ ```
422
+
423
+ ### 3.2. Metric Types
424
+
425
+ **Decision:** Support 3 metric types (Counter, Histogram, Gauge).
426
+
427
+ ```mermaid
428
+ graph TB
429
+ subgraph "Metric Types"
430
+ Counter["Counter<br/>Monotonic increase<br/>Example: orders_total"]
431
+ Histogram["Histogram<br/>Distribution + sum<br/>Example: order_amount"]
432
+ Gauge["Gauge<br/>Current value<br/>Example: active_sessions"]
433
+ end
434
+
435
+ subgraph "Use Cases"
436
+ UC1["Count events<br/>orders_total{status}"]
437
+ UC2["Measure distributions<br/>order_amount{region}"]
438
+ UC3["Track current state<br/>buffer_size"]
439
+ end
440
+
441
+ UC1 --> Counter
442
+ UC2 --> Histogram
443
+ UC3 --> Gauge
444
+
445
+ style Counter fill:#d4edda
446
+ style Histogram fill:#d1ecf1
447
+ style Gauge fill:#fff3cd
448
+ ```
449
+
450
+ **Configuration:**
451
+
452
+ ```ruby
453
+ E11y.configure do |config|
454
+ config.metrics do
455
+ # Counter: count events
456
+ counter pattern: 'order.*',
457
+ name: 'orders_total',
458
+ comment: 'Total orders by status',
459
+ tags: [:status], # ← Extract from payload
460
+ unit: :count
461
+
462
+ # Histogram: measure values
463
+ histogram pattern: 'order.paid',
464
+ name: 'order_amount',
465
+ comment: 'Order payment amounts',
466
+ tags: [:payment_method],
467
+ unit: :dollars,
468
+ buckets: [10, 50, 100, 500, 1000, 5000]
469
+
470
+ # Gauge: current value
471
+ gauge pattern: 'buffer.*',
472
+ name: 'buffer_size',
473
+ comment: 'Current buffer size',
474
+ tags: [:buffer_type],
475
+ unit: :events
476
+ end
477
+ end
478
+ ```
479
+
480
+ ### 3.3. Label Extraction
481
+
482
+ **Decision:** Extract labels from event payload (with cardinality protection).
483
+
484
+ ```ruby
485
+ module E11y
486
+ module Metrics
487
+ class LabelExtractor
488
+ def initialize(allowed_tags, cardinality_config)
489
+ @allowed_tags = allowed_tags
490
+ @cardinality_config = cardinality_config
491
+ end
492
+
493
+ def extract(event_data)
494
+ labels = {}
495
+
496
+ @allowed_tags.each do |tag|
497
+ value = event_data[:payload][tag]
498
+
499
+ next if value.nil?
500
+
501
+ # Apply cardinality protection
502
+ safe_value = @cardinality_config.protect(tag, value)
503
+ labels[tag] = safe_value if safe_value
504
+ end
505
+
506
+ labels
507
+ end
508
+ end
509
+ end
510
+ end
511
+ ```
512
+
513
+ **Example:**
514
+
515
+ ```ruby
516
+ # Event:
517
+ Events::OrderPaid.track(
518
+ order_id: '123456', # ← High cardinality (skip!)
519
+ status: 'paid', # ← Low cardinality ✅
520
+ payment_method: 'card', # ← Low cardinality ✅
521
+ amount: 99.99
522
+ )
523
+
524
+ # Labels extracted:
525
+ # { status: 'paid', payment_method: 'card' }
526
+ # order_id skipped (in denylist)
527
+ ```
528
+
529
+ ---
530
+
531
+ ## 4. Cardinality Protection
532
+
533
+ > **🔄 Simplified (2026-01-20):** Reduced from 4 layers to 3 layers. Removed "Safe Allowlist" (Layer 2) as overengineering for MVP.
534
+
535
+ ### 4.1. Three-Layer Defense
536
+
537
+ **🔑 Critical: Layer Flow Logic**
538
+
539
+ Layers are applied **sequentially** (not simultaneously):
540
+
541
+ 1. **Layer 1 (Universal Denylist):** If label in denylist → DROP, stop processing
542
+ 2. **Layer 2 (Per-Metric Limits):** Track unique values per label, drop if exceeded
543
+ 3. **Layer 3 (Monitoring):** Log warnings when limits exceeded
544
+
545
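+ **Example Flow** (this walkthrough and the diagram below still show the original four-layer design, including the since-removed Safe Allowlist and the separate Dynamic Action layer; in the three-layer model a label that passes the denylist goes straight to the per-metric limit check):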
546
+
547
+ ```
548
+ Label: user_id
549
+ → Layer 1: in FORBIDDEN_LABELS? ✅ Yes → DROP ❌ (stop here)
550
+
551
+ Label: status
552
+ → Layer 1: in FORBIDDEN_LABELS? ❌ No → continue
553
+ → Layer 2: in SAFE_LABELS? ✅ Yes → KEEP ✅ (skip Layer 3-4)
554
+
555
+ Label: custom_field
556
+ → Layer 1: in FORBIDDEN_LABELS? ❌ No → continue
557
+ → Layer 2: in SAFE_LABELS? ❌ No → continue
558
+ → Layer 3: cardinality < limit? ❌ No (150 > 100) → continue
559
+ → Layer 4: action=drop → DROP ❌
560
+ ```
561
+
562
+ ```mermaid
563
+ graph TB
564
+ Input[Event Labels] --> L1{Layer 1<br/>Denylist}
565
+
566
+ L1 -->|In denylist| Drop1[❌ Drop Label]
567
+ L1 -->|Not in denylist| L2{Layer 2<br/>Allowlist}
568
+
569
+ L2 -->|In allowlist| Keep[✅ Keep Label]
570
+ L2 -->|Not in allowlist| L3{Layer 3<br/>Cardinality Limit}
571
+
572
+ L3 -->|Under limit| Keep
573
+ L3 -->|Over limit| L4{Layer 4<br/>Dynamic Action}
574
+
575
+ L4 -->|drop| Drop2[❌ Drop Label]
576
+ L4 -->|alert| Alert[🚨 Alert + Drop]
577
+
578
+ Drop1 --> Log1[Log: denylist_hit]
579
+ Drop2 --> Log2[Log: cardinality_exceeded]
580
+ Alert --> PagerDuty[PagerDuty Alert]
581
+
582
+ Keep --> Metric[Export to Yabeda]
583
+
584
+ style L1 fill:#f8d7da
585
+ style L2 fill:#d4edda
586
+ style L3 fill:#fff3cd
587
+ style L4 fill:#d1ecf1
588
+ style Drop1 fill:#f8d7da
589
+ style Drop2 fill:#f8d7da
590
+ style Keep fill:#d4edda
591
+ ```
592
+
593
+ ### 4.2. Layer 1: Universal Denylist
594
+
595
+ **Design Decision:** Forbidden labels (dropped **only for metrics**, preserved for other adapters).
596
+
597
+ **🔑 Critical Clarification:**
598
+ - **Metrics adapters (Yabeda/Prometheus):** Denylist applied, high-cardinality fields dropped
599
+ - **Other adapters (Loki, Sentry, File):** Full payload preserved, no filtering
600
+
601
+ ```ruby
602
+ module E11y
603
+ module Metrics
604
+ class CardinalityProtection
605
+ # Universal denylist (never allow as metric labels)
606
+ # NOTE: These fields are still sent to non-metric adapters!
607
+ FORBIDDEN_LABELS = [
608
+ # IDs (high cardinality)
609
+ :id, :uuid, :guid, :user_id, :order_id, :transaction_id, :session_id,
610
+ :request_id, :trace_id, :span_id,
611
+
612
+ # Personal data (PII)
613
+ :email, :phone, :ip_address, :user_agent,
614
+
615
+ # Timestamps (infinite cardinality)
616
+ :timestamp, :created_at, :updated_at,
617
+
618
+ # Freeform text (unbounded)
619
+ :message, :error_message, :description, :notes,
620
+
621
+ # URLs/paths (high cardinality)
622
+ :url, :path, :referer,
623
+
624
+ # Tokens/secrets
625
+ :token, :api_key, :password, :secret
626
+ ].freeze
627
+
628
+ def in_denylist?(label_name)
629
+ FORBIDDEN_LABELS.include?(label_name.to_sym)
630
+ end
631
+ end
632
+ end
633
+ end
634
+ ```
635
+
636
+ **Example:**
637
+
638
+ ```ruby
639
+ # Event:
640
+ Events::ApiCall.track(
641
+ user_id: '123456', # ❌ Dropped for metrics, ✅ sent to Loki/Sentry
642
+ endpoint: '/api/users', # ✅ Kept for metrics
643
+ status: 200 # ✅ Kept for metrics
644
+ )
645
+
646
+ # Prometheus metric labels (filtered):
647
+ # api_calls_total{endpoint="/api/users", status="200"} 1
648
+
649
+ # Loki/Sentry/File adapters receive full payload (unfiltered):
650
+ # { user_id: '123456', endpoint: '/api/users', status: 200 }
651
+ ```
652
+
653
+ **Architecture Clarification:**
654
+
655
+ ```mermaid
656
+ graph LR
657
+ Event[Event Payload<br/>user_id: 123456<br/>endpoint: /api<br/>status: 200] --> Pipeline[Middleware Chain]
658
+
659
+ Pipeline --> Metrics[Metrics Middleware]
660
+ Pipeline --> Adapters[Adapter Middleware]
661
+
662
+ Metrics --> Filter[Cardinality Filter]
663
+ Filter -->|user_id dropped| Prometheus[Prometheus<br/>endpoint + status only]
664
+
665
+ Adapters --> Loki[Loki<br/>Full payload ✅]
666
+ Adapters --> Sentry[Sentry<br/>Full payload ✅]
667
+ Adapters --> File[File<br/>Full payload ✅]
668
+
669
+ style Filter fill:#f8d7da
670
+ style Prometheus fill:#fff3cd
671
+ style Loki fill:#d4edda
672
+ style Sentry fill:#d4edda
673
+ style File fill:#d4edda
674
+ ```
675
+
676
+ ### 4.3. Layer 2: Safe Allowlist
677
+
678
+ **Decision:** Pre-approved low-cardinality labels.
679
+
680
+ ```ruby
681
+ module E11y
682
+ module Metrics
683
+ class CardinalityProtection
684
+ # Safe labels (low cardinality, always allowed)
685
+ SAFE_LABELS = [
686
+ # Status/state
687
+ :status, :state, :result, :outcome,
688
+
689
+ # Types/categories
690
+ :type, :kind, :category, :class_name,
691
+
692
+ # Methods/operations
693
+ :method, :action, :operation, :command,
694
+
695
+ # Environments
696
+ :env, :environment, :region, :zone, :datacenter,
697
+
698
+ # Services
699
+ :service, :component, :adapter, :backend,
700
+
701
+ # Severities
702
+ :severity, :level, :priority,
703
+
704
+ # HTTP
705
+ :http_method, :http_status, :http_status_class, # (200 → '2xx')
706
+
707
+ # Protocols
708
+ :protocol, :version,
709
+
710
+ # Success/failure
711
+ :success, :error_type, :error_class
712
+ ].freeze
713
+
714
+ def in_allowlist?(label_name)
715
+ SAFE_LABELS.include?(label_name.to_sym)
716
+ end
717
+ end
718
+ end
719
+ end
720
+ ```
721
+
722
+ ### 4.4. Layer 3: Per-Metric Cardinality Limits
723
+
724
+ **Decision:** Track unique values per label, enforce limits.
725
+
726
+ ```ruby
727
+ module E11y
728
+ module Metrics
729
+ class CardinalityTracker
730
+ def initialize(limit: 100)
731
+ @limit = limit
732
+ @trackers = {} # { metric_name: { label_name: Set[values] } }
733
+ @mutex = Mutex.new
734
+ end
735
+
736
+ def check_and_track(metric_name, label_name, value)
737
+ @mutex.synchronize do
738
+ @trackers[metric_name] ||= {}
739
+ @trackers[metric_name][label_name] ||= Set.new
740
+
741
+ tracker = @trackers[metric_name][label_name]
742
+
743
+ if tracker.include?(value)
744
+ # Already seen, OK
745
+ true
746
+ elsif tracker.size < @limit
747
+ # Under limit, add and allow
748
+ tracker.add(value)
749
+ true
750
+ else
751
+ # Over limit, reject
752
+ false
753
+ end
754
+ end
755
+ end
756
+
757
+ def cardinality(metric_name, label_name)
758
+ @trackers.dig(metric_name, label_name)&.size || 0
759
+ end
760
+ end
761
+ end
762
+ end
763
+ ```
764
+
765
+ **Example:**
766
+
767
+ ```ruby
768
+ tracker = CardinalityTracker.new(limit: 3)
769
+
770
+ # OK: add first value
771
+ tracker.check_and_track('orders_total', :status, 'paid') # ✅ true (1/3)
772
+
773
+ # OK: add second value
774
+ tracker.check_and_track('orders_total', :status, 'failed') # ✅ true (2/3)
775
+
776
+ # OK: add third value
777
+ tracker.check_and_track('orders_total', :status, 'pending') # ✅ true (3/3)
778
+
779
+ # REJECTED: over limit
780
+ tracker.check_and_track('orders_total', :status, 'cancelled') # ❌ false (limit!)
781
+
782
+ # OK: already seen
783
+ tracker.check_and_track('orders_total', :status, 'paid') # ✅ true (seen before)
784
+ ```
785
+
786
+ ### 4.5. Layer 4: Dynamic Actions
787
+
788
+ **Decision:** Configurable actions when limits exceeded.
789
+
790
+ ```ruby
791
+ module E11y
792
+ module Metrics
793
+ class CardinalityProtection
794
+ def handle_excess(label_name, value, config)
795
+ case config.action_on_excess
796
+ when :drop
797
+ # Drop label entirely
798
+ log_cardinality_exceeded(label_name, value, action: :drop)
799
+ nil
800
+
801
+ when :alert
802
+ # Alert + drop
803
+ alert_cardinality_exceeded(label_name, value)
804
+ log_cardinality_exceeded(label_name, value, action: :alert)
805
+ nil
806
+ end
807
+ end
808
+ end
809
+ end
810
+ end
811
+ ```
812
+
813
+ **Example:**
814
+
815
+ ```ruby
816
+ # Config: drop on excess
817
+ config.action_on_excess = :drop
818
+
819
+ # Event with high-cardinality label:
820
+ Events::ApiCall.track(
821
+ endpoint: '/api/users',
822
+ customer_id: 'cust_12345' # ← 101st unique value, exceeds limit
823
+ )
824
+
825
+ # Layer 3: cardinality exceeded (100 limit)
826
+ # Layer 4: action=drop → customer_id dropped
827
+
828
+ # Result metric:
829
+ # api_calls_total{endpoint="/api/users"} 1
830
+ # (customer_id not included)
831
+ ```
832
+
833
+ ---
834
+
835
+ **Decision Tree: Which Action to Use?**
836
+
837
+ ```mermaid
838
+ graph TB
839
+ Start[Cardinality Limit Exceeded] --> Q1{Critical to<br/>investigate?}
840
+
841
+ Q1 -->|Yes, ops needs to know| Alert[Action: ALERT<br/>PagerDuty + Drop label]
842
+ Q1 -->|No| Q2{Can group values<br/>into categories?}
843
+
844
+ Q2 -->|Yes, clear categories| Relabel[Use: RELABELING<br/>http_status → 2xx/3xx/4xx]
845
+ Q2 -->|No clear categories| Drop[Action: DROP<br/>Remove label entirely]
846
+
847
+ Drop --> Result1[orders_total 1000]
848
+ Relabel --> Result2["orders_total{status_class=2xx} 1000"]
849
+ Alert --> Result3[orders_total 1000<br/>+ PagerDuty alert]
850
+
851
+ style Drop fill:#f8d7da
852
+ style Relabel fill:#d4edda
853
+ style Alert fill:#f8d7da
854
+ ```
855
+
856
+ **Summary Table:**
857
+
858
+ | Action | Cardinality | Signal Preserved | Use When | Example |
859
+ |--------|-------------|------------------|----------|---------|
860
+ | **Drop** | 1 | ❌ None | Label not important | Drop `request_id` |
861
+ | **Relabel** | 5-10 | ✅✅✅ High | Clear categories exist | `200` → `2xx` |
862
+ | **Alert** | 1 | ❌ None + 🚨 | Critical to investigate | Unexpected high cardinality |
863
+
864
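+ **Note:** For v1.0, we keep it simple: the only `action_on_excess` values are **drop** and **alert**. Relabeling is not an excess action; it is configured separately as a transformation rule (see 4.6) and is applied regardless of the limit. Advanced strategies (hash bucketing, aggregation) can be added in v1.1+ if needed.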
865
+
866
+ ### 4.6. Relabeling Rules
867
+
868
+ **Decision:** Transform high-cardinality labels to low-cardinality.
869
+
870
+ ```ruby
871
+ E11y.configure do |config|
872
+ config.metrics.cardinality_protection do
873
+ # Relabel: HTTP status code → status class
874
+ relabel :http_status do |value|
875
+ case value.to_i
876
+ when 100..199 then '1xx'
877
+ when 200..299 then '2xx'
878
+ when 300..399 then '3xx'
879
+ when 400..499 then '4xx'
880
+ when 500..599 then '5xx'
881
+ else 'unknown'
882
+ end
883
+ end
884
+
885
+ # Relabel: path → endpoint pattern
886
+ relabel :path do |value|
887
+ # /users/123 → /users/:id
888
+ # /orders/456 → /orders/:id
889
+ value.gsub(/\/\d+/, '/:id')
890
+ end
891
+
892
+ # Relabel: region → region group
893
+ relabel :region do |value|
894
+ case value
895
+ when /^us-/ then 'us'
896
+ when /^eu-/ then 'eu'
897
+ when /^ap-/ then 'ap'
898
+ else 'other'
899
+ end
900
+ end
901
+ end
902
+ end
903
+ ```
904
+
905
+ ---
906
+
907
+ ## 5. Yabeda Integration
908
+
909
+ ### 5.1. Yabeda Setup
910
+
911
+ ```ruby
912
+ # Auto-register metrics in Yabeda
913
+ module E11y
914
+ module Metrics
915
+ class YabedaIntegration
916
+ def self.setup!
917
+ return unless defined?(Yabeda)
918
+
919
+ Yabeda.configure do
920
+ # Create group for E11y metrics
921
+ group :e11y do
922
+ # Metrics will be auto-registered here
923
+ end
924
+ end
925
+
926
+ # Hook into Yabeda collection
927
+ Yabeda.configure!
928
+ end
929
+
930
+ def self.register_metric(type, name, config)
931
+ Yabeda.e11y.public_send(type, name, **config)
932
+ end
933
+ end
934
+ end
935
+ end
936
+ ```
937
+
938
+ ### 5.2. Metric Registration
939
+
940
+ ```ruby
941
+ # Auto-create Yabeda metrics from config
942
+ E11y.configure do |config|
943
+ config.metrics do
944
+ counter pattern: 'order.*',
945
+ name: :orders_total,
946
+ comment: 'Total orders',
947
+ tags: [:status]
948
+ end
949
+ end
950
+
951
+ # Internally creates:
952
+ Yabeda.e11y.counter :orders_total,
953
+ tags: [:status],
954
+ comment: 'Total orders'
955
+ ```
956
+
957
+ ### 5.3. Metric Updates
958
+
959
+ ```ruby
960
+ module E11y
961
+ module Metrics
962
+ class Middleware < E11y::Middleware
963
+ def call(event_data)
964
+ # Match patterns
965
+ metrics = MetricsRegistry.match(event_data[:event_name])
966
+
967
+ metrics.each do |metric|
968
+ # Extract labels (with cardinality protection)
969
+ labels = extract_labels(event_data, metric.tags)
970
+
971
+ # Update Yabeda metric
972
+ case metric.type
973
+ when :counter
974
+ Yabeda.e11y.public_send(metric.name).increment(labels)
975
+
976
+ when :histogram
977
+ value = event_data[:payload][metric.value_field]
978
+ Yabeda.e11y.public_send(metric.name).observe(labels, value)
979
+
980
+ when :gauge
981
+ value = event_data[:payload][metric.value_field]
982
+ Yabeda.e11y.public_send(metric.name).set(labels, value)
983
+ end
984
+ end
985
+
986
+ @app.call(event_data)
987
+ end
988
+ end
989
+ end
990
+ end
991
+ ```
992
+
993
+ ---
994
+
995
+ ## 6. Self-Monitoring
996
+
997
+ ### 6.1. Cardinality Metrics
998
+
999
+ ```ruby
1000
+ E11y.configure do |config|
1001
+ config.self_monitoring do
1002
+ enabled true
1003
+
1004
+ # Track cardinality per metric
1005
+ gauge :metric_cardinality,
1006
+ tags: [:metric_name, :label_name],
1007
+ comment: 'Current cardinality per label'
1008
+
1009
+ # Track cardinality violations
1010
+ counter :cardinality_violations_total,
1011
+ tags: [:metric_name, :label_name, :action],
1012
+ comment: 'Cardinality limit violations'
1013
+
1014
+ # Track label operations
1015
+ counter :labels_processed_total,
1016
+ tags: [:metric_name, :label_name, :result],
1017
+ comment: 'Labels processed (kept/dropped/hashed)'
1018
+ end
1019
+ end
1020
+ ```
1021
+
1022
+ ### 6.2. Cost Impact Metrics
1023
+
1024
+ ```ruby
1025
+ # Track cost impact
1026
+ E11y.configure do |config|
1027
+ config.self_monitoring do
1028
+ # Time series created
1029
+ counter :timeseries_created_total,
1030
+ tags: [:metric_name],
1031
+ comment: 'New time series created'
1032
+
1033
+ # Estimated cost
1034
+ gauge :estimated_cost_monthly,
1035
+ tags: [:metric_name],
1036
+ comment: 'Estimated monthly cost in USD'
1037
+
1038
+ # Samples per second
1039
+ gauge :metric_samples_per_second,
1040
+ tags: [:metric_name],
1041
+ comment: 'Metric update rate'
1042
+ end
1043
+ end
1044
+ ```
1045
+
1046
+ ---
1047
+
1048
+ ## 7. Configuration
1049
+
1050
+ ### 7.1. Full Configuration Example
1051
+
1052
+ ```ruby
1053
+ # config/initializers/e11y.rb
1054
+ E11y.configure do |config|
1055
+ config.metrics do
1056
+ # Enable metrics
1057
+ enabled true
1058
+
1059
+ # Yabeda integration
1060
+ yabeda_integration true
1061
+
1062
+ # ===== Pattern-Based Metrics =====
1063
+
1064
+ # Counter: count all orders
1065
+ counter pattern: 'order.*',
1066
+ name: :orders_total,
1067
+ comment: 'Total orders by status and payment method',
1068
+ tags: [:status, :payment_method],
1069
+ unit: :count
1070
+
1071
+ # Histogram: order amounts
1072
+ histogram pattern: 'order.paid',
1073
+ name: :order_amount,
1074
+ comment: 'Order payment amounts',
1075
+ tags: [:payment_method, :currency],
1076
+ value_field: :amount, # Extract from payload
1077
+ unit: :dollars,
1078
+ buckets: [10, 50, 100, 500, 1000, 5000, 10000]
1079
+
1080
+ # Histogram: API response times
1081
+ histogram pattern: 'api.*',
1082
+ name: :api_duration_seconds,
1083
+ comment: 'API call durations',
1084
+ tags: [:endpoint, :http_status_class],
1085
+ value_field: :duration,
1086
+ unit: :seconds,
1087
+ buckets: [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5]
1088
+
1089
+ # ===== Cardinality Protection =====
1090
+
1091
+ cardinality_protection do
1092
+ # Default limit per label
1093
+ default_cardinality_limit 100
1094
+
1095
+ # Per-metric limits
1096
+ per_metric do
1097
+ metric :orders_total, label: :status, limit: 10
1098
+ metric :orders_total, label: :payment_method, limit: 20
1099
+ metric :api_duration_seconds, label: :endpoint, limit: 500
1100
+ end
1101
+
1102
+ # Action on excess
1103
+ action_on_excess :drop # :drop or :alert
1104
+
1105
+ # Denylist (in addition to universal)
1106
+ forbidden_labels [
1107
+ :customer_id,
1108
+ :internal_ref
1109
+ ]
1110
+
1111
+ # Allowlist (in addition to safe list)
1112
+ allowed_labels [
1113
+ :subscription_tier,
1114
+ :user_role
1115
+ ]
1116
+
1117
+ # Relabeling rules
1118
+ relabel :http_status do |value|
1119
+ "#{value.to_i / 100}xx"
1120
+ end
1121
+
1122
+ relabel :path do |value|
1123
+ value.gsub(/\/\d+/, '/:id')
1124
+ end
1125
+
1126
+ # Monitoring
1127
+ monitoring do
1128
+ alert_on_new_label true
1129
+ alert_threshold 80 # Alert at 80% of limit
1130
+
1131
+ on_alert do |metric_name, label_name, cardinality, limit|
1132
+ Rails.logger.warn "Cardinality alert: #{metric_name}.#{label_name} = #{cardinality}/#{limit}"
1133
+ end
1134
+ end
1135
+ end
1136
+
1137
+ # ===== Advanced Features =====
1138
+
1139
+ # Exemplars (sample trace IDs for metric values)
1140
+ exemplars do
1141
+ enabled true
1142
+ max_per_bucket 1 # 1 trace_id per histogram bucket
1143
+ end
1144
+ end
1145
+ end
1146
+ ```
1147
+
1148
+ ---
1149
+
1150
+ ## 8. Performance
1151
+
1152
+ ### 8.1. Performance Requirements
1153
+
1154
+ | Operation | Target | Critical? |
1155
+ |-----------|--------|-----------|
1156
+ | **Pattern matching** | <0.01ms | ✅ Yes |
1157
+ | **Label extraction** | <0.05ms | ✅ Yes |
1158
+ | **Cardinality check** | <0.02ms | ✅ Yes |
1159
+ | **Yabeda update** | <0.02ms | ✅ Yes |
1160
+ | **Total overhead** | <0.1ms | ✅ Yes |
1161
+
1162
+ ### 8.2. Optimization Strategies
1163
+
1164
+ ```ruby
1165
+ module E11y
1166
+ module Metrics
1167
+ class MetricsRegistry
1168
+ def initialize
1169
+ # Pre-compile patterns for fast matching
1170
+ @compiled_patterns = {}
1171
+ @pattern_cache = {}
1172
+
1173
+ compile_patterns!
1174
+ end
1175
+
1176
+ def match(event_name)
1177
+ # Cache results
1178
+ @pattern_cache[event_name] ||= begin
1179
+ @compiled_patterns.select { |pattern, _|
1180
+ pattern.match?(event_name)
1181
+ }.values
1182
+ end
1183
+ end
1184
+
1185
+ private
1186
+
1187
+ def compile_patterns!
1188
+ # Compile once at boot
1189
+ Config.metrics.each do |metric_config|
1190
+ pattern = compile_pattern(metric_config.pattern)
1191
+ @compiled_patterns[pattern] = metric_config
1192
+ end
1193
+
1194
+ # Freeze for thread-safety
1195
+ @compiled_patterns.freeze
1196
+ end
1197
+ end
1198
+ end
1199
+ end
1200
+ ```
1201
+
1202
+ ### 8.3. Memory Footprint
1203
+
1204
+ ```
1205
+ Cardinality Tracker Memory:
1206
+
1207
+ Per metric: 100 labels × 100 values × 50 bytes = 500KB
1208
+ 10 metrics: 10 × 500KB = 5MB
1209
+
1210
+ Total: ~5MB for cardinality tracking
1211
+ ```
1212
+
1213
+ ---
1214
+
1215
+ ## 9. Testing
1216
+
1217
+ ### 9.1. Test Coverage Requirements
1218
+
1219
+ | Component | Coverage | Critical? |
1220
+ |-----------|----------|-----------|
1221
+ | **Pattern matching** | 100% | ✅ Yes |
1222
+ | **Label extraction** | 95% | ✅ Yes |
1223
+ | **Cardinality protection** | 100% | ✅ Yes |
1224
+ | **Yabeda integration** | 90% | ⚠️ Important |
1225
+
1226
+ ### 9.2. Test Examples
1227
+
1228
+ ```ruby
1229
+ RSpec.describe E11y::Metrics::CardinalityProtection do
1230
+ describe 'Layer 1: Denylist' do
1231
+ it 'drops forbidden labels' do
1232
+ protection = CardinalityProtection.new
1233
+
1234
+ result = protection.protect(:user_id, '123456')
1235
+
1236
+ expect(result).to be_nil
1237
+ expect(protection.metrics[:labels_dropped]).to eq(1)
1238
+ end
1239
+ end
1240
+
1241
+ describe 'Layer 3: Cardinality Limits' do
1242
+ it 'enforces per-metric limits' do
1243
+ protection = CardinalityProtection.new(
1244
+ limits: { orders_total: { status: 3 } }
1245
+ )
1246
+
1247
+ # Add 3 values (OK)
1248
+ protection.protect(:status, 'paid')
1249
+ protection.protect(:status, 'failed')
1250
+ protection.protect(:status, 'pending')
1251
+
1252
+ # 4th value (REJECTED)
1253
+ result = protection.protect(:status, 'cancelled')
1254
+
1255
+ expect(result).to be_nil
1256
+ expect(protection.cardinality(:orders_total, :status)).to eq(3)
1257
+ end
1258
+ end
1259
+ end
1260
+ ```
1261
+
1262
+ ---
1263
+
1264
+ ## 10. Trade-offs
1265
+
1266
+ ### 10.1. Key Decisions
1267
+
1268
+ | Decision | Pro | Con | Rationale |
1269
+ |----------|-----|-----|-----------|
1270
+ | **Auto-metrics** | Zero boilerplate | Less control | DX > control |
1271
+ | **Pattern matching** | Flexible | Slower than exact | Flexibility matters |
1272
+ | **4-layer defense** | Robust | Complex | Safety critical |
1273
+ | **Hash bucketing (deferred to v1.1+)** | Preserves some signal | Loss of precision, hard to debug | Not in v1.0; relabeling or drop covers current needs |
1274
+ | **Yabeda dependency** | Battle-tested | External dep | Standard in Ruby |
1275
+
1276
+ ### 10.2. Alternatives Considered
1277
+
1278
+ **A) Custom metrics backend**
1279
+ - ❌ Rejected: Reinventing the wheel; Prometheus is the standard
1280
+
1281
+ **B) No cardinality protection**
1282
+ - ❌ Rejected: Production disasters inevitable
1283
+
1284
+ **C) Manual metric definitions**
1285
+ - ❌ Rejected: Poor DX, error-prone
1286
+
1287
+ ---
1288
+
1289
+ ## 11. FAQ & Critical Clarifications
1290
+
1291
+ ### Q1: Does cardinality protection apply to all adapters?
1292
+
1293
+ **A: No, only to metrics (Yabeda/Prometheus).**
1294
+
1295
+ ```
1296
+ Event payload: { user_id: '123', status: 'paid', amount: 99.99 }
1297
+
1298
+ Metrics (Prometheus):
1299
+ orders_total{status="paid"} 1
1300
+ ❌ user_id dropped (cardinality protection)
1301
+
1302
+ Loki/Sentry/File:
1303
+ { user_id: '123', status: 'paid', amount: 99.99 }
1304
+ ✅ Full payload preserved
1305
+ ```
1306
+
1307
+ **Rationale:**
1308
+ - **Metrics:** Cardinality explosions break Prometheus (cost, performance)
1309
+ - **Logs:** High-cardinality fields are fine (indexed differently)
1310
+
1311
+ ---
1312
+
1313
+ ### Q2: Are Layers 1-4 applied simultaneously or sequentially?
1314
+
1315
+ **A: Sequentially (waterfall), not simultaneously.**
1316
+
1317
+ ```
1318
+ Processing order:
1319
+
1320
+ 1. Layer 1 (Denylist)
1321
+ ↓ If in denylist → DROP, stop
1322
+ ↓ If not in denylist → continue to Layer 2
1323
+
1324
+ 2. Layer 2 (Allowlist)
1325
+ ↓ If in allowlist → KEEP, skip Layer 3-4
1326
+ ↓ If not in allowlist → continue to Layer 3
1327
+
1328
+ 3. Layer 3 (Cardinality Limit)
1329
+ ↓ If under limit → KEEP, stop
1330
+ ↓ If over limit → continue to Layer 4
1331
+
1332
+ 4. Layer 4 (Dynamic Action)
1333
+ ↓ Apply configured action: drop/alert
1334
+ ```
1335
+
1336
+ **Example:**
1337
+
1338
+ ```ruby
1339
+ # Event: { user_id: '123', status: 'paid', tier: 'premium' }
1340
+
1341
+ # user_id:
1342
+ # Layer 1: in FORBIDDEN_LABELS → ❌ DROP (stop here)
1343
+
1344
+ # status:
1345
+ # Layer 1: not in FORBIDDEN_LABELS → continue
1346
+ # Layer 2: in SAFE_LABELS → ✅ KEEP (skip Layer 3-4)
1347
+
1348
+ # tier:
1349
+ # Layer 1: not in FORBIDDEN_LABELS → continue
1350
+ # Layer 2: not in SAFE_LABELS → continue
1351
+ # Layer 3: cardinality = 150 > limit (100) → continue
1352
+ # Layer 4: action=drop → ❌ DROP (label removed)
1352
+
1353
+ # Result: { status: 'paid' }
1355
+ ```
1356
+
1357
+ ---
1358
+
1359
+ ### Q3: What should I do when cardinality limit is exceeded?
1360
+
1361
+ **A: Use relabeling if possible, otherwise drop the label.**
1362
+
1363
+ **Decision Matrix:**
1364
+
1365
+ | Scenario | Best Action | Rationale |
1366
+ |----------|-------------|-----------|
1367
+ | **No signal needed** | Drop | Simplest, lowest overhead |
1368
+ | **Clear categories** | Relabeling | Best signal preservation |
1369
+ | **Critical anomaly** | Alert | Operations team needs to know |
1370
+
1371
+ **Relabeling Examples:**
1372
+
1373
+ 1. **HTTP Status Codes:** 200 → `2xx` (reduces from ~50 to 5 values)
1374
+ 2. **Geographic Regions:** `us-east-1` → `us` (reduces from ~30 to 5 values)
1375
+ 3. **Paths:** `/users/123` → `/users/:id` (reduces from infinite to ~100 endpoints)
1376
+
1377
+ **When to Alert:**
1378
+
1379
+ - Unexpected new high-cardinality label appears
1380
+ - Known label suddenly exceeds limit (potential issue)
1381
+ - Critical metric affected
1382
+
1383
+ **Why No Hash Bucketing in v1.0:**
1384
+
1385
+ - ❌ Adds complexity without clear immediate value
1386
+ - ❌ Hard to explain and debug
1387
+ - ❌ Most cases are better solved by relabeling or full logging
1388
+ - ✅ Can be added in v1.1+ if real need emerges
1389
+
1390
+ ---
1391
+
1392
+ **Status:** ✅ Draft Complete with Critical Clarifications
1393
+ **Next:** ADR-004 (Adapters) or ADR-006 (Security)
1394
+ **Estimated Implementation:** 2 weeks
1395
+