e11y 0.2.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +130 -10
  3. data/CHANGELOG.md +80 -1
  4. data/CLAUDE.md +168 -0
  5. data/CONTRIBUTING.md +640 -0
  6. data/README.md +165 -701
  7. data/RELEASE.md +41 -12
  8. data/Rakefile +249 -57
  9. data/config/README.md +1 -1
  10. data/config/loki-local-config.yaml +12 -0
  11. data/config/otel-collector-config.yaml +44 -0
  12. data/cucumber.yml +1 -0
  13. data/docker-compose.yml +18 -2
  14. data/docs/ADAPTERS.md +76 -0
  15. data/docs/ADAPTIVE_SAMPLING.md +59 -0
  16. data/docs/COMPARISON.md +104 -0
  17. data/docs/CONFIGURATION.md +52 -0
  18. data/docs/DISTRIBUTED_TRACING.md +44 -0
  19. data/docs/LIMITATIONS.md +13 -0
  20. data/docs/METRICS_DSL.md +84 -0
  21. data/docs/PERFORMANCE.md +60 -0
  22. data/docs/PII_FILTERING.md +40 -0
  23. data/docs/PRESETS.md +65 -0
  24. data/docs/QUICK-START.md +546 -587
  25. data/docs/RAILS_INTEGRATION.md +79 -0
  26. data/docs/SCHEMA_VALIDATION.md +63 -0
  27. data/docs/SLO-PROMQL-ALERTS.md +161 -0
  28. data/docs/TESTING.md +69 -0
  29. data/docs/{ADR-001-architecture.md → architecture/ADR-001-architecture.md} +36 -65
  30. data/docs/{ADR-002-metrics-yabeda.md → architecture/ADR-002-metrics-yabeda.md} +62 -236
  31. data/docs/architecture/ADR-003-slo-observability.md +1402 -0
  32. data/docs/{ADR-004-adapter-architecture.md → architecture/ADR-004-adapter-architecture.md} +163 -146
  33. data/docs/{ADR-005-tracing-context.md → architecture/ADR-005-tracing-context.md} +10 -9
  34. data/docs/{ADR-006-security-compliance.md → architecture/ADR-006-security-compliance.md} +184 -191
  35. data/docs/{ADR-007-opentelemetry-integration.md → architecture/ADR-007-opentelemetry-integration.md} +3 -21
  36. data/docs/{ADR-008-rails-integration.md → architecture/ADR-008-rails-integration.md} +182 -743
  37. data/docs/{ADR-009-cost-optimization.md → architecture/ADR-009-cost-optimization.md} +45 -54
  38. data/docs/architecture/ADR-010-developer-experience.md +522 -0
  39. data/docs/{ADR-011-testing-strategy.md → architecture/ADR-011-testing-strategy.md} +44 -86
  40. data/docs/{ADR-012-event-evolution.md → architecture/ADR-012-event-evolution.md} +11 -11
  41. data/docs/{ADR-013-reliability-error-handling.md → architecture/ADR-013-reliability-error-handling.md} +37 -12
  42. data/docs/{ADR-014-event-driven-slo.md → architecture/ADR-014-event-driven-slo.md} +12 -24
  43. data/docs/{ADR-015-middleware-order.md → architecture/ADR-015-middleware-order.md} +43 -59
  44. data/docs/{ADR-016-self-monitoring-slo.md → architecture/ADR-016-self-monitoring-slo.md} +58 -355
  45. data/docs/{ADR-017-multi-rails-compatibility.md → architecture/ADR-017-multi-rails-compatibility.md} +4 -11
  46. data/docs/architecture/ADR-018-memory-optimization.md +366 -0
  47. data/docs/{ADR-INDEX.md → architecture/ADR-INDEX.md} +11 -6
  48. data/docs/plans/2026-03-20-browser-overlay-svelte.md +281 -0
  49. data/docs/{00-ICP-AND-TIMELINE.md → prd/00-ICP-AND-TIMELINE.md} +6 -6
  50. data/docs/{01-SCALE-REQUIREMENTS.md → prd/01-SCALE-REQUIREMENTS.md} +6 -6
  51. data/docs/prd/01-overview-vision.md +19 -14
  52. data/docs/use_cases/README.md +22 -23
  53. data/docs/use_cases/UC-001-request-scoped-debug-buffering.md +50 -44
  54. data/docs/use_cases/UC-002-business-event-tracking.md +26 -95
  55. data/docs/use_cases/UC-003-event-metrics.md +66 -0
  56. data/docs/use_cases/UC-004-zero-config-slo-tracking.md +33 -684
  57. data/docs/use_cases/UC-005-sentry-integration.md +13 -15
  58. data/docs/use_cases/UC-006-trace-context-management.md +30 -28
  59. data/docs/use_cases/UC-007-pii-filtering.md +35 -87
  60. data/docs/use_cases/UC-008-opentelemetry-integration.md +51 -89
  61. data/docs/use_cases/UC-009-multi-service-tracing.md +30 -178
  62. data/docs/use_cases/UC-010-background-job-tracking.md +24 -91
  63. data/docs/use_cases/UC-011-rate-limiting.md +95 -168
  64. data/docs/use_cases/UC-012-audit-trail.md +21 -46
  65. data/docs/use_cases/UC-013-high-cardinality-protection.md +29 -167
  66. data/docs/use_cases/UC-014-adaptive-sampling.md +2 -2
  67. data/docs/use_cases/UC-015-cost-optimization.md +46 -99
  68. data/docs/use_cases/UC-016-rails-logger-migration.md +39 -213
  69. data/docs/use_cases/UC-017-local-development.md +203 -777
  70. data/docs/use_cases/UC-018-testing-events.md +3 -3
  71. data/docs/use_cases/UC-019-retention-based-routing.md +53 -106
  72. data/docs/use_cases/UC-020-event-versioning.md +8 -9
  73. data/docs/use_cases/UC-021-error-handling-retry-dlq.md +18 -22
  74. data/docs/use_cases/UC-022-event-registry.md +15 -21
  75. data/docs/use_cases/backlog.md +119 -87
  76. data/e11y.gemspec +2 -2
  77. data/gems/e11y-devtools/README.md +158 -0
  78. data/gems/e11y-devtools/config/routes.rb +15 -0
  79. data/gems/e11y-devtools/e11y-devtools.gemspec +25 -0
  80. data/gems/e11y-devtools/exe/e11y +34 -0
  81. data/gems/e11y-devtools/frontend/.gitignore +24 -0
  82. data/gems/e11y-devtools/frontend/README.md +51 -0
  83. data/gems/e11y-devtools/frontend/index.html +14 -0
  84. data/gems/e11y-devtools/frontend/package-lock.json +3707 -0
  85. data/gems/e11y-devtools/frontend/package.json +28 -0
  86. data/gems/e11y-devtools/frontend/public/mocks/v1/events/recent.json +4205 -0
  87. data/gems/e11y-devtools/frontend/public/mocks/v1/interactions.json +194 -0
  88. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/0a2e04027cfa22d014bc22e8b27cd913/events.json +86 -0
  89. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/0e1543af6a630fb3af6b52283154b3e0/events.json +169 -0
  90. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/1838b691faa49564f97db8592ff3978d/events.json +78 -0
  91. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/29f198f6588dacffb687777eb5f8f118/events.json +197 -0
  92. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/34bc3c9c0097de28a7a6f99b90a8e7bc/events.json +194 -0
  93. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/3ba6c20d068ab9cee00e51b180e66444/events.json +184 -0
  94. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/435bfd8f17b9009146a79812d7c3726d/events.json +144 -0
  95. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/4c7676e3fe668e99edb2b94d7d5678a9/events.json +222 -0
  96. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/6daf0d47974bedfc55d5de7004a3ea9f/events.json +194 -0
  97. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/8a81ada42834d15f287bb40010043605/events.json +194 -0
  98. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/8c0a98900edaae105469df8daedccf02/events.json +198 -0
  99. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/8e4f645180f8a7d1dce426b07380466b/events.json +222 -0
  100. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/93db346fa5d44a032605a13b627f4b80/events.json +128 -0
  101. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/98ff6146faf7bd9be8bd03a8275817ba/events.json +223 -0
  102. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/9997ddd0247bc7e25f2ca7a5c415c93d/events.json +197 -0
  103. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/99e35f8ef3baedd798cc4fd085980ad9/events.json +194 -0
  104. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/b4f3095c1909924cbc98889a86c83d6d/events.json +131 -0
  105. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/b54b7fc32b7575a7110de809d11ccda0/events.json +128 -0
  106. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/c0b48033fa06746bcc5886745e053cff/events.json +169 -0
  107. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/c44649ac76701b4558927cd2305ab535/events.json +169 -0
  108. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/d601ae3320057580a39dbdac2edfdf4a/events.json +248 -0
  109. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/e67e724bab422d2b52eeb49635e512e1/events.json +194 -0
  110. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/e6c72765a28f158a8485b35fa63f73da/events.json +194 -0
  111. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/f541b87405c9a54819b18ebe529f6419/events.json +194 -0
  112. data/gems/e11y-devtools/frontend/scripts/generate_mocks.rb +397 -0
  113. data/gems/e11y-devtools/frontend/src/App.svelte +827 -0
  114. data/gems/e11y-devtools/frontend/src/components/Fab.svelte +19 -0
  115. data/gems/e11y-devtools/frontend/src/components/FilterBar.svelte +38 -0
  116. data/gems/e11y-devtools/frontend/src/components/FullscreenPanel.svelte +82 -0
  117. data/gems/e11y-devtools/frontend/src/components/InteractionsTimeline.svelte +264 -0
  118. data/gems/e11y-devtools/frontend/src/components/RecentHistogram.svelte +354 -0
  119. data/gems/e11y-devtools/frontend/src/lib/api.ts +37 -0
  120. data/gems/e11y-devtools/frontend/src/lib/eventIdentity.ts +12 -0
  121. data/gems/e11y-devtools/frontend/src/lib/format.ts +37 -0
  122. data/gems/e11y-devtools/frontend/src/lib/listFilter.ts +43 -0
  123. data/gems/e11y-devtools/frontend/src/lib/recentVolume.ts +80 -0
  124. data/gems/e11y-devtools/frontend/src/lib/router.ts +12 -0
  125. data/gems/e11y-devtools/frontend/src/lib/transitions.ts +34 -0
  126. data/gems/e11y-devtools/frontend/src/lib/viewportOrigin.ts +25 -0
  127. data/gems/e11y-devtools/frontend/src/main.ts +8 -0
  128. data/gems/e11y-devtools/frontend/src/overlay-entry.ts +24 -0
  129. data/gems/e11y-devtools/frontend/src/overlay.css +1080 -0
  130. data/gems/e11y-devtools/frontend/svelte.config.js +2 -0
  131. data/gems/e11y-devtools/frontend/test_puppeteer.js +41 -0
  132. data/gems/e11y-devtools/frontend/test_scale.js +3 -0
  133. data/gems/e11y-devtools/frontend/tsconfig.app.json +21 -0
  134. data/gems/e11y-devtools/frontend/tsconfig.json +7 -0
  135. data/gems/e11y-devtools/frontend/tsconfig.node.json +26 -0
  136. data/gems/e11y-devtools/frontend/vite.config.ts +36 -0
  137. data/gems/e11y-devtools/lib/e11y/devtools/mcp/server.rb +96 -0
  138. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tool_base.rb +25 -0
  139. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/clear.rb +31 -0
  140. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/errors.rb +35 -0
  141. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/event_detail.rb +33 -0
  142. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/events_by_trace.rb +33 -0
  143. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/interactions.rb +40 -0
  144. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/recent_events.rb +34 -0
  145. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/search.rb +34 -0
  146. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/stats.rb +30 -0
  147. data/gems/e11y-devtools/lib/e11y/devtools/overlay/assets/overlay.js +20 -0
  148. data/gems/e11y-devtools/lib/e11y/devtools/overlay/controller.rb +94 -0
  149. data/gems/e11y-devtools/lib/e11y/devtools/overlay/engine.rb +26 -0
  150. data/gems/e11y-devtools/lib/e11y/devtools/overlay/middleware.rb +80 -0
  151. data/gems/e11y-devtools/lib/e11y/devtools/overlay/rails_controller.rb +67 -0
  152. data/gems/e11y-devtools/lib/e11y/devtools/tui/app.rb +262 -0
  153. data/gems/e11y-devtools/lib/e11y/devtools/tui/grouping.rb +66 -0
  154. data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/event_detail.rb +62 -0
  155. data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/event_list.rb +70 -0
  156. data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/interaction_list.rb +47 -0
  157. data/gems/e11y-devtools/lib/e11y/devtools/version.rb +8 -0
  158. data/gems/e11y-devtools/lib/e11y/devtools.rb +13 -0
  159. data/gems/e11y-devtools/spec/e11y/devtools/mcp/tools_spec.rb +107 -0
  160. data/gems/e11y-devtools/spec/e11y/devtools/overlay/controller_spec.rb +91 -0
  161. data/gems/e11y-devtools/spec/e11y/devtools/overlay/middleware_spec.rb +46 -0
  162. data/gems/e11y-devtools/spec/e11y/devtools/tui/app_spec.rb +85 -0
  163. data/gems/e11y-devtools/spec/e11y/devtools/tui/grouping_spec.rb +64 -0
  164. data/gems/e11y-devtools/spec/spec_helper.rb +5 -0
  165. data/gems/e11y-devtools/spec/tui/widgets/event_list_spec.rb +44 -0
  166. data/gems/e11y-devtools/spec/tui/widgets/interaction_list_spec.rb +62 -0
  167. data/lib/e11y/adapters/audit_encrypted.rb +53 -11
  168. data/lib/e11y/adapters/base.rb +33 -34
  169. data/lib/e11y/adapters/dev_log/file_store.rb +143 -0
  170. data/lib/e11y/adapters/dev_log/query.rb +219 -0
  171. data/lib/e11y/adapters/dev_log.rb +118 -0
  172. data/lib/e11y/adapters/file.rb +3 -6
  173. data/lib/e11y/adapters/in_memory.rb +52 -5
  174. data/lib/e11y/adapters/in_memory_test.rb +29 -0
  175. data/lib/e11y/adapters/loki.rb +58 -23
  176. data/lib/e11y/adapters/null.rb +82 -0
  177. data/lib/e11y/adapters/opentelemetry_collector.rb +183 -0
  178. data/lib/e11y/adapters/otel_logs.rb +136 -23
  179. data/lib/e11y/adapters/sentry.rb +4 -7
  180. data/lib/e11y/adapters/stdout.rb +73 -7
  181. data/lib/e11y/adapters/yabeda.rb +153 -29
  182. data/lib/e11y/buffers/adaptive_buffer.rb +3 -17
  183. data/lib/e11y/buffers/{request_scoped_buffer.rb → ephemeral_buffer.rb} +72 -58
  184. data/lib/e11y/buffers/ring_buffer.rb +3 -16
  185. data/lib/e11y/configuration.rb +272 -0
  186. data/lib/e11y/console.rb +10 -17
  187. data/lib/e11y/current.rb +53 -1
  188. data/lib/e11y/debug/pipeline_inspector.rb +96 -0
  189. data/lib/e11y/documentation/generator.rb +48 -0
  190. data/lib/e11y/event/base.rb +176 -82
  191. data/lib/e11y/event/value_sampling_config.rb +1 -5
  192. data/lib/e11y/events/rails/database/query.rb +1 -4
  193. data/lib/e11y/events/rails/job/failed.rb +2 -0
  194. data/lib/e11y/instruments/active_job.rb +44 -12
  195. data/lib/e11y/instruments/rails_instrumentation.rb +49 -24
  196. data/lib/e11y/instruments/sidekiq.rb +135 -31
  197. data/lib/e11y/linters/base.rb +11 -0
  198. data/lib/e11y/linters/pii/pii_declaration_linter.rb +120 -0
  199. data/lib/e11y/linters/slo/config_consistency_linter.rb +76 -0
  200. data/lib/e11y/linters/slo/explicit_declaration_linter.rb +36 -0
  201. data/lib/e11y/linters/slo/slo_status_from_linter.rb +41 -0
  202. data/lib/e11y/logger/bridge.rb +26 -7
  203. data/lib/e11y/metrics/cardinality_protection.rb +10 -15
  204. data/lib/e11y/metrics/cardinality_tracker.rb +16 -6
  205. data/lib/e11y/metrics/registry.rb +3 -5
  206. data/lib/e11y/metrics/test_backend.rb +62 -0
  207. data/lib/e11y/metrics.rb +56 -10
  208. data/lib/e11y/middleware/adapter_resolver.rb +40 -0
  209. data/lib/e11y/middleware/audit_signing.rb +43 -6
  210. data/lib/e11y/middleware/baggage_protection.rb +75 -0
  211. data/lib/e11y/middleware/dev_log_source.rb +24 -0
  212. data/lib/e11y/middleware/event_slo.rb +23 -9
  213. data/lib/e11y/middleware/otel_span.rb +23 -0
  214. data/lib/e11y/middleware/pii_filter.rb +104 -75
  215. data/lib/e11y/middleware/rate_limiting.rb +54 -27
  216. data/lib/e11y/middleware/request.rb +70 -23
  217. data/lib/e11y/middleware/routing.rb +78 -21
  218. data/lib/e11y/middleware/sampling.rb +66 -17
  219. data/lib/e11y/middleware/self_monitoring_emit.rb +39 -0
  220. data/lib/e11y/middleware/trace_context.rb +45 -10
  221. data/lib/e11y/middleware/track_latency.rb +34 -0
  222. data/lib/e11y/middleware/validation.rb +7 -16
  223. data/lib/e11y/middleware/versioning.rb +26 -22
  224. data/lib/e11y/opentelemetry/semantic_conventions.rb +109 -0
  225. data/lib/e11y/opentelemetry/span_creator.rb +142 -0
  226. data/lib/e11y/pii/patterns.rb +12 -1
  227. data/lib/e11y/pipeline/builder.rb +4 -4
  228. data/lib/e11y/presets/audit_event.rb +13 -2
  229. data/lib/e11y/railtie.rb +52 -14
  230. data/lib/e11y/registry.rb +306 -0
  231. data/lib/e11y/reliability/circuit_breaker.rb +19 -21
  232. data/lib/e11y/reliability/dlq/base.rb +71 -0
  233. data/lib/e11y/reliability/dlq/file_adapter.rb +301 -0
  234. data/lib/e11y/reliability/dlq/file_storage.rb +63 -34
  235. data/lib/e11y/reliability/dlq/filter.rb +37 -54
  236. data/lib/e11y/reliability/retry_handler.rb +26 -29
  237. data/lib/e11y/reliability/retry_rate_limiter.rb +3 -11
  238. data/lib/e11y/sampling/error_spike_detector.rb +0 -2
  239. data/lib/e11y/sampling/load_monitor.rb +5 -9
  240. data/lib/e11y/sampling/stratified_tracker.rb +18 -0
  241. data/lib/e11y/self_monitoring/buffer_monitor.rb +2 -0
  242. data/lib/e11y/self_monitoring/performance_monitor.rb +19 -61
  243. data/lib/e11y/self_monitoring/reliability_monitor.rb +4 -74
  244. data/lib/e11y/slo/config_loader.rb +40 -0
  245. data/lib/e11y/slo/config_validator.rb +58 -0
  246. data/lib/e11y/slo/dashboard_generator.rb +122 -0
  247. data/lib/e11y/slo/event_driven.rb +8 -0
  248. data/lib/e11y/slo/tracker.rb +31 -4
  249. data/lib/e11y/testing/have_tracked_event_matcher.rb +190 -0
  250. data/lib/e11y/testing/rspec_matchers.rb +21 -0
  251. data/lib/e11y/testing/snapshot_matcher.rb +86 -0
  252. data/lib/e11y/trace_context/sampler.rb +35 -0
  253. data/lib/e11y/tracing/faraday_middleware.rb +31 -0
  254. data/lib/e11y/tracing/net_http_patch.rb +33 -0
  255. data/lib/e11y/tracing/propagator.rb +144 -0
  256. data/lib/e11y/tracing.rb +47 -0
  257. data/lib/e11y/version.rb +1 -1
  258. data/lib/e11y/versioning/version_extractor.rb +32 -0
  259. data/lib/e11y.rb +123 -266
  260. data/lib/generators/e11y/event/event_generator.rb +22 -0
  261. data/lib/generators/e11y/event/templates/event.rb.tt +16 -0
  262. data/lib/generators/e11y/grafana_dashboard/grafana_dashboard_generator.rb +30 -0
  263. data/lib/generators/e11y/grafana_dashboard/templates/e11y_dashboard.json +81 -0
  264. data/lib/generators/e11y/install/install_generator.rb +34 -0
  265. data/lib/generators/e11y/install/templates/e11y.rb +239 -0
  266. data/lib/generators/e11y/prometheus_alerts/prometheus_alerts_generator.rb +29 -0
  267. data/lib/generators/e11y/prometheus_alerts/templates/e11y_alerts.yml +28 -0
  268. data/lib/tasks/e11y_docs.rake +30 -0
  269. data/lib/tasks/e11y_events.rake +71 -0
  270. data/lib/tasks/e11y_lint.rake +91 -0
  271. data/lib/tasks/e11y_slo.rake +29 -0
  272. metadata +186 -39
  273. data/docs/ADR-003-slo-observability.md +0 -3337
  274. data/docs/ADR-010-developer-experience.md +0 -2166
  275. data/docs/API-REFERENCE-L28.md +0 -914
  276. data/docs/COMPREHENSIVE-CONFIGURATION.md +0 -2366
  277. data/docs/CONTRIBUTING.md +0 -312
  278. data/docs/IMPLEMENTATION_NOTES.md +0 -2804
  279. data/docs/IMPLEMENTATION_PLAN.md +0 -1971
  280. data/docs/IMPLEMENTATION_PLAN_ARCHITECTURE.md +0 -586
  281. data/docs/PLAN.md +0 -148
  282. data/docs/README.md +0 -296
  283. data/docs/design/00-memory-optimization.md +0 -593
  284. data/docs/guides/MIGRATION-L27-L28.md +0 -692
  285. data/docs/guides/PERFORMANCE-BENCHMARKS.md +0 -434
  286. data/docs/guides/README.md +0 -44
  287. data/docs/use_cases/UC-003-pattern-based-metrics.md +0 -1627
  288. data/lib/e11y/adapters/registry.rb +0 -141
@@ -13,12 +13,10 @@
13
13
 
14
14
  **The $68,000/month mistake:**
15
15
  ```ruby
16
- # ❌ CATASTROPHIC: Using user_id as metric label
17
- E11y.configure do |config|
18
- config.metrics do
19
- counter_for pattern: 'user.action',
20
- name: 'user_actions_total',
21
- tags: [:user_id, :action_type] # ← 💸💸💸
16
+ # ❌ CATASTROPHIC: Using user_id as metric label (event-level example - avoid!)
17
+ class Events::UserAction < E11y::Event::Base
18
+ metrics do
19
+ counter :user_actions_total, tags: [:user_id, :action_type] # ← 💸💸💸 DON'T
22
20
  end
23
21
  end
24
22
 
@@ -35,42 +33,24 @@ end
35
33
  - **Query timeouts** (PromQL queries take 30+ seconds)
36
34
  - **Incident during Black Friday** (metrics system collapsed)
37
35
 
38
- ### E11y Solution
36
+ ### E11y Solution (Event-Level)
39
37
 
40
- **4-Layer Defense System + 99% Cost Reduction:**
38
+ **Use low-cardinality tags in event-level metrics:**
41
39
  ```ruby
42
- # ✅ SAFE: Aggregate user_id user_segment
43
- E11y.configure do |config|
44
- config.metrics do
45
- # Layer 1: Denylist (hard block)
46
- forbidden_labels :user_id, :order_id, :session_id, :trace_id
47
-
48
- # Layer 2: Safe aggregation
49
- counter_for pattern: 'user.action',
50
- name: 'user_actions_total',
51
- tags: [:user_segment, :action_type], # 3 segments × 10 actions = 30 series
52
- tag_extractors: {
53
- user_segment: ->(event) {
54
- user = User.find(event.payload[:user_id])
55
- user.segment # 'free', 'paid', 'enterprise'
56
- }
57
- }
58
-
59
- # Layer 3: Per-metric limits
60
- cardinality_limit_for 'user_actions_total', max: 100
61
-
62
- # Layer 4: Dynamic monitoring
63
- cardinality_monitoring do
64
- warn_threshold 0.7 # Alert at 70%
65
- auto_aggregate true # Auto-fix if exceeded
66
- end
40
+ # ✅ SAFE: Use user_segment, not user_id
41
+ class Events::UserAction < E11y::Event::Base
42
+ schema do
43
+ required(:user_id).filled(:string)
44
+ required(:action_type).filled(:string)
45
+ required(:user_segment).filled(:string) # pre-aggregated: 'free', 'paid', 'enterprise'
46
+ end
47
+
48
+ metrics do
49
+ counter :user_actions_total, tags: [:user_segment, :action_type] # 3 × 10 = 30 series
67
50
  end
68
51
  end
69
52
 
70
- # Result:
71
- # - 200 services × 10 segments × 5 dimensions = 10,000 series
72
- # - Datadog cost: $680/month
73
- # - Savings: $67,320/month (99% reduction) ✅
53
+ # Result: low cardinality, manageable cost
74
54
  ```
75
55
 
76
56
  ---
@@ -283,7 +263,7 @@ end
283
263
 
284
264
  ### Layer Processing Flow
285
265
 
286
- > **Implementation:** See [ADR-002 Section 4.1: Four-Layer Defense](../ADR-002-metrics-yabeda.md#41-four-layer-defense) for detailed architecture.
266
+ > **Implementation:** See [ADR-002 Section 4.1: Four-Layer Defense](../architecture/ADR-002-metrics-yabeda.md#41-four-layer-defense) for detailed architecture.
287
267
 
288
268
  **🔑 Critical: Layers execute SEQUENTIALLY (not simultaneously).**
289
269
 
@@ -399,14 +379,14 @@ Events::OrderPlaced.track(
399
379
  ### Layer 1: Denylist (Hard Block)
400
380
 
401
381
  > **⚠️ CRITICAL: Adapter-Specific Filtering**
402
- > **Implementation:** See [ADR-002 Section 4.2: Layer 1 - Universal Denylist](../ADR-002-metrics-yabeda.md#42-layer-1-universal-denylist) for detailed architecture.
382
+ > **Implementation:** See [ADR-002 Section 4.2: Layer 1 - Universal Denylist](../architecture/ADR-002-metrics-yabeda.md#42-layer-1-universal-denylist) for detailed architecture.
403
383
  >
404
384
  > **Cardinality protection (denylist/allowlist) applies ONLY to metrics adapters (Yabeda/Prometheus), NOT to other adapters:**
405
385
  >
406
386
  > | Adapter Type | Denylist Applied? | Why? |
407
387
  > |---|---|---|
408
388
  > | **Metrics (Yabeda/Prometheus)** | ✅ YES | High-cardinality labels cause memory explosion in time-series databases (1M labels = 1GB RAM). |
409
- > | **Logs (Loki)** | NO | Loki is designed for high-cardinality labels and uses different indexing strategy. Full payload preserved. |
389
+ > | **Logs (Loki)** | Optional | Loki labels = event_name + severity (low cardinality). Payload (user_id, etc.) in log line. Optional `enable_cardinality_protection` for labels. |
410
390
  > | **Errors (Sentry)** | ❌ NO | Sentry needs full context for debugging. High cardinality is acceptable for error tracking. |
411
391
  > | **Audit (File/PostgreSQL)** | ❌ NO | Audit trails require complete, unfiltered data for compliance. |
412
392
  >
@@ -428,85 +408,11 @@ Events::OrderPlaced.track(
428
408
  > - ✅ **Compliance stays intact:** Audit logs remain complete and unfiltered
429
409
  > - ✅ **Best of both worlds:** Safety for metrics + completeness for logs/errors
430
410
 
431
- **Universal denylist - NEVER use these as labels (for metrics adapters):**
432
-
433
- ```ruby
434
- E11y.configure do |config|
435
- config.metrics do
436
- # === UNBOUNDED IDENTIFIERS (FORBIDDEN) ===
437
- forbidden_labels :user_id, :customer_id, :account_id,
438
- :order_id, :transaction_id, :invoice_id,
439
- :session_id, :request_id, :trace_id, :span_id
440
-
441
- # === INFRASTRUCTURE (FORBIDDEN) ===
442
- forbidden_labels :pod_uid, :container_id, :instance_id,
443
- :node_name # If dynamic
444
-
445
- # === NETWORK/HTTP (FORBIDDEN) ===
446
- forbidden_labels :url, # With query strings
447
- :ip_address,
448
- :user_agent,
449
- :hostname # If ephemeral
450
-
451
- # === TIME-BASED (FORBIDDEN) ===
452
- forbidden_labels :timestamp, :created_at,
453
- :version # Patch-level: 2.5.7234
454
-
455
- # === ENFORCEMENT ===
456
- enforcement :strict # ERROR on forbidden label usage
457
- # OR
458
- enforcement :warn # Log warning but allow
459
- # OR
460
- enforcement :aggregate # Auto-aggregate to "_other"
461
- end
462
- end
463
-
464
- # Usage:
465
- counter_for pattern: 'user.action',
466
- tags: [:user_id] # ← ERROR: "user_id is forbidden!"
467
-
468
- # Development warning:
469
- # [E11y ERROR] Metric 'user.action_total' uses forbidden label 'user_id'
470
- # Cardinality explosion risk! Use 'user_segment' instead.
471
- ```
411
+ **Avoid these as metric tags:** user_id, customer_id, order_id, session_id, trace_id, url, ip_address, timestamp.
472
412
 
473
413
  ---
474
414
 
475
- ### Layer 2: Allowlist (Strict Mode)
476
-
477
- **Only allow explicitly safe labels:**
478
-
479
- ```ruby
480
- E11y.configure do |config|
481
- config.metrics do
482
- # Strict mode: ONLY these labels allowed
483
- allowed_labels_only true
484
-
485
- # === BUSINESS DIMENSIONS (< 50 values) ===
486
- allowed_labels :status, # pending, paid, failed (4-10 values)
487
- :payment_method, # card, paypal (5-20 values)
488
- :plan_tier # free, pro, enterprise (3-5 values)
489
-
490
- # === INFRASTRUCTURE (< 20 values) ===
491
- allowed_labels :env, # production, staging, dev (3 values)
492
- :region, # us-east, eu-west (5-20 values)
493
- :cluster, # main, backup (2-5 values)
494
- :availability_zone
495
-
496
- # === HTTP/SERVICE (< 100 values) ===
497
- allowed_labels :http_method, # GET, POST, PUT, DELETE (10 values)
498
- :http_status_code, # 200, 404, 500 (50 values)
499
- :controller_action # UsersController#show (20-100 values)
500
- end
501
- end
502
-
503
- # Usage:
504
- counter_for pattern: 'order.paid',
505
- tags: [:currency] # ← ERROR: "currency not in allowlist!"
506
-
507
- # Must explicitly allow:
508
- allowed_labels :currency # USD, EUR, GBP (3-20 values)
509
- ```
415
+ ### Layer 2: Safe Labels
510
416
 
511
417
  **Rule of thumb:**
512
418
  - ✅ **< 10 values** - Always safe
@@ -517,55 +423,11 @@ allowed_labels :currency # USD, EUR, GBP (3-20 values)
517
423
 
518
424
  ### Layer 3: Per-Metric Limits
519
425
 
520
- **Set cardinality limits per metric:**
521
-
522
- ```ruby
523
- E11y.configure do |config|
524
- config.metrics do
525
- # === GLOBAL DEFAULT ===
526
- default_cardinality_limit 1_000
527
-
528
- # === PER-METRIC LIMITS ===
529
- cardinality_limit_for 'http.requests' do
530
- max_cardinality 2_000 # Higher limit for this metric
531
- overflow_strategy :drop # → Drop overflow events
532
- overflow_sample_rate 0.1 # Sample 10% of overflow events
533
- end
534
-
535
- cardinality_limit_for 'user.actions' do
536
- max_cardinality 500 # Lower limit
537
- overflow_strategy :drop # Drop overflow events
538
- overflow_alert true # Alert on overflow
539
- end
540
-
541
- cardinality_limit_for 'orders.paid' do
542
- max_cardinality 100
543
- overflow_strategy :alert # Alert ops team + drop
544
- end
545
- end
546
- end
547
-
548
- # How it works:
549
- # 1. Track unique label combinations per metric
550
- # 2. If exceeds limit:
551
- # - :drop → Discard overflow events (increment drop counter)
552
- # - :alert → Alert ops team + drop
553
- #
554
- # NOTE: For aggregation/relabeling (e.g., user_id → user_segment),
555
- # use tag_extractors (see "Aggregation" section below),
556
- # NOT overflow_strategy.
557
- ```
558
-
559
- **Overflow strategies:**
560
-
561
- | Strategy | Behavior | Use Case |
562
- |----------|----------|----------|
563
- | `:drop` | Discard overflow events | Default, simplest |
564
- | `:alert` | Alert ops team + drop | Critical metrics |
426
+ **Yabeda adapter supports cardinality limits** via its config. Use low-cardinality tags in event-level metrics.
565
427
 
566
428
  #### Thread Safety
567
429
 
568
- > **Implementation:** See [ADR-002 Section 4.4: Layer 3 - Per-Metric Cardinality Limits](../ADR-002-metrics-yabeda.md#44-layer-3-per-metric-cardinality-limits) for detailed architecture.
430
+ > **Implementation:** See [ADR-002 Section 4.4: Layer 3 - Per-Metric Cardinality Limits](../architecture/ADR-002-metrics-yabeda.md#44-layer-3-per-metric-cardinality-limits) for detailed architecture.
569
431
  >
570
432
  > **Sources:**
571
433
  > - [Ruby Hash thread safety - Stack Overflow](https://stackoverflow.com/questions/22674498/thread-safety-for-hashes-in-ruby)
@@ -716,7 +578,7 @@ end
716
578
 
717
579
  #### Action Selection Guide
718
580
 
719
- > **Implementation:** See [ADR-002 Section 4.5: Layer 4 - Dynamic Actions](../ADR-002-metrics-yabeda.md#45-layer-4-dynamic-actions) for detailed architecture.
581
+ > **Implementation:** See [ADR-002 Section 4.5: Layer 4 - Dynamic Actions](../architecture/ADR-002-metrics-yabeda.md#45-layer-4-dynamic-actions) for detailed architecture.
720
582
 
721
583
  **🎯 When cardinality limit is exceeded, which action should you choose?**
722
584
 
@@ -862,7 +724,7 @@ rate(e11y_cardinality_actions_total{action="alert"}[5m])
862
724
 
863
725
  ### 1. Aggregation (Best ROI - 99% Reduction)
864
726
 
865
- > **Note:** This section describes **relabeling/normalization** (e.g., `user_id` → `user_segment`) via `tag_extractors`, which is different from `overflow_strategy`. Aggregation reduces cardinality **before** metrics are created, while overflow handling (`drop`/`alert`) deals with exceeding limits **after** creation. See [ADR-002 Section 4.5](../ADR-002-metrics-yabeda.md#45-cardinality-protection) for implementation details.
727
+ > **Note:** This section describes **relabeling/normalization** (e.g., `user_id` → `user_segment`) via `tag_extractors`, which is different from `overflow_strategy`. Aggregation reduces cardinality **before** metrics are created, while overflow handling (`drop`/`alert`) deals with exceeding limits **after** creation. See [ADR-002 Section 4.5](../architecture/ADR-002-metrics-yabeda.md#45-cardinality-protection) for implementation details.
866
728
 
867
729
  **Problem:** 1M users = 1M metric series
868
730
 
@@ -1039,7 +901,7 @@ end
1039
901
  ### 6. Universal Cardinality Protection (C04 Resolution) ⚠️ CRITICAL
1040
902
 
1041
903
  > **⚠️ CRITICAL: C04 Conflict Resolution - Cardinality Protection for ALL Backends**
1042
- > **See:** [ADR-009 Section 8](../ADR-009-cost-optimization.md#8-cardinality-protection-c04-resolution--critical) for detailed architecture and cost impact analysis.
904
+ > **See:** [ADR-009 Section 8](../architecture/ADR-009-cost-optimization.md#8-cardinality-protection-c04-resolution--critical) for detailed architecture and cost impact analysis.
1043
905
  > **Problem:** Original UC-013 cardinality protection applied ONLY to Yabeda/Prometheus metrics, but NOT to OpenTelemetry span attributes or Loki log labels. High-cardinality values (`user_id`, `order_id`) bypassed protection and caused cost explosions in OTLP backends (Datadog, Honeycomb).
1044
906
  > **Solution:** Universal `CardinalityFilter` middleware applies protection to **ALL backends** (Yabeda, OpenTelemetry, Loki) with optional per-backend overrides.
1045
907
 
@@ -1623,7 +1485,7 @@ after = calculate_cardinality_cost(
1623
1485
 
1624
1486
  ## ❓ Frequently Asked Questions
1625
1487
 
1626
- > **Technical Details:** See [ADR-002 Section 11: FAQ & Critical Clarifications](../ADR-002-metrics-yabeda.md#11-faq--critical-clarifications) for architectural rationale.
1488
+ > **Technical Details:** See [ADR-002 Section 11: FAQ & Critical Clarifications](../architecture/ADR-002-metrics-yabeda.md#11-faq--critical-clarifications) for architectural rationale.
1627
1489
 
1628
1490
  ### Q1: Does cardinality protection apply to all my logs and metrics?
1629
1491
 
@@ -2097,7 +1959,7 @@ end
2097
1959
 
2098
1960
  ## 📚 Related Use Cases
2099
1961
 
2100
- - **[UC-003: Pattern-Based Metrics](./UC-003-pattern-based-metrics.md)** - Auto-generate metrics
1962
+ - **[UC-003: Event Metrics](./UC-003-event-metrics.md)** - Metrics in event classes
2101
1963
  - **[UC-008: OpenTelemetry Integration](./UC-008-opentelemetry-integration.md)** - OTLP cardinality protection (C04)
2102
1964
  - **[UC-015: Cost Optimization](./UC-015-cost-optimization.md)** - Reduce observability costs
2103
1965
 
@@ -1026,7 +1026,7 @@ end
1026
1026
 
1027
1027
  ### Strategy 8: Stratified Sampling for Accurate SLO (C11 Resolution) ⚠️
1028
1028
 
1029
- > **Reference:** See [ADR-009 §3.7: Stratified Sampling for SLO Accuracy](../ADR-009-cost-optimization.md#37-stratified-sampling-for-slo-accuracy-c11-resolution) for full architecture and [UC-004: SLO Tracking with Sampling Correction](./UC-004-zero-config-slo-tracking.md#sampling-correction-for-accurate-slo-c11-resolution) for SLO calculation details.
1029
+ > **Reference:** See [ADR-009 §3.7: Stratified Sampling for SLO Accuracy](../architecture/ADR-009-cost-optimization.md#37-stratified-sampling-for-slo-accuracy-c11-resolution) for full architecture and [UC-004: SLO Tracking with Sampling Correction](./UC-004-zero-config-slo-tracking.md#sampling-correction-for-accurate-slo-c11-resolution) for SLO calculation details.
1030
1030
 
1031
1031
  **Problem with Random Sampling:** Breaks SLO metrics! Errors are rare (5%) → random 10% sampling drops 90% of errors → SLO appears better than reality.
1032
1032
 
@@ -1046,7 +1046,7 @@ E11y.configure do |config|
1046
1046
  stratified_rates do
1047
1047
  error 1.0 # 100% - Keep ALL errors (critical for SLO!)
1048
1048
  warn 0.5 # 50% - Medium priority
1049
- info 0.1 # 10% - Low priority (успешные запросы)
1049
+ info 0.1 # 10% - Low priority (successful requests)
1050
1050
  debug 0.05 # 5% - Very low priority
1051
1051
  end
1052
1052
  end
@@ -56,12 +56,7 @@ E11y.configure do |config|
56
56
  drop_empty_strings: true,
57
57
  truncate_strings: 1000 # chars
58
58
 
59
- # 5. Tiered storage (60% cheaper)
60
- retention_tiers do
61
- hot 7.days, storage: :loki # Fast queries
62
- warm 30.days, storage: :s3 # Slower, cheaper
63
- cold 1.year, storage: :s3_glacier # Archive
64
- end
59
+ # 5. Routing by retention_until — config.routing_rules (see Strategy 4)
65
60
 
66
61
  # 6. Smart routing (send only what's needed)
67
62
  routing do
@@ -79,13 +74,13 @@ end
79
74
  # Result:
80
75
  # - 100k events/sec → 10k events/sec (adaptive sampling)
81
76
  # - 2KB/event → 0.6KB/event (compression + minimization)
82
- # - 30 days hot storage → 7 days hot + 23 days warm (tiered)
77
+ # - Short retention → stdout, long → Loki (routing by retention_until)
83
78
  # - Datadog: Only errors (3k/sec instead of 100k/sec)
84
79
  #
85
80
  # New monthly cost:
86
81
  # - Datadog: $3,000 → $500 (only errors)
87
82
  # - Loki: $10,368 → $1,200 (10% volume, 70% smaller, 7 days hot)
88
- # - S3: $200 (warm storage)
83
+ # - Archival job (separate): exports by retention_until to cold storage
89
84
  # - Total: $1,900/month = $22,800/year
90
85
  #
91
86
  # SAVINGS: $160,416 - $22,800 = $137,616/year (86% reduction!)
@@ -95,7 +90,7 @@ end
95
90
 
96
91
  ## 🎯 Cost Optimization Strategies
97
92
 
98
- > **Note:** This UC focuses on proven, low-overhead optimizations. **Deduplication is intentionally NOT included** as a strategy. While it may seem like an obvious cost optimization, [ADR-009 Section 9.2.D](../ADR-009-cost-optimization.md#alternatives-considered) explains why it was rejected: high computational overhead (hash + Redis lookup per event), large memory cost (3.6GB for 1000 events/sec), false positives on legitimate retries, and debug confusion. Better alternatives (sampling + compression) achieve the same cost goals without these drawbacks.
93
+ > **Note:** This UC focuses on proven, low-overhead optimizations. **Deduplication is intentionally NOT included** as a strategy. While it may seem like an obvious cost optimization, [ADR-009 Section 9.2.D](../architecture/ADR-009-cost-optimization.md#alternatives-considered) explains why it was rejected: high computational overhead (hash + Redis lookup per event), large memory cost (3.6GB for 1000 events/sec), false positives on legitimate retries, and debug confusion. Better alternatives (sampling + compression) achieve the same cost goals without these drawbacks.
99
94
 
100
95
  ### Strategy 1: Intelligent Sampling by Value
101
96
 
@@ -254,57 +249,34 @@ end
254
249
 
255
250
  ---
256
251
 
257
- ### Strategy 4: Tiered Storage
252
+ ### Strategy 4: Routing by retention_until
258
253
 
259
- **Hot/warm/cold storage based on age:**
254
+ **Route events to adapters based on retention (at collection):**
260
255
  ```ruby
256
+ # Events declare retention_period; retention_until is auto-calculated in payload.
257
+ # Routing rules use it to choose adapter — short retention → cheap storage.
261
258
  E11y.configure do |config|
262
- config.cost_optimization do
263
- tiered_storage do
264
- # HOT: Fast queries, expensive ($0.20/GB/month)
265
- hot_tier do
266
- duration 7.days
267
- storage :loki # OR :elasticsearch
268
- query_performance :fast
269
- end
270
-
271
- # WARM: Slower queries, cheaper ($0.05/GB/month)
272
- warm_tier do
273
- duration 30.days
274
- storage :s3
275
- query_performance :medium
276
- compression :zstd # Compress when moving to warm
277
- end
278
-
279
- # COLD: Archive, very cheap ($0.004/GB/month)
280
- cold_tier do
281
- duration 1.year
282
- storage :s3_glacier
283
- query_performance :slow # Minutes to hours
284
- compression :zstd
285
- end
286
-
287
- # Auto-archival
288
- auto_archive enabled: true,
289
- schedule: '0 2 * * *' # 2 AM daily
290
- end
291
- end
259
+ config.routing_rules = [
260
+ ->(event) { :audit_encrypted if event[:audit_event] },
261
+ ->(event) {
262
+ return :loki unless event[:retention_until]
263
+ days = (Time.parse(event[:retention_until]) - Time.now) / 86400
264
+ days <= 7 ? :stdout : :loki # Short → free, long → Loki
265
+ }
266
+ ]
267
+ config.fallback_adapters = [:loki]
292
268
  end
293
269
 
294
- # Cost comparison (per 1TB):
295
- # Hot (Loki): $0.20/GB × 1000 = $200/month
296
- # Warm (S3): $0.05/GB × 1000 = $50/month
297
- # Cold (Glacier): $0.004/GB × 1000 = $4/month
298
- #
299
- # Strategy:
300
- # - 7 days hot (for active debugging)
301
- # - 30 days warm (for recent lookups)
302
- # - 1 year cold (for compliance)
303
- #
304
- # Cost for 30 days of data:
305
- # Before: 30 days × $200 = $6,000/month
306
- # After: (7 × $200) + (23 × $50) + (0 × $4) = $1,400 + $1,150 = $2,550/month
307
- # Savings: $3,450/month (58% reduction!)
270
+ # Event classes declare retention:
271
+ # class DebugEvent < E11y::Event::Base
272
+ # retention_period 7.days # → stdout
273
+ # end
274
+ # class AuditEvent < E11y::Event::Base
275
+ # retention_period 7.years # → Loki (archival job exports later)
276
+ # end
277
+
278
+ # Cost: Short retention events never hit Loki. Archival job (separate) filters by
279
+ # retention_until for cold storage. Savings: ~58% vs all-events-to-Loki.
308
280
  ```
309
281
 
310
282
  ---
@@ -324,11 +296,11 @@ E11y.configure do |config|
324
296
  # High-value transactions → All (audit + analytics)
325
297
  route event_patterns: ['payment.*', 'order.*'],
326
298
  when: ->(e) { e.payload[:amount].to_i > 1000 },
327
- to: [:datadog, :loki, :s3_archive]
299
+ to: [:datadog, :loki]
328
300
 
329
301
  # Security events → Specific SIEM
330
302
  route event_patterns: ['security.*', 'audit.*'],
331
- to: [:splunk, :s3_archive]
303
+ to: [:splunk]
332
304
 
333
305
  # Debug events → Only Loki (no expensive Datadog)
334
306
  route severities: [:debug],
@@ -356,50 +328,25 @@ end
356
328
 
357
329
  ---
358
330
 
359
- ### Strategy 6: Retention-Aware Tagging
331
+ ### Strategy 6: retention_period DSL
360
332
 
361
- **Tag events with retention requirements:**
333
+ **Declare retention per event; routing uses retention_until:**
362
334
  ```ruby
363
- E11y.configure do |config|
364
- config.cost_optimization do
365
- retention_aware_tagging do
366
- # Auto-tag events with retention hints
367
- tag_with_retention do
368
- # Compliance events: Long retention
369
- when_pattern 'audit.*', 'gdpr.*', retention: 7.years
370
-
371
- # Financial: Long retention
372
- when_pattern 'payment.*', 'transaction.*', retention: 7.years
373
-
374
- # Errors: Medium retention
375
- when_severity :error, :fatal, retention: 90.days
376
-
377
- # Debug: Short retention
378
- when_severity :debug, retention: 7.days
379
-
380
- # Default
381
- default_retention 30.days
382
- end
383
-
384
- # Backend respects retention tags
385
- backends do
386
- loki retention_based: true,
387
- max_retention: 30.days
388
-
389
- s3_archive retention_based: true,
390
- max_retention: 7.years
391
- end
392
- end
393
- end
335
+ # Event-level retention (used by routing_rules + archival job)
336
+ class DebugEvent < E11y::Event::Base
337
+ retention_period 7.days
394
338
  end
395
339
 
396
- # Result:
397
- # - Debug events: 7 days in Loki (cheap)
398
- # - Errors: 90 days in Loki
399
- # - Compliance: 7 years in S3 Glacier (very cheap)
400
- # - Default: 30 days in Loki
401
- #
402
- # Cost optimization: Store data only as long as needed!
340
+ class PaymentEvent < E11y::Event::Base
341
+ retention_period 7.years
342
+ end
343
+
344
+ class OrderEvent < E11y::Event::Base
345
+ # Uses config.default_retention_period (30 days)
346
+ end
347
+
348
+ # retention_until is auto-calculated in payload. Routing (Strategy 4) and
349
+ # archival job both use it. No separate tagging — one field, two consumers.
403
350
  ```
404
351
 
405
352
  ---
@@ -641,7 +588,7 @@ end
641
588
  config.cost_optimization do
642
589
  intelligent_sampling { ... } # 90% reduction
643
590
  compression { ... } # 70% smaller payloads
644
- tiered_storage { ... } # 60% cheaper storage
591
+ routing_rules (by retention_until) # Short → stdout, long → Loki
645
592
  smart_routing { ... } # 50% fewer expensive destinations
646
593
  end
647
594
  # Combined: ~95% cost reduction!
@@ -653,7 +600,7 @@ end
653
600
  # Dashboard: "Cost Optimization Savings"
654
601
  # - Monthly savings: $X
655
602
  # - YTD savings: $Y
656
- # - Optimization breakdown (sampling, compression, tiered storage)
603
+ # - Optimization breakdown (sampling, compression, routing)
657
604
  ```
658
605
 
659
606
  **3. Test in staging first**