e11y 0.2.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +130 -10
  3. data/CHANGELOG.md +80 -1
  4. data/CLAUDE.md +168 -0
  5. data/CONTRIBUTING.md +640 -0
  6. data/README.md +165 -701
  7. data/RELEASE.md +41 -12
  8. data/Rakefile +249 -57
  9. data/config/README.md +1 -1
  10. data/config/loki-local-config.yaml +12 -0
  11. data/config/otel-collector-config.yaml +44 -0
  12. data/cucumber.yml +1 -0
  13. data/docker-compose.yml +18 -2
  14. data/docs/ADAPTERS.md +76 -0
  15. data/docs/ADAPTIVE_SAMPLING.md +59 -0
  16. data/docs/COMPARISON.md +104 -0
  17. data/docs/CONFIGURATION.md +52 -0
  18. data/docs/DISTRIBUTED_TRACING.md +44 -0
  19. data/docs/LIMITATIONS.md +13 -0
  20. data/docs/METRICS_DSL.md +84 -0
  21. data/docs/PERFORMANCE.md +60 -0
  22. data/docs/PII_FILTERING.md +40 -0
  23. data/docs/PRESETS.md +65 -0
  24. data/docs/QUICK-START.md +546 -587
  25. data/docs/RAILS_INTEGRATION.md +79 -0
  26. data/docs/SCHEMA_VALIDATION.md +63 -0
  27. data/docs/SLO-PROMQL-ALERTS.md +161 -0
  28. data/docs/TESTING.md +69 -0
  29. data/docs/{ADR-001-architecture.md → architecture/ADR-001-architecture.md} +36 -65
  30. data/docs/{ADR-002-metrics-yabeda.md → architecture/ADR-002-metrics-yabeda.md} +62 -236
  31. data/docs/architecture/ADR-003-slo-observability.md +1402 -0
  32. data/docs/{ADR-004-adapter-architecture.md → architecture/ADR-004-adapter-architecture.md} +163 -146
  33. data/docs/{ADR-005-tracing-context.md → architecture/ADR-005-tracing-context.md} +10 -9
  34. data/docs/{ADR-006-security-compliance.md → architecture/ADR-006-security-compliance.md} +184 -191
  35. data/docs/{ADR-007-opentelemetry-integration.md → architecture/ADR-007-opentelemetry-integration.md} +3 -21
  36. data/docs/{ADR-008-rails-integration.md → architecture/ADR-008-rails-integration.md} +182 -743
  37. data/docs/{ADR-009-cost-optimization.md → architecture/ADR-009-cost-optimization.md} +45 -54
  38. data/docs/architecture/ADR-010-developer-experience.md +522 -0
  39. data/docs/{ADR-011-testing-strategy.md → architecture/ADR-011-testing-strategy.md} +44 -86
  40. data/docs/{ADR-012-event-evolution.md → architecture/ADR-012-event-evolution.md} +11 -11
  41. data/docs/{ADR-013-reliability-error-handling.md → architecture/ADR-013-reliability-error-handling.md} +37 -12
  42. data/docs/{ADR-014-event-driven-slo.md → architecture/ADR-014-event-driven-slo.md} +12 -24
  43. data/docs/{ADR-015-middleware-order.md → architecture/ADR-015-middleware-order.md} +43 -59
  44. data/docs/{ADR-016-self-monitoring-slo.md → architecture/ADR-016-self-monitoring-slo.md} +58 -355
  45. data/docs/{ADR-017-multi-rails-compatibility.md → architecture/ADR-017-multi-rails-compatibility.md} +4 -11
  46. data/docs/architecture/ADR-018-memory-optimization.md +366 -0
  47. data/docs/{ADR-INDEX.md → architecture/ADR-INDEX.md} +11 -6
  48. data/docs/plans/2026-03-20-browser-overlay-svelte.md +281 -0
  49. data/docs/{00-ICP-AND-TIMELINE.md → prd/00-ICP-AND-TIMELINE.md} +6 -6
  50. data/docs/{01-SCALE-REQUIREMENTS.md → prd/01-SCALE-REQUIREMENTS.md} +6 -6
  51. data/docs/prd/01-overview-vision.md +19 -14
  52. data/docs/use_cases/README.md +22 -23
  53. data/docs/use_cases/UC-001-request-scoped-debug-buffering.md +50 -44
  54. data/docs/use_cases/UC-002-business-event-tracking.md +26 -95
  55. data/docs/use_cases/UC-003-event-metrics.md +66 -0
  56. data/docs/use_cases/UC-004-zero-config-slo-tracking.md +33 -684
  57. data/docs/use_cases/UC-005-sentry-integration.md +13 -15
  58. data/docs/use_cases/UC-006-trace-context-management.md +30 -28
  59. data/docs/use_cases/UC-007-pii-filtering.md +35 -87
  60. data/docs/use_cases/UC-008-opentelemetry-integration.md +51 -89
  61. data/docs/use_cases/UC-009-multi-service-tracing.md +30 -178
  62. data/docs/use_cases/UC-010-background-job-tracking.md +24 -91
  63. data/docs/use_cases/UC-011-rate-limiting.md +95 -168
  64. data/docs/use_cases/UC-012-audit-trail.md +21 -46
  65. data/docs/use_cases/UC-013-high-cardinality-protection.md +29 -167
  66. data/docs/use_cases/UC-014-adaptive-sampling.md +2 -2
  67. data/docs/use_cases/UC-015-cost-optimization.md +46 -99
  68. data/docs/use_cases/UC-016-rails-logger-migration.md +39 -213
  69. data/docs/use_cases/UC-017-local-development.md +203 -777
  70. data/docs/use_cases/UC-018-testing-events.md +3 -3
  71. data/docs/use_cases/UC-019-retention-based-routing.md +53 -106
  72. data/docs/use_cases/UC-020-event-versioning.md +8 -9
  73. data/docs/use_cases/UC-021-error-handling-retry-dlq.md +18 -22
  74. data/docs/use_cases/UC-022-event-registry.md +15 -21
  75. data/docs/use_cases/backlog.md +119 -87
  76. data/e11y.gemspec +2 -2
  77. data/gems/e11y-devtools/README.md +158 -0
  78. data/gems/e11y-devtools/config/routes.rb +15 -0
  79. data/gems/e11y-devtools/e11y-devtools.gemspec +25 -0
  80. data/gems/e11y-devtools/exe/e11y +34 -0
  81. data/gems/e11y-devtools/frontend/.gitignore +24 -0
  82. data/gems/e11y-devtools/frontend/README.md +51 -0
  83. data/gems/e11y-devtools/frontend/index.html +14 -0
  84. data/gems/e11y-devtools/frontend/package-lock.json +3707 -0
  85. data/gems/e11y-devtools/frontend/package.json +28 -0
  86. data/gems/e11y-devtools/frontend/public/mocks/v1/events/recent.json +4205 -0
  87. data/gems/e11y-devtools/frontend/public/mocks/v1/interactions.json +194 -0
  88. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/0a2e04027cfa22d014bc22e8b27cd913/events.json +86 -0
  89. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/0e1543af6a630fb3af6b52283154b3e0/events.json +169 -0
  90. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/1838b691faa49564f97db8592ff3978d/events.json +78 -0
  91. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/29f198f6588dacffb687777eb5f8f118/events.json +197 -0
  92. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/34bc3c9c0097de28a7a6f99b90a8e7bc/events.json +194 -0
  93. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/3ba6c20d068ab9cee00e51b180e66444/events.json +184 -0
  94. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/435bfd8f17b9009146a79812d7c3726d/events.json +144 -0
  95. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/4c7676e3fe668e99edb2b94d7d5678a9/events.json +222 -0
  96. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/6daf0d47974bedfc55d5de7004a3ea9f/events.json +194 -0
  97. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/8a81ada42834d15f287bb40010043605/events.json +194 -0
  98. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/8c0a98900edaae105469df8daedccf02/events.json +198 -0
  99. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/8e4f645180f8a7d1dce426b07380466b/events.json +222 -0
  100. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/93db346fa5d44a032605a13b627f4b80/events.json +128 -0
  101. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/98ff6146faf7bd9be8bd03a8275817ba/events.json +223 -0
  102. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/9997ddd0247bc7e25f2ca7a5c415c93d/events.json +197 -0
  103. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/99e35f8ef3baedd798cc4fd085980ad9/events.json +194 -0
  104. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/b4f3095c1909924cbc98889a86c83d6d/events.json +131 -0
  105. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/b54b7fc32b7575a7110de809d11ccda0/events.json +128 -0
  106. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/c0b48033fa06746bcc5886745e053cff/events.json +169 -0
  107. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/c44649ac76701b4558927cd2305ab535/events.json +169 -0
  108. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/d601ae3320057580a39dbdac2edfdf4a/events.json +248 -0
  109. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/e67e724bab422d2b52eeb49635e512e1/events.json +194 -0
  110. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/e6c72765a28f158a8485b35fa63f73da/events.json +194 -0
  111. data/gems/e11y-devtools/frontend/public/mocks/v1/traces/f541b87405c9a54819b18ebe529f6419/events.json +194 -0
  112. data/gems/e11y-devtools/frontend/scripts/generate_mocks.rb +397 -0
  113. data/gems/e11y-devtools/frontend/src/App.svelte +827 -0
  114. data/gems/e11y-devtools/frontend/src/components/Fab.svelte +19 -0
  115. data/gems/e11y-devtools/frontend/src/components/FilterBar.svelte +38 -0
  116. data/gems/e11y-devtools/frontend/src/components/FullscreenPanel.svelte +82 -0
  117. data/gems/e11y-devtools/frontend/src/components/InteractionsTimeline.svelte +264 -0
  118. data/gems/e11y-devtools/frontend/src/components/RecentHistogram.svelte +354 -0
  119. data/gems/e11y-devtools/frontend/src/lib/api.ts +37 -0
  120. data/gems/e11y-devtools/frontend/src/lib/eventIdentity.ts +12 -0
  121. data/gems/e11y-devtools/frontend/src/lib/format.ts +37 -0
  122. data/gems/e11y-devtools/frontend/src/lib/listFilter.ts +43 -0
  123. data/gems/e11y-devtools/frontend/src/lib/recentVolume.ts +80 -0
  124. data/gems/e11y-devtools/frontend/src/lib/router.ts +12 -0
  125. data/gems/e11y-devtools/frontend/src/lib/transitions.ts +34 -0
  126. data/gems/e11y-devtools/frontend/src/lib/viewportOrigin.ts +25 -0
  127. data/gems/e11y-devtools/frontend/src/main.ts +8 -0
  128. data/gems/e11y-devtools/frontend/src/overlay-entry.ts +24 -0
  129. data/gems/e11y-devtools/frontend/src/overlay.css +1080 -0
  130. data/gems/e11y-devtools/frontend/svelte.config.js +2 -0
  131. data/gems/e11y-devtools/frontend/test_puppeteer.js +41 -0
  132. data/gems/e11y-devtools/frontend/test_scale.js +3 -0
  133. data/gems/e11y-devtools/frontend/tsconfig.app.json +21 -0
  134. data/gems/e11y-devtools/frontend/tsconfig.json +7 -0
  135. data/gems/e11y-devtools/frontend/tsconfig.node.json +26 -0
  136. data/gems/e11y-devtools/frontend/vite.config.ts +36 -0
  137. data/gems/e11y-devtools/lib/e11y/devtools/mcp/server.rb +96 -0
  138. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tool_base.rb +25 -0
  139. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/clear.rb +31 -0
  140. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/errors.rb +35 -0
  141. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/event_detail.rb +33 -0
  142. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/events_by_trace.rb +33 -0
  143. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/interactions.rb +40 -0
  144. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/recent_events.rb +34 -0
  145. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/search.rb +34 -0
  146. data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/stats.rb +30 -0
  147. data/gems/e11y-devtools/lib/e11y/devtools/overlay/assets/overlay.js +20 -0
  148. data/gems/e11y-devtools/lib/e11y/devtools/overlay/controller.rb +94 -0
  149. data/gems/e11y-devtools/lib/e11y/devtools/overlay/engine.rb +26 -0
  150. data/gems/e11y-devtools/lib/e11y/devtools/overlay/middleware.rb +80 -0
  151. data/gems/e11y-devtools/lib/e11y/devtools/overlay/rails_controller.rb +67 -0
  152. data/gems/e11y-devtools/lib/e11y/devtools/tui/app.rb +262 -0
  153. data/gems/e11y-devtools/lib/e11y/devtools/tui/grouping.rb +66 -0
  154. data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/event_detail.rb +62 -0
  155. data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/event_list.rb +70 -0
  156. data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/interaction_list.rb +47 -0
  157. data/gems/e11y-devtools/lib/e11y/devtools/version.rb +8 -0
  158. data/gems/e11y-devtools/lib/e11y/devtools.rb +13 -0
  159. data/gems/e11y-devtools/spec/e11y/devtools/mcp/tools_spec.rb +107 -0
  160. data/gems/e11y-devtools/spec/e11y/devtools/overlay/controller_spec.rb +91 -0
  161. data/gems/e11y-devtools/spec/e11y/devtools/overlay/middleware_spec.rb +46 -0
  162. data/gems/e11y-devtools/spec/e11y/devtools/tui/app_spec.rb +85 -0
  163. data/gems/e11y-devtools/spec/e11y/devtools/tui/grouping_spec.rb +64 -0
  164. data/gems/e11y-devtools/spec/spec_helper.rb +5 -0
  165. data/gems/e11y-devtools/spec/tui/widgets/event_list_spec.rb +44 -0
  166. data/gems/e11y-devtools/spec/tui/widgets/interaction_list_spec.rb +62 -0
  167. data/lib/e11y/adapters/audit_encrypted.rb +53 -11
  168. data/lib/e11y/adapters/base.rb +33 -34
  169. data/lib/e11y/adapters/dev_log/file_store.rb +143 -0
  170. data/lib/e11y/adapters/dev_log/query.rb +219 -0
  171. data/lib/e11y/adapters/dev_log.rb +118 -0
  172. data/lib/e11y/adapters/file.rb +3 -6
  173. data/lib/e11y/adapters/in_memory.rb +52 -5
  174. data/lib/e11y/adapters/in_memory_test.rb +29 -0
  175. data/lib/e11y/adapters/loki.rb +58 -23
  176. data/lib/e11y/adapters/null.rb +82 -0
  177. data/lib/e11y/adapters/opentelemetry_collector.rb +183 -0
  178. data/lib/e11y/adapters/otel_logs.rb +136 -23
  179. data/lib/e11y/adapters/sentry.rb +4 -7
  180. data/lib/e11y/adapters/stdout.rb +73 -7
  181. data/lib/e11y/adapters/yabeda.rb +153 -29
  182. data/lib/e11y/buffers/adaptive_buffer.rb +3 -17
  183. data/lib/e11y/buffers/{request_scoped_buffer.rb → ephemeral_buffer.rb} +72 -58
  184. data/lib/e11y/buffers/ring_buffer.rb +3 -16
  185. data/lib/e11y/configuration.rb +272 -0
  186. data/lib/e11y/console.rb +10 -17
  187. data/lib/e11y/current.rb +53 -1
  188. data/lib/e11y/debug/pipeline_inspector.rb +96 -0
  189. data/lib/e11y/documentation/generator.rb +48 -0
  190. data/lib/e11y/event/base.rb +176 -82
  191. data/lib/e11y/event/value_sampling_config.rb +1 -5
  192. data/lib/e11y/events/rails/database/query.rb +1 -4
  193. data/lib/e11y/events/rails/job/failed.rb +2 -0
  194. data/lib/e11y/instruments/active_job.rb +44 -12
  195. data/lib/e11y/instruments/rails_instrumentation.rb +49 -24
  196. data/lib/e11y/instruments/sidekiq.rb +135 -31
  197. data/lib/e11y/linters/base.rb +11 -0
  198. data/lib/e11y/linters/pii/pii_declaration_linter.rb +120 -0
  199. data/lib/e11y/linters/slo/config_consistency_linter.rb +76 -0
  200. data/lib/e11y/linters/slo/explicit_declaration_linter.rb +36 -0
  201. data/lib/e11y/linters/slo/slo_status_from_linter.rb +41 -0
  202. data/lib/e11y/logger/bridge.rb +26 -7
  203. data/lib/e11y/metrics/cardinality_protection.rb +10 -15
  204. data/lib/e11y/metrics/cardinality_tracker.rb +16 -6
  205. data/lib/e11y/metrics/registry.rb +3 -5
  206. data/lib/e11y/metrics/test_backend.rb +62 -0
  207. data/lib/e11y/metrics.rb +56 -10
  208. data/lib/e11y/middleware/adapter_resolver.rb +40 -0
  209. data/lib/e11y/middleware/audit_signing.rb +43 -6
  210. data/lib/e11y/middleware/baggage_protection.rb +75 -0
  211. data/lib/e11y/middleware/dev_log_source.rb +24 -0
  212. data/lib/e11y/middleware/event_slo.rb +23 -9
  213. data/lib/e11y/middleware/otel_span.rb +23 -0
  214. data/lib/e11y/middleware/pii_filter.rb +104 -75
  215. data/lib/e11y/middleware/rate_limiting.rb +54 -27
  216. data/lib/e11y/middleware/request.rb +70 -23
  217. data/lib/e11y/middleware/routing.rb +78 -21
  218. data/lib/e11y/middleware/sampling.rb +66 -17
  219. data/lib/e11y/middleware/self_monitoring_emit.rb +39 -0
  220. data/lib/e11y/middleware/trace_context.rb +45 -10
  221. data/lib/e11y/middleware/track_latency.rb +34 -0
  222. data/lib/e11y/middleware/validation.rb +7 -16
  223. data/lib/e11y/middleware/versioning.rb +26 -22
  224. data/lib/e11y/opentelemetry/semantic_conventions.rb +109 -0
  225. data/lib/e11y/opentelemetry/span_creator.rb +142 -0
  226. data/lib/e11y/pii/patterns.rb +12 -1
  227. data/lib/e11y/pipeline/builder.rb +4 -4
  228. data/lib/e11y/presets/audit_event.rb +13 -2
  229. data/lib/e11y/railtie.rb +52 -14
  230. data/lib/e11y/registry.rb +306 -0
  231. data/lib/e11y/reliability/circuit_breaker.rb +19 -21
  232. data/lib/e11y/reliability/dlq/base.rb +71 -0
  233. data/lib/e11y/reliability/dlq/file_adapter.rb +301 -0
  234. data/lib/e11y/reliability/dlq/file_storage.rb +63 -34
  235. data/lib/e11y/reliability/dlq/filter.rb +37 -54
  236. data/lib/e11y/reliability/retry_handler.rb +26 -29
  237. data/lib/e11y/reliability/retry_rate_limiter.rb +3 -11
  238. data/lib/e11y/sampling/error_spike_detector.rb +0 -2
  239. data/lib/e11y/sampling/load_monitor.rb +5 -9
  240. data/lib/e11y/sampling/stratified_tracker.rb +18 -0
  241. data/lib/e11y/self_monitoring/buffer_monitor.rb +2 -0
  242. data/lib/e11y/self_monitoring/performance_monitor.rb +19 -61
  243. data/lib/e11y/self_monitoring/reliability_monitor.rb +4 -74
  244. data/lib/e11y/slo/config_loader.rb +40 -0
  245. data/lib/e11y/slo/config_validator.rb +58 -0
  246. data/lib/e11y/slo/dashboard_generator.rb +122 -0
  247. data/lib/e11y/slo/event_driven.rb +8 -0
  248. data/lib/e11y/slo/tracker.rb +31 -4
  249. data/lib/e11y/testing/have_tracked_event_matcher.rb +190 -0
  250. data/lib/e11y/testing/rspec_matchers.rb +21 -0
  251. data/lib/e11y/testing/snapshot_matcher.rb +86 -0
  252. data/lib/e11y/trace_context/sampler.rb +35 -0
  253. data/lib/e11y/tracing/faraday_middleware.rb +31 -0
  254. data/lib/e11y/tracing/net_http_patch.rb +33 -0
  255. data/lib/e11y/tracing/propagator.rb +144 -0
  256. data/lib/e11y/tracing.rb +47 -0
  257. data/lib/e11y/version.rb +1 -1
  258. data/lib/e11y/versioning/version_extractor.rb +32 -0
  259. data/lib/e11y.rb +123 -266
  260. data/lib/generators/e11y/event/event_generator.rb +22 -0
  261. data/lib/generators/e11y/event/templates/event.rb.tt +16 -0
  262. data/lib/generators/e11y/grafana_dashboard/grafana_dashboard_generator.rb +30 -0
  263. data/lib/generators/e11y/grafana_dashboard/templates/e11y_dashboard.json +81 -0
  264. data/lib/generators/e11y/install/install_generator.rb +34 -0
  265. data/lib/generators/e11y/install/templates/e11y.rb +239 -0
  266. data/lib/generators/e11y/prometheus_alerts/prometheus_alerts_generator.rb +29 -0
  267. data/lib/generators/e11y/prometheus_alerts/templates/e11y_alerts.yml +28 -0
  268. data/lib/tasks/e11y_docs.rake +30 -0
  269. data/lib/tasks/e11y_events.rake +71 -0
  270. data/lib/tasks/e11y_lint.rake +91 -0
  271. data/lib/tasks/e11y_slo.rake +29 -0
  272. metadata +186 -39
  273. data/docs/ADR-003-slo-observability.md +0 -3337
  274. data/docs/ADR-010-developer-experience.md +0 -2166
  275. data/docs/API-REFERENCE-L28.md +0 -914
  276. data/docs/COMPREHENSIVE-CONFIGURATION.md +0 -2366
  277. data/docs/CONTRIBUTING.md +0 -312
  278. data/docs/IMPLEMENTATION_NOTES.md +0 -2804
  279. data/docs/IMPLEMENTATION_PLAN.md +0 -1971
  280. data/docs/IMPLEMENTATION_PLAN_ARCHITECTURE.md +0 -586
  281. data/docs/PLAN.md +0 -148
  282. data/docs/README.md +0 -296
  283. data/docs/design/00-memory-optimization.md +0 -593
  284. data/docs/guides/MIGRATION-L27-L28.md +0 -692
  285. data/docs/guides/PERFORMANCE-BENCHMARKS.md +0 -434
  286. data/docs/guides/README.md +0 -44
  287. data/docs/use_cases/UC-003-pattern-based-metrics.md +0 -1627
  288. data/lib/e11y/adapters/registry.rb +0 -141
@@ -2,6 +2,8 @@
2
2
 
3
3
  require "rack/request"
4
4
  require "securerandom"
5
+ require "e11y/tracing/propagator"
6
+ require "e11y/trace_context/sampler"
5
7
 
6
8
  module E11y
7
9
  module Middleware
@@ -32,13 +34,14 @@ module E11y
32
34
  # Process request
33
35
  # @param env [Hash] Rack environment
34
36
  # @return [Array] Rack response [status, headers, body]
35
- # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
37
+ # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
36
38
  # Rack middleware request processing requires sequential setup of tracing, context, buffer, and SLO tracking
37
39
  def call(env)
38
40
  request = Rack::Request.new(env)
39
41
 
40
- # Extract or generate trace_id
41
- trace_id = extract_trace_id(request) || generate_trace_id
42
+ # Extract or generate trace context (trace_id, sampled from traceparent)
43
+ trace_ctx = extract_trace_context(request)
44
+ trace_id = trace_ctx[:trace_id] || generate_trace_id
42
45
  span_id = generate_span_id
43
46
 
44
47
  # Set request context (ActiveSupport::CurrentAttributes)
@@ -50,9 +53,10 @@ module E11y
50
53
  E11y::Current.user_agent = request.user_agent
51
54
  E11y::Current.request_method = request.request_method
52
55
  E11y::Current.request_path = request.path
56
+ E11y::Current.sampled = resolve_sampled(trace_ctx)
53
57
 
54
58
  # Start request-scoped buffer (for debug events)
55
- E11y::Buffers::RequestScopedBuffer.initialize! if E11y.config.request_buffer&.enabled
59
+ E11y::Buffers::EphemeralBuffer.initialize! if E11y.config.ephemeral_buffer_enabled
56
60
 
57
61
  # Track request start time for SLO
58
62
  start_time = Time.now
@@ -60,6 +64,9 @@ module E11y
60
64
  # Call next middleware/app
61
65
  status, headers, body = @app.call(env)
62
66
 
67
+ # Flush buffer on 5xx responses (flush_on_error, enabled by default) or on any status listed in flush_on_statuses
68
+ E11y::Buffers::EphemeralBuffer.flush_on_error if should_flush_buffer?(status)
69
+
63
70
  # Track SLO metrics (if enabled)
64
71
  track_http_request_slo(env, status, start_time)
65
72
 
@@ -70,38 +77,80 @@ module E11y
70
77
  [status, headers, body]
71
78
  rescue StandardError
72
79
  # Flush request buffer on error (includes debug events)
73
- E11y::Buffers::RequestScopedBuffer.flush_on_error if E11y.config.request_buffer&.enabled
80
+ E11y::Buffers::EphemeralBuffer.flush_on_error if E11y.config.ephemeral_buffer_enabled
74
81
 
75
82
  raise # Re-raise original exception
76
83
  ensure
77
84
  # Discard request buffer on success (not on error, already flushed above)
78
85
  # We need to check if we're here from normal completion or exception
79
86
  # If there was an exception, buffer was already flushed in rescue block
80
- if !$ERROR_INFO && E11y.config.request_buffer&.enabled # No exception occurred
81
- E11y::Buffers::RequestScopedBuffer.discard
82
- end
87
+ E11y::Buffers::EphemeralBuffer.discard if !$ERROR_INFO && E11y.config.ephemeral_buffer_enabled # No exception occurred
83
88
 
84
89
  # Reset context
85
90
  E11y::Current.reset
86
91
  end
87
- # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
92
+ # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
88
93
 
89
94
  private
90
95
 
91
- # Extract trace_id from request headers (W3C Trace Context or custom headers)
96
+ # Determine whether the request-scoped buffer should be flushed for this status code.
97
+ #
98
+ # Two independent conditions (either is sufficient):
99
+ # - +flush_on_error+ (default: true) — flushes on any 5xx server error
100
+ # - +flush_on_statuses+ (default: []) — extra status codes/ranges, e.g. [403]
101
+ #
102
+ # @example Default behaviour — flush on 5xx only
103
+ # config.ephemeral_buffer_flush_on_error = true # default
104
+ # config.ephemeral_buffer_flush_on_statuses = [] # default
105
+ #
106
+ # @example Flush on 403 in addition to 5xx
107
+ # config.ephemeral_buffer_flush_on_statuses = [403]
108
+ #
109
+ # @example Flush only on explicit statuses (disable 5xx default)
110
+ # config.ephemeral_buffer_flush_on_error = false
111
+ # config.ephemeral_buffer_flush_on_statuses = [403, 422]
112
+ #
113
+ # @param status [Integer] HTTP response status code
114
+ # @return [Boolean]
115
+ def should_flush_buffer?(status)
116
+ return false unless E11y.config.ephemeral_buffer_enabled
117
+
118
+ # Condition 1: server error flush (5xx)
119
+ return true if E11y.config.ephemeral_buffer_flush_on_error && status >= 500
120
+
121
+ # Condition 2: explicit extra statuses
122
+ extra = E11y.config.ephemeral_buffer_flush_on_statuses
123
+ extra&.any? { |s| s === status } || false # rubocop:disable Style/CaseEquality
124
+ end
125
+
126
+ # Extract trace context from request headers (W3C Trace Context or custom).
127
+ # Also extracts tracestate into E11y::Current.baggage (F-014).
92
128
  # @param request [Rack::Request] Rack request
93
- # @return [String, nil] Trace ID or nil if not found
94
- def extract_trace_id(request)
95
- # W3C Trace Context (traceparent header)
96
- # Format: version-trace_id-span_id-flags
97
- # Example: 00-0af7651916cd43dd8448eb211c80319c-00f067aa0ba902b7-01
129
+ # @return [Hash] { trace_id:, sampled: (from traceparent, or nil if new trace) }
130
+ def extract_trace_context(request)
98
131
  traceparent = request.get_header("HTTP_TRACEPARENT")
99
- return traceparent.split("-")[1] if traceparent
132
+ tracestate = request.get_header("HTTP_TRACESTATE")
133
+
134
+ if tracestate && E11y::Current.respond_to?(:baggage=)
135
+ baggage = E11y::Tracing::Propagator.parse_tracestate(tracestate)
136
+ E11y::Current.baggage = baggage if baggage.any?
137
+ end
138
+
139
+ if traceparent
140
+ parsed = E11y::Tracing::Propagator.parse(traceparent)
141
+ return { trace_id: parsed[:trace_id], sampled: parsed[:sampled] } if parsed
142
+ end
143
+
144
+ trace_id = request.get_header("HTTP_X_REQUEST_ID") || request.get_header("HTTP_X_TRACE_ID")
145
+ { trace_id: trace_id, sampled: nil }
146
+ end
147
+
148
+ # Resolve sampling decision: from parent (traceparent) or Sampler for new trace.
149
+ # Context for Sampler = E11y::Current.to_context (already set above).
150
+ def resolve_sampled(trace_ctx)
151
+ return trace_ctx[:sampled] if trace_ctx.key?(:sampled) && !trace_ctx[:sampled].nil?
100
152
 
101
- # X-Request-ID (Rails default)
102
- request.get_header("HTTP_X_REQUEST_ID") ||
103
- # X-Trace-Id (custom)
104
- request.get_header("HTTP_X_TRACE_ID")
153
+ E11y::TraceContext::Sampler.should_sample?(E11y::Current.to_context)
105
154
  end
106
155
 
107
156
  # Extract request_id from Rack env
@@ -141,10 +190,9 @@ module E11y
141
190
  # @param start_time [Time] Request start time
142
191
  # @return [void]
143
192
  # @api private
144
- # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
145
193
  # SLO tracking requires extracting controller/action, calculating duration, and error handling
146
194
  def track_http_request_slo(env, status, start_time)
147
- return unless E11y.config.slo_tracking&.enabled
195
+ return unless E11y.config.respond_to?(:slo_tracking_enabled) && E11y.config.slo_tracking_enabled
148
196
 
149
197
  duration_ms = ((Time.now - start_time) * 1000).round(2)
150
198
 
@@ -163,7 +211,6 @@ module E11y
163
211
  # Don't fail if SLO tracking fails
164
212
  warn "[E11y] SLO tracking error: #{e.message}"
165
213
  end
166
- # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity
167
214
  end
168
215
  end
169
216
  end
@@ -40,13 +40,8 @@ module E11y
40
40
  # # Rule: ->(e) { :audit_encrypted if e[:audit_event] }
41
41
  # # Routes to: [:audit_encrypted]
42
42
  #
43
- # @example Retention-based routing
44
- # event_data = {
45
- # event_name: 'order.placed',
46
- # retention_until: '2026-04-21T...' # 90 days
47
- # }
48
- # # Rule: ->(e) { days > 30 ? :s3_standard : :loki }
49
- # # Routes to: [:s3_standard]
43
+ # Note: retention_until is for archival jobs (run separately), not for routing.
44
+ # Archival happens later — cron/Loki compaction filters by retention_until.
50
45
  class Routing < Base
51
46
  middleware_zone :adapters
52
47
 
@@ -58,10 +53,23 @@ module E11y
58
53
  # @option event_data [Boolean] :audit_event Audit event flag (optional, for routing rules)
59
54
  # @option event_data [Symbol] :severity Event severity (optional, for routing rules)
60
55
  # @return [Hash, nil] Event data (passed to next middleware), or nil if dropped
61
- # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
56
+ # rubocop:disable Metrics/PerceivedComplexity
62
57
  # Routing logic requires adapter selection, iteration with error handling,
63
58
  # metadata enrichment, and metrics tracking
64
59
  def call(event_data)
60
+ # Handle nil from upstream middleware (e.g., rate limiting, sampling)
61
+ return nil unless event_data
62
+
63
+ # 0. Request-scoped buffer: buffer debug events instead of writing when enabled
64
+ # Skip when event is from a flush (avoid re-buffering)
65
+ if !event_data[:from_ephemeral_buffer_flush] &&
66
+ event_data[:severity] == :debug &&
67
+ E11y.config.ephemeral_buffer_enabled &&
68
+ E11y::Buffers::EphemeralBuffer.active? && E11y::Buffers::EphemeralBuffer.add_event(event_data)
69
+ # Buffered — skip adapter writes, pass through
70
+ return @app&.call(event_data)
71
+ end
72
+
65
73
  # 1. Determine target adapters (explicit or via routing rules)
66
74
  target_adapters = if event_data[:adapters]&.any?
67
75
  # Explicit adapters bypass routing rules
@@ -71,18 +79,28 @@ module E11y
71
79
  apply_routing_rules(event_data)
72
80
  end
73
81
 
82
+ # 1.5. Validate audit events have proper routing (UC-012 compliance requirement)
83
+ validate_audit_routing!(event_data, target_adapters)
84
+
74
85
  # 2. Write to selected adapters
75
86
  target_adapters.each do |adapter_name|
76
87
  adapter = E11y.configuration.adapters[adapter_name]
77
88
  next unless adapter
78
89
 
90
+ # Per-adapter payload: merge payload_rewrites only when present (explicit_pii exclude_adapters)
91
+ data_to_write = if event_data[:payload_rewrites] && event_data[:payload_rewrites][adapter_name]
92
+ payload = event_data[:payload]&.dup || {}
93
+ payload.merge!(event_data[:payload_rewrites][adapter_name])
94
+ event_data.merge(payload: payload)
95
+ else
96
+ event_data
97
+ end
98
+
79
99
  begin
80
- adapter.write(event_data)
81
- increment_metric("e11y.middleware.routing.write_success", adapter: adapter_name)
100
+ adapter.write(data_to_write)
82
101
  rescue StandardError => e
83
102
  # Log routing error but don't fail pipeline
84
103
  warn "E11y routing error for adapter #{adapter_name}: #{e.message}"
85
- increment_metric("e11y.middleware.routing.write_error", adapter: adapter_name)
86
104
  end
87
105
  end
88
106
 
@@ -94,9 +112,9 @@ module E11y
94
112
  }
95
113
 
96
114
  # 4. Increment metrics
97
- increment_metric("e11y.middleware.routing.routed",
98
- adapters_count: target_adapters.size,
99
- routing_type: event_data[:routing][:routing_type])
115
+ E11y::Metrics.increment("e11y.middleware.routing.routed",
116
+ adapters_count: target_adapters.size,
117
+ routing_type: event_data[:routing][:routing_type])
100
118
 
101
119
  # 5. Log routing decision (for debugging)
102
120
  log_routing_decision(event_data, target_adapters) if debug_enabled?
@@ -104,7 +122,7 @@ module E11y
104
122
  # 6. Pass to next app (if any)
105
123
  @app&.call(event_data)
106
124
  end
107
- # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
125
+ # rubocop:enable Metrics/PerceivedComplexity
108
126
 
109
127
  private
110
128
 
@@ -124,12 +142,12 @@ module E11y
124
142
  # ->(event) { :audit_encrypted if event[:audit_event] },
125
143
  # ->(event) {
126
144
  # days = (Time.parse(event[:retention_until]) - Time.now) / 86400
127
- # days > 90 ? :s3_glacier : :loki
145
+ # days > 90 ? :archive : :loki
128
146
  # }
129
147
  # ]
130
148
  #
131
149
  # apply_routing_rules(event_data)
132
- # # => [:audit_encrypted] or [:loki] or [:s3_glacier]
150
+ # # => [:audit_encrypted] or [:loki] or [:archive]
133
151
  def apply_routing_rules(event_data)
134
152
  matched_adapters = []
135
153
 
@@ -143,10 +161,12 @@ module E11y
143
161
  warn "E11y routing rule error: #{e.message}"
144
162
  end
145
163
 
146
- # Return unique adapters or fallback
164
+ # Track whether fallback was used (for audit validation)
147
165
  if matched_adapters.any?
166
+ event_data[:routing_used_fallback] = false
148
167
  matched_adapters.uniq
149
168
  else
169
+ event_data[:routing_used_fallback] = true
150
170
  E11y.configuration.fallback_adapters || [:stdout]
151
171
  end
152
172
  end
@@ -175,9 +195,46 @@ module E11y
175
195
  # @param metric_name [String] Metric name
176
196
  # @param tags [Hash] Metric tags
177
197
  # @return [void]
178
- def increment_metric(_metric_name, **_tags)
179
- # TODO: Integrate with Yabeda/Prometheus
180
- # Yabeda.e11y.middleware_routing_routed.increment(tags)
198
+ # Validate audit events have proper routing configuration.
199
+ #
200
+ # Audit events MUST be routed via explicit adapters OR routing rules.
201
+ # Relying on fallback routing (no rule matched) is a compliance configuration error.
202
+ #
203
+ # @param event_data [Hash] Event data
204
+ # @param target_adapters [Array<Symbol>] Target adapters
205
+ # @raise [E11y::Error] if audit event misconfigured
206
+ # @return [void]
207
+ def validate_audit_routing!(event_data, target_adapters)
208
+ return unless event_data[:audit_event]
209
+
210
+ # Audit events are valid if:
211
+ # 1. They have explicit adapters (non-empty), OR
212
+ # 2. They matched a routing rule (routing_used_fallback = false)
213
+
214
+ has_explicit_adapters = event_data[:adapters]&.any?
215
+ return if has_explicit_adapters # Explicit adapters → valid
216
+
217
+ # Check if fallback was used (set by apply_routing_rules)
218
+ used_fallback = event_data[:routing_used_fallback]
219
+ return unless used_fallback
220
+
221
+ # CRITICAL: Audit event using fallback routing (no rule matched!)
222
+ error_message = <<~ERROR
223
+ [E11y] CRITICAL: Audit event has no routing configuration!
224
+
225
+ Event: #{event_data[:event_name]}
226
+ Routed to: #{target_adapters.inspect} (fallback adapters)
227
+
228
+ Audit events MUST be explicitly routed to compliance-grade storage.
229
+
230
+ Fix options:
231
+ 1. Add explicit adapters: `adapters :audit_encrypted`
232
+ 2. Configure routing rule: `config.routing_rules = [->(e) { :audit_encrypted if e[:audit_event] }]`
233
+
234
+ See UC-012 Audit Trail documentation for details.
235
+ ERROR
236
+
237
+ raise E11y::Error, error_message
181
238
  end
182
239
  end
183
240
  end
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "e11y/middleware/base"
4
+ require "e11y/sampling/stratified_tracker"
4
5
 
5
6
  module E11y
6
7
  module Middleware
@@ -51,6 +52,8 @@ module E11y
51
52
  # }
52
53
  # }
53
54
  # end
55
+ # rubocop:disable Metrics/ClassLength
56
+ # Class has 6 adaptive sampling strategies each requiring dedicated setup + private methods
54
57
  class Sampling < Base
55
58
  middleware_zone :routing
56
59
 
@@ -79,6 +82,9 @@ module E11y
79
82
  # @param event_data [Hash] The event payload
80
83
  # @return [Hash, nil] The event payload if sampled, nil if dropped
81
84
  def call(event_data)
85
+ # Handle nil from upstream middleware (e.g., rate limiting)
86
+ return nil unless event_data
87
+
82
88
  event_class = event_data[:event_class]
83
89
 
84
90
  # Track errors for error-based adaptive sampling (FEAT-4838)
@@ -87,13 +93,34 @@ module E11y
87
93
  # Track events for load-based adaptive sampling (FEAT-4842)
88
94
  @load_monitor&.record_event
89
95
 
96
+ # C11: Get sample rate and severity before decision (for StratifiedTracker)
97
+ sample_rate = determine_sample_rate(event_class, event_data)
98
+ severity = event_data[:severity] || (event_class.respond_to?(:severity) ? event_class.severity : :info)
99
+
90
100
  # Determine if event should be sampled
91
101
  # Drop event if not sampled
92
- return nil unless should_sample?(event_data, event_class)
102
+ unless should_sample?(event_data, event_class)
103
+ # C11: Record dropped event to StratifiedTracker for sampling correction
104
+ E11y::Sampling.stratified_tracker.record_sample(severity: severity, sample_rate: sample_rate, sampled: false)
105
+ begin
106
+ if defined?(E11y::Metrics) && E11y::Metrics.respond_to?(:increment)
107
+ E11y::Metrics.increment(:e11y_events_dropped_total, {
108
+ reason: "sampled_out",
109
+ event_type: event_data[:event_name].to_s
110
+ })
111
+ end
112
+ rescue StandardError
113
+ # non-fatal
114
+ end
115
+ return nil
116
+ end
93
117
 
94
118
  # Mark as sampled for downstream middleware
95
119
  event_data[:sampled] = true
96
- event_data[:sample_rate] = determine_sample_rate(event_class, event_data)
120
+ event_data[:sample_rate] = sample_rate
121
+
122
+ # C11: Record sampled event to StratifiedTracker for sampling correction
123
+ E11y::Sampling.stratified_tracker.record_sample(severity: severity, sample_rate: sample_rate, sampled: true)
97
124
 
98
125
  # Pass to next middleware
99
126
  @app.call(event_data)
@@ -121,6 +148,7 @@ module E11y
121
148
  @default_sample_rate = config.fetch(:default_sample_rate, 1.0)
122
149
  @trace_aware = config.fetch(:trace_aware, true)
123
150
  @severity_rates = config.fetch(:severity_rates, {})
151
+ @pattern_rates = config.fetch(:pattern_rates, []) # [[Regexp, Float], ...]
124
152
  @trace_decisions = {} # Cache for trace-level sampling decisions
125
153
  @trace_decisions_mutex = Mutex.new
126
154
  end
@@ -158,8 +186,10 @@ module E11y
158
186
  # 1. Check if audit event (never sample audit events!)
159
187
  return true if event_class.respond_to?(:audit_event?) && event_class.audit_event?
160
188
 
161
- # 2. Check trace-aware sampling (C05)
189
+ # 2. Trace-consistent sampling (ADR-005 §7): prefer E11y::Current.sampled when trace_aware
162
190
  if @trace_aware && event_data[:trace_id]
191
+ return E11y::Current.sampled if E11y::Current.respond_to?(:sampled) && !E11y::Current.sampled.nil?
192
+
163
193
  return trace_sampling_decision(event_data[:trace_id], event_class, event_data)
164
194
  end
165
195
 
@@ -183,22 +213,32 @@ module E11y
183
213
  # @param event_class [Class] The event class
184
214
  # @param event_data [Hash] Event payload (for value-based sampling)
185
215
  # @return [Float] Sample rate (0.0-1.0)
186
- # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
187
- # Sample rate determination follows priority chain: error spike → value-based →
188
- # load-based → severity → event-level → default
216
+ # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
217
+ # Sample rate determination follows a 6-step priority chain:
218
+ # error spike (0) → pattern-based (0.5) → value-based (1) →
219
+ # load-based (2) → severity (3) → event-level (4) → default (5)
189
220
  def determine_sample_rate(event_class, event_data = nil)
190
221
  # 0. Error-based adaptive sampling (FEAT-4838) - highest priority!
191
222
  if @error_based_adaptive && @error_spike_detector&.error_spike?
192
223
  return 1.0 # 100% sampling during error spike
193
224
  end
194
225
 
226
+ # 0.5. Pattern-based sampling (by event_name) - overrides event-level config
227
+ if event_data && !@pattern_rates.empty?
228
+ event_name = event_data[:event_name].to_s
229
+ @pattern_rates.each do |pattern, rate|
230
+ return rate if pattern.match?(event_name)
231
+ end
232
+ end
233
+
195
234
  # 1. Value-based sampling (FEAT-4849) - high-value events always sampled
196
235
  if event_data && event_class.respond_to?(:value_sampling_configs)
197
236
  configs = event_class.value_sampling_configs
198
237
  unless configs.empty?
199
238
  require "e11y/sampling/value_extractor"
200
239
  extractor = E11y::Sampling::ValueExtractor.new
201
- if configs.any? { |config| config.matches?(event_data, extractor) }
240
+ payload = event_data[:payload] || event_data
241
+ if configs.any? { |config| config.matches?(payload, extractor) }
202
242
  return 1.0 # 100% sampling for high-value events
203
243
  end
204
244
  end
@@ -228,7 +268,7 @@ module E11y
228
268
  # 4. Default/load-based rate
229
269
  base_rate
230
270
  end
231
- # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
271
+ # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
232
272
 
233
273
  # Trace-aware sampling decision (C05 Resolution)
234
274
  #
@@ -241,15 +281,21 @@ module E11y
241
281
  # @return [Boolean] true if trace should be sampled
242
282
  def trace_sampling_decision(trace_id, event_class, event_data = nil)
243
283
  @trace_decisions_mutex.synchronize do
284
+ # Use monotonic clock (Float) to avoid Time object allocation — prevents memory leak in hot path
285
+ now = Process.clock_gettime(Process::CLOCK_MONOTONIC)
286
+
244
287
  # Check if decision already made for this trace
245
- return @trace_decisions[trace_id] if @trace_decisions.key?(trace_id)
288
+ if (entry = @trace_decisions[trace_id])
289
+ entry[:last_access] = now # LRU touch
290
+ return entry[:decision]
291
+ end
246
292
 
247
293
  # Make new sampling decision
248
294
  sample_rate = determine_sample_rate(event_class, event_data)
249
295
  decision = rand < sample_rate
250
296
 
251
- # Cache decision (TTL handled by periodic cleanup)
252
- @trace_decisions[trace_id] = decision
297
+ # Cache decision with LRU metadata (evict oldest on cleanup)
298
+ @trace_decisions[trace_id] = { decision: decision, last_access: now }
253
299
 
254
300
  # Cleanup old decisions periodically (every 1000 traces)
255
301
  cleanup_trace_decisions if @trace_decisions.size > 1000
@@ -260,14 +306,17 @@ module E11y
260
306
 
261
307
  # Cleanup old trace decisions to prevent memory leaks
262
308
  #
263
- # Removes random 50% of cached decisions when cache grows too large.
264
- # This is a simple heuristic - traces typically complete in <10 seconds,
265
- # so old decisions are likely stale.
309
+ # Evicts oldest 50% by last_access (LRU). Active traces stay in cache
310
+ # because they are touched on each lookup, preserving trace-level consistency.
266
311
  def cleanup_trace_decisions
267
- # Remove random 50% of decisions
268
- keys_to_remove = @trace_decisions.keys.sample(@trace_decisions.size / 2)
269
- keys_to_remove.each { |key| @trace_decisions.delete(key) }
312
+ return if @trace_decisions.size <= 100
313
+
314
+ size_to_remove = @trace_decisions.size / 2
315
+ sorted = @trace_decisions.to_a.sort_by { |_, v| v[:last_access] }
316
+ keys_to_remove = sorted.first(size_to_remove).map(&:first)
317
+ keys_to_remove.each { |k| @trace_decisions.delete(k) }
270
318
  end
271
319
  end
320
+ # rubocop:enable Metrics/ClassLength
272
321
  end
273
322
  end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "e11y/middleware/base"
4
+ require "e11y/slo/config_loader"
5
+
6
+ module E11y
7
+ module Middleware
8
+ # SelfMonitoringEmit middleware — emits e11y_events_tracked_total at pipeline end.
9
+ #
10
+ # When e11y_self_monitoring.enabled is true in slo.yml, increments the counter
11
+ # for each event that reaches the end of the pipeline (after EventSlo).
12
+ #
13
+ # **Middleware Zone:** `:post_processing` (last in pipeline)
14
+ #
15
+ # @example slo.yml
16
+ # e11y_self_monitoring:
17
+ # enabled: true
18
+ # targets:
19
+ # reliability: 0.999
20
+ #
21
+ # @see docs/plans/2026-03-13-slo-linters-self-monitoring-plan.md
22
+ class SelfMonitoringEmit < Base
23
+ middleware_zone :post_processing
24
+
25
+ # Process event and optionally emit self-monitoring metric.
26
+ #
27
+ # @param event_data [Hash, nil] Event payload (nil passes through)
28
+ # @return [Hash, nil] Unchanged event_data (passthrough)
29
+ def call(event_data)
30
+ if event_data && E11y::SLO::ConfigLoader.self_monitoring_enabled?
31
+ event_name = event_data[:event_name].to_s.presence || "unknown"
32
+ E11y::Metrics.increment(:e11y_events_tracked_total, result: "success", event_name: event_name)
33
+ end
34
+
35
+ @app&.call(event_data) || event_data
36
+ end
37
+ end
38
+ end
39
+ end
@@ -56,19 +56,19 @@ module E11y
56
56
  def call(event_data)
57
57
  enrich_trace_context(event_data)
58
58
  enrich_service_context(event_data)
59
- increment_metric("e11y.middleware.trace_context.processed")
59
+ E11y::Metrics.increment("e11y.middleware.trace_context.processed")
60
60
  @app.call(event_data)
61
61
  end
62
62
 
63
63
  private
64
64
 
65
- # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
65
+ # rubocop:disable Metrics/AbcSize
66
66
  # Add distributed tracing fields to event data
67
67
  # @param event_data [Hash] Event data to enrich
68
68
  # @return [void]
69
69
  def enrich_trace_context(event_data)
70
70
  event_data[:trace_id] ||= current_trace_id || generate_trace_id
71
- event_data[:span_id] ||= generate_span_id
71
+ event_data[:span_id] ||= current_span_id || generate_span_id
72
72
  event_data[:parent_trace_id] ||= current_parent_trace_id if current_parent_trace_id
73
73
 
74
74
  # Format timestamp if it's a Time object
@@ -93,7 +93,7 @@ module E11y
93
93
 
94
94
  event_data[:audit_event] = event_class.audit_event?
95
95
  end
96
- # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
96
+ # rubocop:enable Metrics/AbcSize
97
97
 
98
98
  # Add service context fields to event data
99
99
  # @param event_data [Hash] Event data to enrich
@@ -103,15 +103,54 @@ module E11y
103
103
  event_data[:environment] ||= E11y.config.environment
104
104
  end
105
105
 
106
- # Get current trace ID from E11y::Current or thread-local storage (request context).
106
+ # Get current trace ID from configured source (ADR-007 §8).
107
107
  #
108
- # Priority: E11y::Current > Thread.current
108
+ # When config.tracing_source is :opentelemetry and OTel SDK has an active span,
109
+ # uses trace_id from OpenTelemetry::Trace.current_span.
110
+ # Otherwise: E11y::Current > Thread.current
109
111
  #
110
112
  # @return [String, nil] Current trace ID if set, nil otherwise
111
113
  def current_trace_id
114
+ if tracing_source_opentelemetry?
115
+ otel = otel_trace_context
116
+ return otel[:trace_id] if otel[:trace_id]
117
+ end
112
118
  E11y::Current.trace_id || Thread.current[:e11y_trace_id]
113
119
  end
114
120
 
121
+ # Get current span ID (for event correlation).
122
+ # When using OTel source and span exists, returns OTel span_id; otherwise nil (caller generates).
123
+ #
124
+ # @return [String, nil]
125
+ def current_span_id
126
+ return nil unless tracing_source_opentelemetry?
127
+
128
+ otel = otel_trace_context
129
+ otel[:span_id]
130
+ end
131
+
132
+ def tracing_source_opentelemetry?
133
+ E11y.config&.tracing_source == :opentelemetry
134
+ end
135
+
136
+ def otel_trace_context
137
+ return {} unless defined?(OpenTelemetry::Trace)
138
+
139
+ span = OpenTelemetry::Trace.current_span
140
+ ctx = span.context
141
+ return {} unless ctx.respond_to?(:valid?) && ctx.valid?
142
+
143
+ trace_id = ctx.respond_to?(:hex_trace_id) ? ctx.hex_trace_id : nil
144
+ span_id = ctx.respond_to?(:hex_span_id) ? ctx.hex_span_id : nil
145
+ return {} if trace_id.to_s.empty?
146
+
147
+ # Sync to E11y::Current so downstream uses same context
148
+ E11y::Current.trace_id = trace_id
149
+ E11y::Current.span_id = span_id
150
+
151
+ { trace_id: trace_id, span_id: span_id }
152
+ end
153
+
115
154
  # Get current parent trace ID from E11y::Current (background job context).
116
155
  #
117
156
  # Only set for background jobs that have a parent request trace.
@@ -151,10 +190,6 @@ module E11y
151
190
  #
152
191
  # @param metric_name [String] Metric name
153
192
  # @return [void]
154
- def increment_metric(_metric_name)
155
- # TODO: Integrate with Yabeda/Prometheus in Phase 2
156
- # Yabeda.e11y.middleware_trace_context_processed.increment
157
- end
158
193
  end
159
194
  end
160
195
  end