e11y 0.2.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +130 -10
- data/CHANGELOG.md +80 -1
- data/CLAUDE.md +168 -0
- data/CONTRIBUTING.md +640 -0
- data/README.md +165 -701
- data/RELEASE.md +41 -12
- data/Rakefile +249 -57
- data/config/README.md +1 -1
- data/config/loki-local-config.yaml +12 -0
- data/config/otel-collector-config.yaml +44 -0
- data/cucumber.yml +1 -0
- data/docker-compose.yml +18 -2
- data/docs/ADAPTERS.md +76 -0
- data/docs/ADAPTIVE_SAMPLING.md +59 -0
- data/docs/COMPARISON.md +104 -0
- data/docs/CONFIGURATION.md +52 -0
- data/docs/DISTRIBUTED_TRACING.md +44 -0
- data/docs/LIMITATIONS.md +13 -0
- data/docs/METRICS_DSL.md +84 -0
- data/docs/PERFORMANCE.md +60 -0
- data/docs/PII_FILTERING.md +40 -0
- data/docs/PRESETS.md +65 -0
- data/docs/QUICK-START.md +546 -587
- data/docs/RAILS_INTEGRATION.md +79 -0
- data/docs/SCHEMA_VALIDATION.md +63 -0
- data/docs/SLO-PROMQL-ALERTS.md +161 -0
- data/docs/TESTING.md +69 -0
- data/docs/{ADR-001-architecture.md → architecture/ADR-001-architecture.md} +36 -65
- data/docs/{ADR-002-metrics-yabeda.md → architecture/ADR-002-metrics-yabeda.md} +62 -236
- data/docs/architecture/ADR-003-slo-observability.md +1402 -0
- data/docs/{ADR-004-adapter-architecture.md → architecture/ADR-004-adapter-architecture.md} +163 -146
- data/docs/{ADR-005-tracing-context.md → architecture/ADR-005-tracing-context.md} +10 -9
- data/docs/{ADR-006-security-compliance.md → architecture/ADR-006-security-compliance.md} +184 -191
- data/docs/{ADR-007-opentelemetry-integration.md → architecture/ADR-007-opentelemetry-integration.md} +3 -21
- data/docs/{ADR-008-rails-integration.md → architecture/ADR-008-rails-integration.md} +182 -743
- data/docs/{ADR-009-cost-optimization.md → architecture/ADR-009-cost-optimization.md} +45 -54
- data/docs/architecture/ADR-010-developer-experience.md +522 -0
- data/docs/{ADR-011-testing-strategy.md → architecture/ADR-011-testing-strategy.md} +44 -86
- data/docs/{ADR-012-event-evolution.md → architecture/ADR-012-event-evolution.md} +11 -11
- data/docs/{ADR-013-reliability-error-handling.md → architecture/ADR-013-reliability-error-handling.md} +37 -12
- data/docs/{ADR-014-event-driven-slo.md → architecture/ADR-014-event-driven-slo.md} +12 -24
- data/docs/{ADR-015-middleware-order.md → architecture/ADR-015-middleware-order.md} +43 -59
- data/docs/{ADR-016-self-monitoring-slo.md → architecture/ADR-016-self-monitoring-slo.md} +58 -355
- data/docs/{ADR-017-multi-rails-compatibility.md → architecture/ADR-017-multi-rails-compatibility.md} +4 -11
- data/docs/architecture/ADR-018-memory-optimization.md +366 -0
- data/docs/{ADR-INDEX.md → architecture/ADR-INDEX.md} +11 -6
- data/docs/plans/2026-03-20-browser-overlay-svelte.md +281 -0
- data/docs/{00-ICP-AND-TIMELINE.md → prd/00-ICP-AND-TIMELINE.md} +6 -6
- data/docs/{01-SCALE-REQUIREMENTS.md → prd/01-SCALE-REQUIREMENTS.md} +6 -6
- data/docs/prd/01-overview-vision.md +19 -14
- data/docs/use_cases/README.md +22 -23
- data/docs/use_cases/UC-001-request-scoped-debug-buffering.md +50 -44
- data/docs/use_cases/UC-002-business-event-tracking.md +26 -95
- data/docs/use_cases/UC-003-event-metrics.md +66 -0
- data/docs/use_cases/UC-004-zero-config-slo-tracking.md +33 -684
- data/docs/use_cases/UC-005-sentry-integration.md +13 -15
- data/docs/use_cases/UC-006-trace-context-management.md +30 -28
- data/docs/use_cases/UC-007-pii-filtering.md +35 -87
- data/docs/use_cases/UC-008-opentelemetry-integration.md +51 -89
- data/docs/use_cases/UC-009-multi-service-tracing.md +30 -178
- data/docs/use_cases/UC-010-background-job-tracking.md +24 -91
- data/docs/use_cases/UC-011-rate-limiting.md +95 -168
- data/docs/use_cases/UC-012-audit-trail.md +21 -46
- data/docs/use_cases/UC-013-high-cardinality-protection.md +29 -167
- data/docs/use_cases/UC-014-adaptive-sampling.md +2 -2
- data/docs/use_cases/UC-015-cost-optimization.md +46 -99
- data/docs/use_cases/UC-016-rails-logger-migration.md +39 -213
- data/docs/use_cases/UC-017-local-development.md +203 -777
- data/docs/use_cases/UC-018-testing-events.md +3 -3
- data/docs/use_cases/UC-019-retention-based-routing.md +53 -106
- data/docs/use_cases/UC-020-event-versioning.md +8 -9
- data/docs/use_cases/UC-021-error-handling-retry-dlq.md +18 -22
- data/docs/use_cases/UC-022-event-registry.md +15 -21
- data/docs/use_cases/backlog.md +119 -87
- data/e11y.gemspec +2 -2
- data/gems/e11y-devtools/README.md +158 -0
- data/gems/e11y-devtools/config/routes.rb +15 -0
- data/gems/e11y-devtools/e11y-devtools.gemspec +25 -0
- data/gems/e11y-devtools/exe/e11y +34 -0
- data/gems/e11y-devtools/frontend/.gitignore +24 -0
- data/gems/e11y-devtools/frontend/README.md +51 -0
- data/gems/e11y-devtools/frontend/index.html +14 -0
- data/gems/e11y-devtools/frontend/package-lock.json +3707 -0
- data/gems/e11y-devtools/frontend/package.json +28 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/events/recent.json +4205 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/interactions.json +194 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/0a2e04027cfa22d014bc22e8b27cd913/events.json +86 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/0e1543af6a630fb3af6b52283154b3e0/events.json +169 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/1838b691faa49564f97db8592ff3978d/events.json +78 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/29f198f6588dacffb687777eb5f8f118/events.json +197 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/34bc3c9c0097de28a7a6f99b90a8e7bc/events.json +194 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/3ba6c20d068ab9cee00e51b180e66444/events.json +184 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/435bfd8f17b9009146a79812d7c3726d/events.json +144 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/4c7676e3fe668e99edb2b94d7d5678a9/events.json +222 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/6daf0d47974bedfc55d5de7004a3ea9f/events.json +194 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/8a81ada42834d15f287bb40010043605/events.json +194 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/8c0a98900edaae105469df8daedccf02/events.json +198 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/8e4f645180f8a7d1dce426b07380466b/events.json +222 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/93db346fa5d44a032605a13b627f4b80/events.json +128 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/98ff6146faf7bd9be8bd03a8275817ba/events.json +223 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/9997ddd0247bc7e25f2ca7a5c415c93d/events.json +197 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/99e35f8ef3baedd798cc4fd085980ad9/events.json +194 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/b4f3095c1909924cbc98889a86c83d6d/events.json +131 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/b54b7fc32b7575a7110de809d11ccda0/events.json +128 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/c0b48033fa06746bcc5886745e053cff/events.json +169 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/c44649ac76701b4558927cd2305ab535/events.json +169 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/d601ae3320057580a39dbdac2edfdf4a/events.json +248 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/e67e724bab422d2b52eeb49635e512e1/events.json +194 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/e6c72765a28f158a8485b35fa63f73da/events.json +194 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/f541b87405c9a54819b18ebe529f6419/events.json +194 -0
- data/gems/e11y-devtools/frontend/scripts/generate_mocks.rb +397 -0
- data/gems/e11y-devtools/frontend/src/App.svelte +827 -0
- data/gems/e11y-devtools/frontend/src/components/Fab.svelte +19 -0
- data/gems/e11y-devtools/frontend/src/components/FilterBar.svelte +38 -0
- data/gems/e11y-devtools/frontend/src/components/FullscreenPanel.svelte +82 -0
- data/gems/e11y-devtools/frontend/src/components/InteractionsTimeline.svelte +264 -0
- data/gems/e11y-devtools/frontend/src/components/RecentHistogram.svelte +354 -0
- data/gems/e11y-devtools/frontend/src/lib/api.ts +37 -0
- data/gems/e11y-devtools/frontend/src/lib/eventIdentity.ts +12 -0
- data/gems/e11y-devtools/frontend/src/lib/format.ts +37 -0
- data/gems/e11y-devtools/frontend/src/lib/listFilter.ts +43 -0
- data/gems/e11y-devtools/frontend/src/lib/recentVolume.ts +80 -0
- data/gems/e11y-devtools/frontend/src/lib/router.ts +12 -0
- data/gems/e11y-devtools/frontend/src/lib/transitions.ts +34 -0
- data/gems/e11y-devtools/frontend/src/lib/viewportOrigin.ts +25 -0
- data/gems/e11y-devtools/frontend/src/main.ts +8 -0
- data/gems/e11y-devtools/frontend/src/overlay-entry.ts +24 -0
- data/gems/e11y-devtools/frontend/src/overlay.css +1080 -0
- data/gems/e11y-devtools/frontend/svelte.config.js +2 -0
- data/gems/e11y-devtools/frontend/test_puppeteer.js +41 -0
- data/gems/e11y-devtools/frontend/test_scale.js +3 -0
- data/gems/e11y-devtools/frontend/tsconfig.app.json +21 -0
- data/gems/e11y-devtools/frontend/tsconfig.json +7 -0
- data/gems/e11y-devtools/frontend/tsconfig.node.json +26 -0
- data/gems/e11y-devtools/frontend/vite.config.ts +36 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/server.rb +96 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tool_base.rb +25 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/clear.rb +31 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/errors.rb +35 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/event_detail.rb +33 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/events_by_trace.rb +33 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/interactions.rb +40 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/recent_events.rb +34 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/search.rb +34 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/stats.rb +30 -0
- data/gems/e11y-devtools/lib/e11y/devtools/overlay/assets/overlay.js +20 -0
- data/gems/e11y-devtools/lib/e11y/devtools/overlay/controller.rb +94 -0
- data/gems/e11y-devtools/lib/e11y/devtools/overlay/engine.rb +26 -0
- data/gems/e11y-devtools/lib/e11y/devtools/overlay/middleware.rb +80 -0
- data/gems/e11y-devtools/lib/e11y/devtools/overlay/rails_controller.rb +67 -0
- data/gems/e11y-devtools/lib/e11y/devtools/tui/app.rb +262 -0
- data/gems/e11y-devtools/lib/e11y/devtools/tui/grouping.rb +66 -0
- data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/event_detail.rb +62 -0
- data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/event_list.rb +70 -0
- data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/interaction_list.rb +47 -0
- data/gems/e11y-devtools/lib/e11y/devtools/version.rb +8 -0
- data/gems/e11y-devtools/lib/e11y/devtools.rb +13 -0
- data/gems/e11y-devtools/spec/e11y/devtools/mcp/tools_spec.rb +107 -0
- data/gems/e11y-devtools/spec/e11y/devtools/overlay/controller_spec.rb +91 -0
- data/gems/e11y-devtools/spec/e11y/devtools/overlay/middleware_spec.rb +46 -0
- data/gems/e11y-devtools/spec/e11y/devtools/tui/app_spec.rb +85 -0
- data/gems/e11y-devtools/spec/e11y/devtools/tui/grouping_spec.rb +64 -0
- data/gems/e11y-devtools/spec/spec_helper.rb +5 -0
- data/gems/e11y-devtools/spec/tui/widgets/event_list_spec.rb +44 -0
- data/gems/e11y-devtools/spec/tui/widgets/interaction_list_spec.rb +62 -0
- data/lib/e11y/adapters/audit_encrypted.rb +53 -11
- data/lib/e11y/adapters/base.rb +33 -34
- data/lib/e11y/adapters/dev_log/file_store.rb +143 -0
- data/lib/e11y/adapters/dev_log/query.rb +219 -0
- data/lib/e11y/adapters/dev_log.rb +118 -0
- data/lib/e11y/adapters/file.rb +3 -6
- data/lib/e11y/adapters/in_memory.rb +52 -5
- data/lib/e11y/adapters/in_memory_test.rb +29 -0
- data/lib/e11y/adapters/loki.rb +58 -23
- data/lib/e11y/adapters/null.rb +82 -0
- data/lib/e11y/adapters/opentelemetry_collector.rb +183 -0
- data/lib/e11y/adapters/otel_logs.rb +136 -23
- data/lib/e11y/adapters/sentry.rb +4 -7
- data/lib/e11y/adapters/stdout.rb +73 -7
- data/lib/e11y/adapters/yabeda.rb +153 -29
- data/lib/e11y/buffers/adaptive_buffer.rb +3 -17
- data/lib/e11y/buffers/{request_scoped_buffer.rb → ephemeral_buffer.rb} +72 -58
- data/lib/e11y/buffers/ring_buffer.rb +3 -16
- data/lib/e11y/configuration.rb +272 -0
- data/lib/e11y/console.rb +10 -17
- data/lib/e11y/current.rb +53 -1
- data/lib/e11y/debug/pipeline_inspector.rb +96 -0
- data/lib/e11y/documentation/generator.rb +48 -0
- data/lib/e11y/event/base.rb +176 -82
- data/lib/e11y/event/value_sampling_config.rb +1 -5
- data/lib/e11y/events/rails/database/query.rb +1 -4
- data/lib/e11y/events/rails/job/failed.rb +2 -0
- data/lib/e11y/instruments/active_job.rb +44 -12
- data/lib/e11y/instruments/rails_instrumentation.rb +49 -24
- data/lib/e11y/instruments/sidekiq.rb +135 -31
- data/lib/e11y/linters/base.rb +11 -0
- data/lib/e11y/linters/pii/pii_declaration_linter.rb +120 -0
- data/lib/e11y/linters/slo/config_consistency_linter.rb +76 -0
- data/lib/e11y/linters/slo/explicit_declaration_linter.rb +36 -0
- data/lib/e11y/linters/slo/slo_status_from_linter.rb +41 -0
- data/lib/e11y/logger/bridge.rb +26 -7
- data/lib/e11y/metrics/cardinality_protection.rb +10 -15
- data/lib/e11y/metrics/cardinality_tracker.rb +16 -6
- data/lib/e11y/metrics/registry.rb +3 -5
- data/lib/e11y/metrics/test_backend.rb +62 -0
- data/lib/e11y/metrics.rb +56 -10
- data/lib/e11y/middleware/adapter_resolver.rb +40 -0
- data/lib/e11y/middleware/audit_signing.rb +43 -6
- data/lib/e11y/middleware/baggage_protection.rb +75 -0
- data/lib/e11y/middleware/dev_log_source.rb +24 -0
- data/lib/e11y/middleware/event_slo.rb +23 -9
- data/lib/e11y/middleware/otel_span.rb +23 -0
- data/lib/e11y/middleware/pii_filter.rb +104 -75
- data/lib/e11y/middleware/rate_limiting.rb +54 -27
- data/lib/e11y/middleware/request.rb +70 -23
- data/lib/e11y/middleware/routing.rb +78 -21
- data/lib/e11y/middleware/sampling.rb +66 -17
- data/lib/e11y/middleware/self_monitoring_emit.rb +39 -0
- data/lib/e11y/middleware/trace_context.rb +45 -10
- data/lib/e11y/middleware/track_latency.rb +34 -0
- data/lib/e11y/middleware/validation.rb +7 -16
- data/lib/e11y/middleware/versioning.rb +26 -22
- data/lib/e11y/opentelemetry/semantic_conventions.rb +109 -0
- data/lib/e11y/opentelemetry/span_creator.rb +142 -0
- data/lib/e11y/pii/patterns.rb +12 -1
- data/lib/e11y/pipeline/builder.rb +4 -4
- data/lib/e11y/presets/audit_event.rb +13 -2
- data/lib/e11y/railtie.rb +52 -14
- data/lib/e11y/registry.rb +306 -0
- data/lib/e11y/reliability/circuit_breaker.rb +19 -21
- data/lib/e11y/reliability/dlq/base.rb +71 -0
- data/lib/e11y/reliability/dlq/file_adapter.rb +301 -0
- data/lib/e11y/reliability/dlq/file_storage.rb +63 -34
- data/lib/e11y/reliability/dlq/filter.rb +37 -54
- data/lib/e11y/reliability/retry_handler.rb +26 -29
- data/lib/e11y/reliability/retry_rate_limiter.rb +3 -11
- data/lib/e11y/sampling/error_spike_detector.rb +0 -2
- data/lib/e11y/sampling/load_monitor.rb +5 -9
- data/lib/e11y/sampling/stratified_tracker.rb +18 -0
- data/lib/e11y/self_monitoring/buffer_monitor.rb +2 -0
- data/lib/e11y/self_monitoring/performance_monitor.rb +19 -61
- data/lib/e11y/self_monitoring/reliability_monitor.rb +4 -74
- data/lib/e11y/slo/config_loader.rb +40 -0
- data/lib/e11y/slo/config_validator.rb +58 -0
- data/lib/e11y/slo/dashboard_generator.rb +122 -0
- data/lib/e11y/slo/event_driven.rb +8 -0
- data/lib/e11y/slo/tracker.rb +31 -4
- data/lib/e11y/testing/have_tracked_event_matcher.rb +190 -0
- data/lib/e11y/testing/rspec_matchers.rb +21 -0
- data/lib/e11y/testing/snapshot_matcher.rb +86 -0
- data/lib/e11y/trace_context/sampler.rb +35 -0
- data/lib/e11y/tracing/faraday_middleware.rb +31 -0
- data/lib/e11y/tracing/net_http_patch.rb +33 -0
- data/lib/e11y/tracing/propagator.rb +144 -0
- data/lib/e11y/tracing.rb +47 -0
- data/lib/e11y/version.rb +1 -1
- data/lib/e11y/versioning/version_extractor.rb +32 -0
- data/lib/e11y.rb +123 -266
- data/lib/generators/e11y/event/event_generator.rb +22 -0
- data/lib/generators/e11y/event/templates/event.rb.tt +16 -0
- data/lib/generators/e11y/grafana_dashboard/grafana_dashboard_generator.rb +30 -0
- data/lib/generators/e11y/grafana_dashboard/templates/e11y_dashboard.json +81 -0
- data/lib/generators/e11y/install/install_generator.rb +34 -0
- data/lib/generators/e11y/install/templates/e11y.rb +239 -0
- data/lib/generators/e11y/prometheus_alerts/prometheus_alerts_generator.rb +29 -0
- data/lib/generators/e11y/prometheus_alerts/templates/e11y_alerts.yml +28 -0
- data/lib/tasks/e11y_docs.rake +30 -0
- data/lib/tasks/e11y_events.rake +71 -0
- data/lib/tasks/e11y_lint.rake +91 -0
- data/lib/tasks/e11y_slo.rake +29 -0
- metadata +186 -39
- data/docs/ADR-003-slo-observability.md +0 -3337
- data/docs/ADR-010-developer-experience.md +0 -2166
- data/docs/API-REFERENCE-L28.md +0 -914
- data/docs/COMPREHENSIVE-CONFIGURATION.md +0 -2366
- data/docs/CONTRIBUTING.md +0 -312
- data/docs/IMPLEMENTATION_NOTES.md +0 -2804
- data/docs/IMPLEMENTATION_PLAN.md +0 -1971
- data/docs/IMPLEMENTATION_PLAN_ARCHITECTURE.md +0 -586
- data/docs/PLAN.md +0 -148
- data/docs/README.md +0 -296
- data/docs/design/00-memory-optimization.md +0 -593
- data/docs/guides/MIGRATION-L27-L28.md +0 -692
- data/docs/guides/PERFORMANCE-BENCHMARKS.md +0 -434
- data/docs/guides/README.md +0 -44
- data/docs/use_cases/UC-003-pattern-based-metrics.md +0 -1627
- data/lib/e11y/adapters/registry.rb +0 -141
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
require "rack/request"
|
|
4
4
|
require "securerandom"
|
|
5
|
+
require "e11y/tracing/propagator"
|
|
6
|
+
require "e11y/trace_context/sampler"
|
|
5
7
|
|
|
6
8
|
module E11y
|
|
7
9
|
module Middleware
|
|
@@ -32,13 +34,14 @@ module E11y
|
|
|
32
34
|
# Process request
|
|
33
35
|
# @param env [Hash] Rack environment
|
|
34
36
|
# @return [Array] Rack response [status, headers, body]
|
|
35
|
-
# rubocop:disable Metrics/AbcSize, Metrics/
|
|
37
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
|
36
38
|
# Rack middleware request processing requires sequential setup of tracing, context, buffer, and SLO tracking
|
|
37
39
|
def call(env)
|
|
38
40
|
request = Rack::Request.new(env)
|
|
39
41
|
|
|
40
|
-
# Extract or generate trace_id
|
|
41
|
-
|
|
42
|
+
# Extract or generate trace context (trace_id, sampled from traceparent)
|
|
43
|
+
trace_ctx = extract_trace_context(request)
|
|
44
|
+
trace_id = trace_ctx[:trace_id] || generate_trace_id
|
|
42
45
|
span_id = generate_span_id
|
|
43
46
|
|
|
44
47
|
# Set request context (ActiveSupport::CurrentAttributes)
|
|
@@ -50,9 +53,10 @@ module E11y
|
|
|
50
53
|
E11y::Current.user_agent = request.user_agent
|
|
51
54
|
E11y::Current.request_method = request.request_method
|
|
52
55
|
E11y::Current.request_path = request.path
|
|
56
|
+
E11y::Current.sampled = resolve_sampled(trace_ctx)
|
|
53
57
|
|
|
54
58
|
# Start request-scoped buffer (for debug events)
|
|
55
|
-
E11y::Buffers::
|
|
59
|
+
E11y::Buffers::EphemeralBuffer.initialize! if E11y.config.ephemeral_buffer_enabled
|
|
56
60
|
|
|
57
61
|
# Track request start time for SLO
|
|
58
62
|
start_time = Time.now
|
|
@@ -60,6 +64,9 @@ module E11y
|
|
|
60
64
|
# Call next middleware/app
|
|
61
65
|
status, headers, body = @app.call(env)
|
|
62
66
|
|
|
67
|
+
# Flush buffer if status matches configured flush_on_statuses (default: 5xx only)
|
|
68
|
+
E11y::Buffers::EphemeralBuffer.flush_on_error if should_flush_buffer?(status)
|
|
69
|
+
|
|
63
70
|
# Track SLO metrics (if enabled)
|
|
64
71
|
track_http_request_slo(env, status, start_time)
|
|
65
72
|
|
|
@@ -70,38 +77,80 @@ module E11y
|
|
|
70
77
|
[status, headers, body]
|
|
71
78
|
rescue StandardError
|
|
72
79
|
# Flush request buffer on error (includes debug events)
|
|
73
|
-
E11y::Buffers::
|
|
80
|
+
E11y::Buffers::EphemeralBuffer.flush_on_error if E11y.config.ephemeral_buffer_enabled
|
|
74
81
|
|
|
75
82
|
raise # Re-raise original exception
|
|
76
83
|
ensure
|
|
77
84
|
# Discard request buffer on success (not on error, already flushed above)
|
|
78
85
|
# We need to check if we're here from normal completion or exception
|
|
79
86
|
# If there was an exception, buffer was already flushed in rescue block
|
|
80
|
-
if !$ERROR_INFO && E11y.config.
|
|
81
|
-
E11y::Buffers::RequestScopedBuffer.discard
|
|
82
|
-
end
|
|
87
|
+
E11y::Buffers::EphemeralBuffer.discard if !$ERROR_INFO && E11y.config.ephemeral_buffer_enabled # No exception occurred
|
|
83
88
|
|
|
84
89
|
# Reset context
|
|
85
90
|
E11y::Current.reset
|
|
86
91
|
end
|
|
87
|
-
# rubocop:enable Metrics/AbcSize, Metrics/
|
|
92
|
+
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
|
88
93
|
|
|
89
94
|
private
|
|
90
95
|
|
|
91
|
-
#
|
|
96
|
+
# Determine whether the request-scoped buffer should be flushed for this status code.
|
|
97
|
+
#
|
|
98
|
+
# Two independent conditions (either is sufficient):
|
|
99
|
+
# - +flush_on_error+ (default: true) — flushes on any 5xx server error
|
|
100
|
+
# - +flush_on_statuses+ (default: []) — extra status codes/ranges, e.g. [403]
|
|
101
|
+
#
|
|
102
|
+
# @example Default behaviour — flush on 5xx only
|
|
103
|
+
# config.ephemeral_buffer_flush_on_error = true # default
|
|
104
|
+
# config.ephemeral_buffer_flush_on_statuses = [] # default
|
|
105
|
+
#
|
|
106
|
+
# @example Flush on 403 in addition to 5xx
|
|
107
|
+
# config.ephemeral_buffer_flush_on_statuses = [403]
|
|
108
|
+
#
|
|
109
|
+
# @example Flush only on explicit statuses (disable 5xx default)
|
|
110
|
+
# config.ephemeral_buffer_flush_on_error = false
|
|
111
|
+
# config.ephemeral_buffer_flush_on_statuses = [403, 422]
|
|
112
|
+
#
|
|
113
|
+
# @param status [Integer] HTTP response status code
|
|
114
|
+
# @return [Boolean]
|
|
115
|
+
def should_flush_buffer?(status)
|
|
116
|
+
return false unless E11y.config.ephemeral_buffer_enabled
|
|
117
|
+
|
|
118
|
+
# Condition 1: server error flush (5xx)
|
|
119
|
+
return true if E11y.config.ephemeral_buffer_flush_on_error && status >= 500
|
|
120
|
+
|
|
121
|
+
# Condition 2: explicit extra statuses
|
|
122
|
+
extra = E11y.config.ephemeral_buffer_flush_on_statuses
|
|
123
|
+
extra&.any? { |s| s === status } || false # rubocop:disable Style/CaseEquality
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
# Extract trace context from request headers (W3C Trace Context or custom).
|
|
127
|
+
# Also extracts tracestate into E11y::Current.baggage (F-014).
|
|
92
128
|
# @param request [Rack::Request] Rack request
|
|
93
|
-
# @return [
|
|
94
|
-
def
|
|
95
|
-
# W3C Trace Context (traceparent header)
|
|
96
|
-
# Format: version-trace_id-span_id-flags
|
|
97
|
-
# Example: 00-0af7651916cd43dd8448eb211c80319c-00f067aa0ba902b7-01
|
|
129
|
+
# @return [Hash] { trace_id:, sampled: (from traceparent, or nil if new trace) }
|
|
130
|
+
def extract_trace_context(request)
|
|
98
131
|
traceparent = request.get_header("HTTP_TRACEPARENT")
|
|
99
|
-
|
|
132
|
+
tracestate = request.get_header("HTTP_TRACESTATE")
|
|
133
|
+
|
|
134
|
+
if tracestate && E11y::Current.respond_to?(:baggage=)
|
|
135
|
+
baggage = E11y::Tracing::Propagator.parse_tracestate(tracestate)
|
|
136
|
+
E11y::Current.baggage = baggage if baggage.any?
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
if traceparent
|
|
140
|
+
parsed = E11y::Tracing::Propagator.parse(traceparent)
|
|
141
|
+
return { trace_id: parsed[:trace_id], sampled: parsed[:sampled] } if parsed
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
trace_id = request.get_header("HTTP_X_REQUEST_ID") || request.get_header("HTTP_X_TRACE_ID")
|
|
145
|
+
{ trace_id: trace_id, sampled: nil }
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
# Resolve sampling decision: from parent (traceparent) or Sampler for new trace.
|
|
149
|
+
# Context for Sampler = E11y::Current.to_context (already set above).
|
|
150
|
+
def resolve_sampled(trace_ctx)
|
|
151
|
+
return trace_ctx[:sampled] if trace_ctx.key?(:sampled) && !trace_ctx[:sampled].nil?
|
|
100
152
|
|
|
101
|
-
|
|
102
|
-
request.get_header("HTTP_X_REQUEST_ID") ||
|
|
103
|
-
# X-Trace-Id (custom)
|
|
104
|
-
request.get_header("HTTP_X_TRACE_ID")
|
|
153
|
+
E11y::TraceContext::Sampler.should_sample?(E11y::Current.to_context)
|
|
105
154
|
end
|
|
106
155
|
|
|
107
156
|
# Extract request_id from Rack env
|
|
@@ -141,10 +190,9 @@ module E11y
|
|
|
141
190
|
# @param start_time [Time] Request start time
|
|
142
191
|
# @return [void]
|
|
143
192
|
# @api private
|
|
144
|
-
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
|
|
145
193
|
# SLO tracking requires extracting controller/action, calculating duration, and error handling
|
|
146
194
|
def track_http_request_slo(env, status, start_time)
|
|
147
|
-
return unless E11y.config.
|
|
195
|
+
return unless E11y.config.respond_to?(:slo_tracking_enabled) && E11y.config.slo_tracking_enabled
|
|
148
196
|
|
|
149
197
|
duration_ms = ((Time.now - start_time) * 1000).round(2)
|
|
150
198
|
|
|
@@ -163,7 +211,6 @@ module E11y
|
|
|
163
211
|
# Don't fail if SLO tracking fails
|
|
164
212
|
warn "[E11y] SLO tracking error: #{e.message}"
|
|
165
213
|
end
|
|
166
|
-
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity
|
|
167
214
|
end
|
|
168
215
|
end
|
|
169
216
|
end
|
|
@@ -40,13 +40,8 @@ module E11y
|
|
|
40
40
|
# # Rule: ->(e) { :audit_encrypted if e[:audit_event] }
|
|
41
41
|
# # Routes to: [:audit_encrypted]
|
|
42
42
|
#
|
|
43
|
-
#
|
|
44
|
-
#
|
|
45
|
-
# event_name: 'order.placed',
|
|
46
|
-
# retention_until: '2026-04-21T...' # 90 days
|
|
47
|
-
# }
|
|
48
|
-
# # Rule: ->(e) { days > 30 ? :s3_standard : :loki }
|
|
49
|
-
# # Routes to: [:s3_standard]
|
|
43
|
+
# Note: retention_until is for archival jobs (run separately), not for routing.
|
|
44
|
+
# Archival happens later — cron/Loki compaction filters by retention_until.
|
|
50
45
|
class Routing < Base
|
|
51
46
|
middleware_zone :adapters
|
|
52
47
|
|
|
@@ -58,10 +53,23 @@ module E11y
|
|
|
58
53
|
# @option event_data [Boolean] :audit_event Audit event flag (optional, for routing rules)
|
|
59
54
|
# @option event_data [Symbol] :severity Event severity (optional, for routing rules)
|
|
60
55
|
# @return [Hash, nil] Event data (passed to next middleware), or nil if dropped
|
|
61
|
-
# rubocop:disable Metrics/
|
|
56
|
+
# rubocop:disable Metrics/PerceivedComplexity
|
|
62
57
|
# Routing logic requires adapter selection, iteration with error handling,
|
|
63
58
|
# metadata enrichment, and metrics tracking
|
|
64
59
|
def call(event_data)
|
|
60
|
+
# Handle nil from upstream middleware (e.g., rate limiting, sampling)
|
|
61
|
+
return nil unless event_data
|
|
62
|
+
|
|
63
|
+
# 0. Request-scoped buffer: buffer debug events instead of writing when enabled
|
|
64
|
+
# Skip when event is from a flush (avoid re-buffering)
|
|
65
|
+
if !event_data[:from_ephemeral_buffer_flush] &&
|
|
66
|
+
event_data[:severity] == :debug &&
|
|
67
|
+
E11y.config.ephemeral_buffer_enabled &&
|
|
68
|
+
E11y::Buffers::EphemeralBuffer.active? && E11y::Buffers::EphemeralBuffer.add_event(event_data)
|
|
69
|
+
# Buffered — skip adapter writes, pass through
|
|
70
|
+
return @app&.call(event_data)
|
|
71
|
+
end
|
|
72
|
+
|
|
65
73
|
# 1. Determine target adapters (explicit or via routing rules)
|
|
66
74
|
target_adapters = if event_data[:adapters]&.any?
|
|
67
75
|
# Explicit adapters bypass routing rules
|
|
@@ -71,18 +79,28 @@ module E11y
|
|
|
71
79
|
apply_routing_rules(event_data)
|
|
72
80
|
end
|
|
73
81
|
|
|
82
|
+
# 1.5. Validate audit events have proper routing (UC-012 compliance requirement)
|
|
83
|
+
validate_audit_routing!(event_data, target_adapters)
|
|
84
|
+
|
|
74
85
|
# 2. Write to selected adapters
|
|
75
86
|
target_adapters.each do |adapter_name|
|
|
76
87
|
adapter = E11y.configuration.adapters[adapter_name]
|
|
77
88
|
next unless adapter
|
|
78
89
|
|
|
90
|
+
# Per-adapter payload: merge payload_rewrites only when present (explicit_pii exclude_adapters)
|
|
91
|
+
data_to_write = if event_data[:payload_rewrites] && event_data[:payload_rewrites][adapter_name]
|
|
92
|
+
payload = event_data[:payload]&.dup || {}
|
|
93
|
+
payload.merge!(event_data[:payload_rewrites][adapter_name])
|
|
94
|
+
event_data.merge(payload: payload)
|
|
95
|
+
else
|
|
96
|
+
event_data
|
|
97
|
+
end
|
|
98
|
+
|
|
79
99
|
begin
|
|
80
|
-
adapter.write(
|
|
81
|
-
increment_metric("e11y.middleware.routing.write_success", adapter: adapter_name)
|
|
100
|
+
adapter.write(data_to_write)
|
|
82
101
|
rescue StandardError => e
|
|
83
102
|
# Log routing error but don't fail pipeline
|
|
84
103
|
warn "E11y routing error for adapter #{adapter_name}: #{e.message}"
|
|
85
|
-
increment_metric("e11y.middleware.routing.write_error", adapter: adapter_name)
|
|
86
104
|
end
|
|
87
105
|
end
|
|
88
106
|
|
|
@@ -94,9 +112,9 @@ module E11y
|
|
|
94
112
|
}
|
|
95
113
|
|
|
96
114
|
# 4. Increment metrics
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
115
|
+
E11y::Metrics.increment("e11y.middleware.routing.routed",
|
|
116
|
+
adapters_count: target_adapters.size,
|
|
117
|
+
routing_type: event_data[:routing][:routing_type])
|
|
100
118
|
|
|
101
119
|
# 5. Log routing decision (for debugging)
|
|
102
120
|
log_routing_decision(event_data, target_adapters) if debug_enabled?
|
|
@@ -104,7 +122,7 @@ module E11y
|
|
|
104
122
|
# 6. Pass to next app (if any)
|
|
105
123
|
@app&.call(event_data)
|
|
106
124
|
end
|
|
107
|
-
# rubocop:enable Metrics/
|
|
125
|
+
# rubocop:enable Metrics/PerceivedComplexity
|
|
108
126
|
|
|
109
127
|
private
|
|
110
128
|
|
|
@@ -124,12 +142,12 @@ module E11y
|
|
|
124
142
|
# ->(event) { :audit_encrypted if event[:audit_event] },
|
|
125
143
|
# ->(event) {
|
|
126
144
|
# days = (Time.parse(event[:retention_until]) - Time.now) / 86400
|
|
127
|
-
# days > 90 ? :
|
|
145
|
+
# days > 90 ? :archive : :loki
|
|
128
146
|
# }
|
|
129
147
|
# ]
|
|
130
148
|
#
|
|
131
149
|
# apply_routing_rules(event_data)
|
|
132
|
-
# # => [:audit_encrypted] or [:loki] or [:
|
|
150
|
+
# # => [:audit_encrypted] or [:loki] or [:archive]
|
|
133
151
|
def apply_routing_rules(event_data)
|
|
134
152
|
matched_adapters = []
|
|
135
153
|
|
|
@@ -143,10 +161,12 @@ module E11y
|
|
|
143
161
|
warn "E11y routing rule error: #{e.message}"
|
|
144
162
|
end
|
|
145
163
|
|
|
146
|
-
#
|
|
164
|
+
# Track whether fallback was used (for audit validation)
|
|
147
165
|
if matched_adapters.any?
|
|
166
|
+
event_data[:routing_used_fallback] = false
|
|
148
167
|
matched_adapters.uniq
|
|
149
168
|
else
|
|
169
|
+
event_data[:routing_used_fallback] = true
|
|
150
170
|
E11y.configuration.fallback_adapters || [:stdout]
|
|
151
171
|
end
|
|
152
172
|
end
|
|
@@ -175,9 +195,46 @@ module E11y
|
|
|
175
195
|
# @param metric_name [String] Metric name
|
|
176
196
|
# @param tags [Hash] Metric tags
|
|
177
197
|
# @return [void]
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
198
|
+
# Validate audit events have proper routing configuration.
|
|
199
|
+
#
|
|
200
|
+
# Audit events MUST be routed via explicit adapters OR routing rules.
|
|
201
|
+
# Relying on fallback routing (no rule matched) is a compliance configuration error.
|
|
202
|
+
#
|
|
203
|
+
# @param event_data [Hash] Event data
|
|
204
|
+
# @param target_adapters [Array<Symbol>] Target adapters
|
|
205
|
+
# @raise [E11y::Error] if audit event misconfigured
|
|
206
|
+
# @return [void]
|
|
207
|
+
def validate_audit_routing!(event_data, target_adapters)
|
|
208
|
+
return unless event_data[:audit_event]
|
|
209
|
+
|
|
210
|
+
# Audit events are valid if:
|
|
211
|
+
# 1. They have explicit adapters (non-empty), OR
|
|
212
|
+
# 2. They matched a routing rule (routing_used_fallback = false)
|
|
213
|
+
|
|
214
|
+
has_explicit_adapters = event_data[:adapters]&.any?
|
|
215
|
+
return if has_explicit_adapters # Explicit adapters → valid
|
|
216
|
+
|
|
217
|
+
# Check if fallback was used (set by apply_routing_rules)
|
|
218
|
+
used_fallback = event_data[:routing_used_fallback]
|
|
219
|
+
return unless used_fallback
|
|
220
|
+
|
|
221
|
+
# CRITICAL: Audit event using fallback routing (no rule matched!)
|
|
222
|
+
error_message = <<~ERROR
|
|
223
|
+
[E11y] CRITICAL: Audit event has no routing configuration!
|
|
224
|
+
|
|
225
|
+
Event: #{event_data[:event_name]}
|
|
226
|
+
Routed to: #{target_adapters.inspect} (fallback adapters)
|
|
227
|
+
|
|
228
|
+
Audit events MUST be explicitly routed to compliance-grade storage.
|
|
229
|
+
|
|
230
|
+
Fix options:
|
|
231
|
+
1. Add explicit adapters: `adapters :audit_encrypted`
|
|
232
|
+
2. Configure routing rule: `config.routing_rules = [->(e) { :audit_encrypted if e[:audit_event] }]`
|
|
233
|
+
|
|
234
|
+
See UC-012 Audit Trail documentation for details.
|
|
235
|
+
ERROR
|
|
236
|
+
|
|
237
|
+
raise E11y::Error, error_message
|
|
181
238
|
end
|
|
182
239
|
end
|
|
183
240
|
end
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "e11y/middleware/base"
|
|
4
|
+
require "e11y/sampling/stratified_tracker"
|
|
4
5
|
|
|
5
6
|
module E11y
|
|
6
7
|
module Middleware
|
|
@@ -51,6 +52,8 @@ module E11y
|
|
|
51
52
|
# }
|
|
52
53
|
# }
|
|
53
54
|
# end
|
|
55
|
+
# rubocop:disable Metrics/ClassLength
|
|
56
|
+
# Class has 6 adaptive sampling strategies each requiring dedicated setup + private methods
|
|
54
57
|
class Sampling < Base
|
|
55
58
|
middleware_zone :routing
|
|
56
59
|
|
|
@@ -79,6 +82,9 @@ module E11y
|
|
|
79
82
|
# @param event_data [Hash] The event payload
|
|
80
83
|
# @return [Hash, nil] The event payload if sampled, nil if dropped
|
|
81
84
|
def call(event_data)
|
|
85
|
+
# Handle nil from upstream middleware (e.g., rate limiting)
|
|
86
|
+
return nil unless event_data
|
|
87
|
+
|
|
82
88
|
event_class = event_data[:event_class]
|
|
83
89
|
|
|
84
90
|
# Track errors for error-based adaptive sampling (FEAT-4838)
|
|
@@ -87,13 +93,34 @@ module E11y
|
|
|
87
93
|
# Track events for load-based adaptive sampling (FEAT-4842)
|
|
88
94
|
@load_monitor&.record_event
|
|
89
95
|
|
|
96
|
+
# C11: Get sample rate and severity before decision (for StratifiedTracker)
|
|
97
|
+
sample_rate = determine_sample_rate(event_class, event_data)
|
|
98
|
+
severity = event_data[:severity] || (event_class.respond_to?(:severity) ? event_class.severity : :info)
|
|
99
|
+
|
|
90
100
|
# Determine if event should be sampled
|
|
91
101
|
# Drop event if not sampled
|
|
92
|
-
|
|
102
|
+
unless should_sample?(event_data, event_class)
|
|
103
|
+
# C11: Record dropped event to StratifiedTracker for sampling correction
|
|
104
|
+
E11y::Sampling.stratified_tracker.record_sample(severity: severity, sample_rate: sample_rate, sampled: false)
|
|
105
|
+
begin
|
|
106
|
+
if defined?(E11y::Metrics) && E11y::Metrics.respond_to?(:increment)
|
|
107
|
+
E11y::Metrics.increment(:e11y_events_dropped_total, {
|
|
108
|
+
reason: "sampled_out",
|
|
109
|
+
event_type: event_data[:event_name].to_s
|
|
110
|
+
})
|
|
111
|
+
end
|
|
112
|
+
rescue StandardError
|
|
113
|
+
# non-fatal
|
|
114
|
+
end
|
|
115
|
+
return nil
|
|
116
|
+
end
|
|
93
117
|
|
|
94
118
|
# Mark as sampled for downstream middleware
|
|
95
119
|
event_data[:sampled] = true
|
|
96
|
-
event_data[:sample_rate] =
|
|
120
|
+
event_data[:sample_rate] = sample_rate
|
|
121
|
+
|
|
122
|
+
# C11: Record sampled event to StratifiedTracker for sampling correction
|
|
123
|
+
E11y::Sampling.stratified_tracker.record_sample(severity: severity, sample_rate: sample_rate, sampled: true)
|
|
97
124
|
|
|
98
125
|
# Pass to next middleware
|
|
99
126
|
@app.call(event_data)
|
|
@@ -121,6 +148,7 @@ module E11y
|
|
|
121
148
|
@default_sample_rate = config.fetch(:default_sample_rate, 1.0)
|
|
122
149
|
@trace_aware = config.fetch(:trace_aware, true)
|
|
123
150
|
@severity_rates = config.fetch(:severity_rates, {})
|
|
151
|
+
@pattern_rates = config.fetch(:pattern_rates, []) # [[Regexp, Float], ...]
|
|
124
152
|
@trace_decisions = {} # Cache for trace-level sampling decisions
|
|
125
153
|
@trace_decisions_mutex = Mutex.new
|
|
126
154
|
end
|
|
@@ -158,8 +186,10 @@ module E11y
|
|
|
158
186
|
# 1. Check if audit event (never sample audit events!)
|
|
159
187
|
return true if event_class.respond_to?(:audit_event?) && event_class.audit_event?
|
|
160
188
|
|
|
161
|
-
# 2.
|
|
189
|
+
# 2. Trace-consistent sampling (ADR-005 §7): prefer E11y::Current.sampled when trace_aware
|
|
162
190
|
if @trace_aware && event_data[:trace_id]
|
|
191
|
+
return E11y::Current.sampled if E11y::Current.respond_to?(:sampled) && !E11y::Current.sampled.nil?
|
|
192
|
+
|
|
163
193
|
return trace_sampling_decision(event_data[:trace_id], event_class, event_data)
|
|
164
194
|
end
|
|
165
195
|
|
|
@@ -183,22 +213,32 @@ module E11y
|
|
|
183
213
|
# @param event_class [Class] The event class
|
|
184
214
|
# @param event_data [Hash] Event payload (for value-based sampling)
|
|
185
215
|
# @return [Float] Sample rate (0.0-1.0)
|
|
186
|
-
# rubocop:disable Metrics/
|
|
187
|
-
# Sample rate determination follows priority chain:
|
|
188
|
-
#
|
|
216
|
+
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
217
|
+
# Sample rate determination follows a 6-step priority chain:
|
|
218
|
+
# error spike (0) → pattern-based (0.5) → value-based (1) →
|
|
219
|
+
# load-based (2) → severity (3) → event-level (4) → default (5)
|
|
189
220
|
def determine_sample_rate(event_class, event_data = nil)
|
|
190
221
|
# 0. Error-based adaptive sampling (FEAT-4838) - highest priority!
|
|
191
222
|
if @error_based_adaptive && @error_spike_detector&.error_spike?
|
|
192
223
|
return 1.0 # 100% sampling during error spike
|
|
193
224
|
end
|
|
194
225
|
|
|
226
|
+
# 0.5. Pattern-based sampling (by event_name) - overrides event-level config
|
|
227
|
+
if event_data && !@pattern_rates.empty?
|
|
228
|
+
event_name = event_data[:event_name].to_s
|
|
229
|
+
@pattern_rates.each do |pattern, rate|
|
|
230
|
+
return rate if pattern.match?(event_name)
|
|
231
|
+
end
|
|
232
|
+
end
|
|
233
|
+
|
|
195
234
|
# 1. Value-based sampling (FEAT-4849) - high-value events always sampled
|
|
196
235
|
if event_data && event_class.respond_to?(:value_sampling_configs)
|
|
197
236
|
configs = event_class.value_sampling_configs
|
|
198
237
|
unless configs.empty?
|
|
199
238
|
require "e11y/sampling/value_extractor"
|
|
200
239
|
extractor = E11y::Sampling::ValueExtractor.new
|
|
201
|
-
|
|
240
|
+
payload = event_data[:payload] || event_data
|
|
241
|
+
if configs.any? { |config| config.matches?(payload, extractor) }
|
|
202
242
|
return 1.0 # 100% sampling for high-value events
|
|
203
243
|
end
|
|
204
244
|
end
|
|
@@ -228,7 +268,7 @@ module E11y
|
|
|
228
268
|
# 4. Default/load-based rate
|
|
229
269
|
base_rate
|
|
230
270
|
end
|
|
231
|
-
# rubocop:enable Metrics/
|
|
271
|
+
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
232
272
|
|
|
233
273
|
# Trace-aware sampling decision (C05 Resolution)
|
|
234
274
|
#
|
|
@@ -241,15 +281,21 @@ module E11y
|
|
|
241
281
|
# @return [Boolean] true if trace should be sampled
|
|
242
282
|
def trace_sampling_decision(trace_id, event_class, event_data = nil)
|
|
243
283
|
@trace_decisions_mutex.synchronize do
|
|
284
|
+
# Use monotonic clock (Float) to avoid Time object allocation — prevents memory leak in hot path
|
|
285
|
+
now = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
286
|
+
|
|
244
287
|
# Check if decision already made for this trace
|
|
245
|
-
|
|
288
|
+
if (entry = @trace_decisions[trace_id])
|
|
289
|
+
entry[:last_access] = now # LRU touch
|
|
290
|
+
return entry[:decision]
|
|
291
|
+
end
|
|
246
292
|
|
|
247
293
|
# Make new sampling decision
|
|
248
294
|
sample_rate = determine_sample_rate(event_class, event_data)
|
|
249
295
|
decision = rand < sample_rate
|
|
250
296
|
|
|
251
|
-
# Cache decision (
|
|
252
|
-
@trace_decisions[trace_id] = decision
|
|
297
|
+
# Cache decision with LRU metadata (evict oldest on cleanup)
|
|
298
|
+
@trace_decisions[trace_id] = { decision: decision, last_access: now }
|
|
253
299
|
|
|
254
300
|
# Cleanup old decisions periodically (every 1000 traces)
|
|
255
301
|
cleanup_trace_decisions if @trace_decisions.size > 1000
|
|
@@ -260,14 +306,17 @@ module E11y
|
|
|
260
306
|
|
|
261
307
|
# Cleanup old trace decisions to prevent memory leaks
|
|
262
308
|
#
|
|
263
|
-
#
|
|
264
|
-
#
|
|
265
|
-
# so old decisions are likely stale.
|
|
309
|
+
# Evicts oldest 50% by last_access (LRU). Active traces stay in cache
|
|
310
|
+
# because they are touched on each lookup, preserving trace-level consistency.
|
|
266
311
|
def cleanup_trace_decisions
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
312
|
+
return if @trace_decisions.size <= 100
|
|
313
|
+
|
|
314
|
+
size_to_remove = @trace_decisions.size / 2
|
|
315
|
+
sorted = @trace_decisions.to_a.sort_by { |_, v| v[:last_access] }
|
|
316
|
+
keys_to_remove = sorted.first(size_to_remove).map(&:first)
|
|
317
|
+
keys_to_remove.each { |k| @trace_decisions.delete(k) }
|
|
270
318
|
end
|
|
271
319
|
end
|
|
320
|
+
# rubocop:enable Metrics/ClassLength
|
|
272
321
|
end
|
|
273
322
|
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "e11y/middleware/base"
|
|
4
|
+
require "e11y/slo/config_loader"
|
|
5
|
+
|
|
6
|
+
module E11y
|
|
7
|
+
module Middleware
|
|
8
|
+
# SelfMonitoringEmit middleware — emits e11y_events_tracked_total at pipeline end.
|
|
9
|
+
#
|
|
10
|
+
# When e11y_self_monitoring.enabled is true in slo.yml, increments the counter
|
|
11
|
+
# for each event that reaches the end of the pipeline (after EventSlo).
|
|
12
|
+
#
|
|
13
|
+
# **Middleware Zone:** `:post_processing` (last in pipeline)
|
|
14
|
+
#
|
|
15
|
+
# @example slo.yml
|
|
16
|
+
# e11y_self_monitoring:
|
|
17
|
+
# enabled: true
|
|
18
|
+
# targets:
|
|
19
|
+
# reliability: 0.999
|
|
20
|
+
#
|
|
21
|
+
# @see docs/plans/2026-03-13-slo-linters-self-monitoring-plan.md
|
|
22
|
+
class SelfMonitoringEmit < Base
|
|
23
|
+
middleware_zone :post_processing
|
|
24
|
+
|
|
25
|
+
# Process event and optionally emit self-monitoring metric.
|
|
26
|
+
#
|
|
27
|
+
# @param event_data [Hash, nil] Event payload (nil passes through)
|
|
28
|
+
# @return [Hash, nil] Unchanged event_data (passthrough)
|
|
29
|
+
def call(event_data)
|
|
30
|
+
if event_data && E11y::SLO::ConfigLoader.self_monitoring_enabled?
|
|
31
|
+
event_name = event_data[:event_name].to_s.presence || "unknown"
|
|
32
|
+
E11y::Metrics.increment(:e11y_events_tracked_total, result: "success", event_name: event_name)
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
@app&.call(event_data) || event_data
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
@@ -56,19 +56,19 @@ module E11y
|
|
|
56
56
|
def call(event_data)
|
|
57
57
|
enrich_trace_context(event_data)
|
|
58
58
|
enrich_service_context(event_data)
|
|
59
|
-
|
|
59
|
+
E11y::Metrics.increment("e11y.middleware.trace_context.processed")
|
|
60
60
|
@app.call(event_data)
|
|
61
61
|
end
|
|
62
62
|
|
|
63
63
|
private
|
|
64
64
|
|
|
65
|
-
# rubocop:disable Metrics/AbcSize
|
|
65
|
+
# rubocop:disable Metrics/AbcSize
|
|
66
66
|
# Add distributed tracing fields to event data
|
|
67
67
|
# @param event_data [Hash] Event data to enrich
|
|
68
68
|
# @return [void]
|
|
69
69
|
def enrich_trace_context(event_data)
|
|
70
70
|
event_data[:trace_id] ||= current_trace_id || generate_trace_id
|
|
71
|
-
event_data[:span_id] ||= generate_span_id
|
|
71
|
+
event_data[:span_id] ||= current_span_id || generate_span_id
|
|
72
72
|
event_data[:parent_trace_id] ||= current_parent_trace_id if current_parent_trace_id
|
|
73
73
|
|
|
74
74
|
# Format timestamp if it's a Time object
|
|
@@ -93,7 +93,7 @@ module E11y
|
|
|
93
93
|
|
|
94
94
|
event_data[:audit_event] = event_class.audit_event?
|
|
95
95
|
end
|
|
96
|
-
# rubocop:enable Metrics/AbcSize
|
|
96
|
+
# rubocop:enable Metrics/AbcSize
|
|
97
97
|
|
|
98
98
|
# Add service context fields to event data
|
|
99
99
|
# @param event_data [Hash] Event data to enrich
|
|
@@ -103,15 +103,54 @@ module E11y
|
|
|
103
103
|
event_data[:environment] ||= E11y.config.environment
|
|
104
104
|
end
|
|
105
105
|
|
|
106
|
-
# Get current trace ID from
|
|
106
|
+
# Get current trace ID from configured source (ADR-007 §8).
|
|
107
107
|
#
|
|
108
|
-
#
|
|
108
|
+
# When config.tracing_source is :opentelemetry and OTel SDK has an active span,
|
|
109
|
+
# uses trace_id from OpenTelemetry::Trace.current_span.
|
|
110
|
+
# Otherwise: E11y::Current > Thread.current
|
|
109
111
|
#
|
|
110
112
|
# @return [String, nil] Current trace ID if set, nil otherwise
|
|
111
113
|
def current_trace_id
|
|
114
|
+
if tracing_source_opentelemetry?
|
|
115
|
+
otel = otel_trace_context
|
|
116
|
+
return otel[:trace_id] if otel[:trace_id]
|
|
117
|
+
end
|
|
112
118
|
E11y::Current.trace_id || Thread.current[:e11y_trace_id]
|
|
113
119
|
end
|
|
114
120
|
|
|
121
|
+
# Get current span ID (for event correlation).
|
|
122
|
+
# When using OTel source and span exists, returns OTel span_id; otherwise nil (caller generates).
|
|
123
|
+
#
|
|
124
|
+
# @return [String, nil]
|
|
125
|
+
def current_span_id
|
|
126
|
+
return nil unless tracing_source_opentelemetry?
|
|
127
|
+
|
|
128
|
+
otel = otel_trace_context
|
|
129
|
+
otel[:span_id]
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
def tracing_source_opentelemetry?
|
|
133
|
+
E11y.config&.tracing_source == :opentelemetry
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
def otel_trace_context
|
|
137
|
+
return {} unless defined?(OpenTelemetry::Trace)
|
|
138
|
+
|
|
139
|
+
span = OpenTelemetry::Trace.current_span
|
|
140
|
+
ctx = span.context
|
|
141
|
+
return {} unless ctx.respond_to?(:valid?) && ctx.valid?
|
|
142
|
+
|
|
143
|
+
trace_id = ctx.respond_to?(:hex_trace_id) ? ctx.hex_trace_id : nil
|
|
144
|
+
span_id = ctx.respond_to?(:hex_span_id) ? ctx.hex_span_id : nil
|
|
145
|
+
return {} if trace_id.to_s.empty?
|
|
146
|
+
|
|
147
|
+
# Sync to E11y::Current so downstream uses same context
|
|
148
|
+
E11y::Current.trace_id = trace_id
|
|
149
|
+
E11y::Current.span_id = span_id
|
|
150
|
+
|
|
151
|
+
{ trace_id: trace_id, span_id: span_id }
|
|
152
|
+
end
|
|
153
|
+
|
|
115
154
|
# Get current parent trace ID from E11y::Current (background job context).
|
|
116
155
|
#
|
|
117
156
|
# Only set for background jobs that have a parent request trace.
|
|
@@ -151,10 +190,6 @@ module E11y
|
|
|
151
190
|
#
|
|
152
191
|
# @param metric_name [String] Metric name
|
|
153
192
|
# @return [void]
|
|
154
|
-
def increment_metric(_metric_name)
|
|
155
|
-
# TODO: Integrate with Yabeda/Prometheus in Phase 2
|
|
156
|
-
# Yabeda.e11y.middleware_trace_context_processed.increment
|
|
157
|
-
end
|
|
158
193
|
end
|
|
159
194
|
end
|
|
160
195
|
end
|