e11y 0.2.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +130 -10
- data/CHANGELOG.md +80 -1
- data/CLAUDE.md +168 -0
- data/CONTRIBUTING.md +640 -0
- data/README.md +165 -701
- data/RELEASE.md +41 -12
- data/Rakefile +249 -57
- data/config/README.md +1 -1
- data/config/loki-local-config.yaml +12 -0
- data/config/otel-collector-config.yaml +44 -0
- data/cucumber.yml +1 -0
- data/docker-compose.yml +18 -2
- data/docs/ADAPTERS.md +76 -0
- data/docs/ADAPTIVE_SAMPLING.md +59 -0
- data/docs/COMPARISON.md +104 -0
- data/docs/CONFIGURATION.md +52 -0
- data/docs/DISTRIBUTED_TRACING.md +44 -0
- data/docs/LIMITATIONS.md +13 -0
- data/docs/METRICS_DSL.md +84 -0
- data/docs/PERFORMANCE.md +60 -0
- data/docs/PII_FILTERING.md +40 -0
- data/docs/PRESETS.md +65 -0
- data/docs/QUICK-START.md +546 -587
- data/docs/RAILS_INTEGRATION.md +79 -0
- data/docs/SCHEMA_VALIDATION.md +63 -0
- data/docs/SLO-PROMQL-ALERTS.md +161 -0
- data/docs/TESTING.md +69 -0
- data/docs/{ADR-001-architecture.md → architecture/ADR-001-architecture.md} +36 -65
- data/docs/{ADR-002-metrics-yabeda.md → architecture/ADR-002-metrics-yabeda.md} +62 -236
- data/docs/architecture/ADR-003-slo-observability.md +1402 -0
- data/docs/{ADR-004-adapter-architecture.md → architecture/ADR-004-adapter-architecture.md} +163 -146
- data/docs/{ADR-005-tracing-context.md → architecture/ADR-005-tracing-context.md} +10 -9
- data/docs/{ADR-006-security-compliance.md → architecture/ADR-006-security-compliance.md} +184 -191
- data/docs/{ADR-007-opentelemetry-integration.md → architecture/ADR-007-opentelemetry-integration.md} +3 -21
- data/docs/{ADR-008-rails-integration.md → architecture/ADR-008-rails-integration.md} +182 -743
- data/docs/{ADR-009-cost-optimization.md → architecture/ADR-009-cost-optimization.md} +45 -54
- data/docs/architecture/ADR-010-developer-experience.md +522 -0
- data/docs/{ADR-011-testing-strategy.md → architecture/ADR-011-testing-strategy.md} +44 -86
- data/docs/{ADR-012-event-evolution.md → architecture/ADR-012-event-evolution.md} +11 -11
- data/docs/{ADR-013-reliability-error-handling.md → architecture/ADR-013-reliability-error-handling.md} +37 -12
- data/docs/{ADR-014-event-driven-slo.md → architecture/ADR-014-event-driven-slo.md} +12 -24
- data/docs/{ADR-015-middleware-order.md → architecture/ADR-015-middleware-order.md} +43 -59
- data/docs/{ADR-016-self-monitoring-slo.md → architecture/ADR-016-self-monitoring-slo.md} +58 -355
- data/docs/{ADR-017-multi-rails-compatibility.md → architecture/ADR-017-multi-rails-compatibility.md} +4 -11
- data/docs/architecture/ADR-018-memory-optimization.md +366 -0
- data/docs/{ADR-INDEX.md → architecture/ADR-INDEX.md} +11 -6
- data/docs/plans/2026-03-20-browser-overlay-svelte.md +281 -0
- data/docs/{00-ICP-AND-TIMELINE.md → prd/00-ICP-AND-TIMELINE.md} +6 -6
- data/docs/{01-SCALE-REQUIREMENTS.md → prd/01-SCALE-REQUIREMENTS.md} +6 -6
- data/docs/prd/01-overview-vision.md +19 -14
- data/docs/use_cases/README.md +22 -23
- data/docs/use_cases/UC-001-request-scoped-debug-buffering.md +50 -44
- data/docs/use_cases/UC-002-business-event-tracking.md +26 -95
- data/docs/use_cases/UC-003-event-metrics.md +66 -0
- data/docs/use_cases/UC-004-zero-config-slo-tracking.md +33 -684
- data/docs/use_cases/UC-005-sentry-integration.md +13 -15
- data/docs/use_cases/UC-006-trace-context-management.md +30 -28
- data/docs/use_cases/UC-007-pii-filtering.md +35 -87
- data/docs/use_cases/UC-008-opentelemetry-integration.md +51 -89
- data/docs/use_cases/UC-009-multi-service-tracing.md +30 -178
- data/docs/use_cases/UC-010-background-job-tracking.md +24 -91
- data/docs/use_cases/UC-011-rate-limiting.md +95 -168
- data/docs/use_cases/UC-012-audit-trail.md +21 -46
- data/docs/use_cases/UC-013-high-cardinality-protection.md +29 -167
- data/docs/use_cases/UC-014-adaptive-sampling.md +2 -2
- data/docs/use_cases/UC-015-cost-optimization.md +46 -99
- data/docs/use_cases/UC-016-rails-logger-migration.md +39 -213
- data/docs/use_cases/UC-017-local-development.md +203 -777
- data/docs/use_cases/UC-018-testing-events.md +3 -3
- data/docs/use_cases/UC-019-retention-based-routing.md +53 -106
- data/docs/use_cases/UC-020-event-versioning.md +8 -9
- data/docs/use_cases/UC-021-error-handling-retry-dlq.md +18 -22
- data/docs/use_cases/UC-022-event-registry.md +15 -21
- data/docs/use_cases/backlog.md +119 -87
- data/e11y.gemspec +2 -2
- data/gems/e11y-devtools/README.md +158 -0
- data/gems/e11y-devtools/config/routes.rb +15 -0
- data/gems/e11y-devtools/e11y-devtools.gemspec +25 -0
- data/gems/e11y-devtools/exe/e11y +34 -0
- data/gems/e11y-devtools/frontend/.gitignore +24 -0
- data/gems/e11y-devtools/frontend/README.md +51 -0
- data/gems/e11y-devtools/frontend/index.html +14 -0
- data/gems/e11y-devtools/frontend/package-lock.json +3707 -0
- data/gems/e11y-devtools/frontend/package.json +28 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/events/recent.json +4205 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/interactions.json +194 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/0a2e04027cfa22d014bc22e8b27cd913/events.json +86 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/0e1543af6a630fb3af6b52283154b3e0/events.json +169 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/1838b691faa49564f97db8592ff3978d/events.json +78 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/29f198f6588dacffb687777eb5f8f118/events.json +197 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/34bc3c9c0097de28a7a6f99b90a8e7bc/events.json +194 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/3ba6c20d068ab9cee00e51b180e66444/events.json +184 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/435bfd8f17b9009146a79812d7c3726d/events.json +144 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/4c7676e3fe668e99edb2b94d7d5678a9/events.json +222 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/6daf0d47974bedfc55d5de7004a3ea9f/events.json +194 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/8a81ada42834d15f287bb40010043605/events.json +194 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/8c0a98900edaae105469df8daedccf02/events.json +198 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/8e4f645180f8a7d1dce426b07380466b/events.json +222 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/93db346fa5d44a032605a13b627f4b80/events.json +128 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/98ff6146faf7bd9be8bd03a8275817ba/events.json +223 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/9997ddd0247bc7e25f2ca7a5c415c93d/events.json +197 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/99e35f8ef3baedd798cc4fd085980ad9/events.json +194 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/b4f3095c1909924cbc98889a86c83d6d/events.json +131 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/b54b7fc32b7575a7110de809d11ccda0/events.json +128 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/c0b48033fa06746bcc5886745e053cff/events.json +169 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/c44649ac76701b4558927cd2305ab535/events.json +169 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/d601ae3320057580a39dbdac2edfdf4a/events.json +248 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/e67e724bab422d2b52eeb49635e512e1/events.json +194 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/e6c72765a28f158a8485b35fa63f73da/events.json +194 -0
- data/gems/e11y-devtools/frontend/public/mocks/v1/traces/f541b87405c9a54819b18ebe529f6419/events.json +194 -0
- data/gems/e11y-devtools/frontend/scripts/generate_mocks.rb +397 -0
- data/gems/e11y-devtools/frontend/src/App.svelte +827 -0
- data/gems/e11y-devtools/frontend/src/components/Fab.svelte +19 -0
- data/gems/e11y-devtools/frontend/src/components/FilterBar.svelte +38 -0
- data/gems/e11y-devtools/frontend/src/components/FullscreenPanel.svelte +82 -0
- data/gems/e11y-devtools/frontend/src/components/InteractionsTimeline.svelte +264 -0
- data/gems/e11y-devtools/frontend/src/components/RecentHistogram.svelte +354 -0
- data/gems/e11y-devtools/frontend/src/lib/api.ts +37 -0
- data/gems/e11y-devtools/frontend/src/lib/eventIdentity.ts +12 -0
- data/gems/e11y-devtools/frontend/src/lib/format.ts +37 -0
- data/gems/e11y-devtools/frontend/src/lib/listFilter.ts +43 -0
- data/gems/e11y-devtools/frontend/src/lib/recentVolume.ts +80 -0
- data/gems/e11y-devtools/frontend/src/lib/router.ts +12 -0
- data/gems/e11y-devtools/frontend/src/lib/transitions.ts +34 -0
- data/gems/e11y-devtools/frontend/src/lib/viewportOrigin.ts +25 -0
- data/gems/e11y-devtools/frontend/src/main.ts +8 -0
- data/gems/e11y-devtools/frontend/src/overlay-entry.ts +24 -0
- data/gems/e11y-devtools/frontend/src/overlay.css +1080 -0
- data/gems/e11y-devtools/frontend/svelte.config.js +2 -0
- data/gems/e11y-devtools/frontend/test_puppeteer.js +41 -0
- data/gems/e11y-devtools/frontend/test_scale.js +3 -0
- data/gems/e11y-devtools/frontend/tsconfig.app.json +21 -0
- data/gems/e11y-devtools/frontend/tsconfig.json +7 -0
- data/gems/e11y-devtools/frontend/tsconfig.node.json +26 -0
- data/gems/e11y-devtools/frontend/vite.config.ts +36 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/server.rb +96 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tool_base.rb +25 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/clear.rb +31 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/errors.rb +35 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/event_detail.rb +33 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/events_by_trace.rb +33 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/interactions.rb +40 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/recent_events.rb +34 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/search.rb +34 -0
- data/gems/e11y-devtools/lib/e11y/devtools/mcp/tools/stats.rb +30 -0
- data/gems/e11y-devtools/lib/e11y/devtools/overlay/assets/overlay.js +20 -0
- data/gems/e11y-devtools/lib/e11y/devtools/overlay/controller.rb +94 -0
- data/gems/e11y-devtools/lib/e11y/devtools/overlay/engine.rb +26 -0
- data/gems/e11y-devtools/lib/e11y/devtools/overlay/middleware.rb +80 -0
- data/gems/e11y-devtools/lib/e11y/devtools/overlay/rails_controller.rb +67 -0
- data/gems/e11y-devtools/lib/e11y/devtools/tui/app.rb +262 -0
- data/gems/e11y-devtools/lib/e11y/devtools/tui/grouping.rb +66 -0
- data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/event_detail.rb +62 -0
- data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/event_list.rb +70 -0
- data/gems/e11y-devtools/lib/e11y/devtools/tui/widgets/interaction_list.rb +47 -0
- data/gems/e11y-devtools/lib/e11y/devtools/version.rb +8 -0
- data/gems/e11y-devtools/lib/e11y/devtools.rb +13 -0
- data/gems/e11y-devtools/spec/e11y/devtools/mcp/tools_spec.rb +107 -0
- data/gems/e11y-devtools/spec/e11y/devtools/overlay/controller_spec.rb +91 -0
- data/gems/e11y-devtools/spec/e11y/devtools/overlay/middleware_spec.rb +46 -0
- data/gems/e11y-devtools/spec/e11y/devtools/tui/app_spec.rb +85 -0
- data/gems/e11y-devtools/spec/e11y/devtools/tui/grouping_spec.rb +64 -0
- data/gems/e11y-devtools/spec/spec_helper.rb +5 -0
- data/gems/e11y-devtools/spec/tui/widgets/event_list_spec.rb +44 -0
- data/gems/e11y-devtools/spec/tui/widgets/interaction_list_spec.rb +62 -0
- data/lib/e11y/adapters/audit_encrypted.rb +53 -11
- data/lib/e11y/adapters/base.rb +33 -34
- data/lib/e11y/adapters/dev_log/file_store.rb +143 -0
- data/lib/e11y/adapters/dev_log/query.rb +219 -0
- data/lib/e11y/adapters/dev_log.rb +118 -0
- data/lib/e11y/adapters/file.rb +3 -6
- data/lib/e11y/adapters/in_memory.rb +52 -5
- data/lib/e11y/adapters/in_memory_test.rb +29 -0
- data/lib/e11y/adapters/loki.rb +58 -23
- data/lib/e11y/adapters/null.rb +82 -0
- data/lib/e11y/adapters/opentelemetry_collector.rb +183 -0
- data/lib/e11y/adapters/otel_logs.rb +136 -23
- data/lib/e11y/adapters/sentry.rb +4 -7
- data/lib/e11y/adapters/stdout.rb +73 -7
- data/lib/e11y/adapters/yabeda.rb +153 -29
- data/lib/e11y/buffers/adaptive_buffer.rb +3 -17
- data/lib/e11y/buffers/{request_scoped_buffer.rb → ephemeral_buffer.rb} +72 -58
- data/lib/e11y/buffers/ring_buffer.rb +3 -16
- data/lib/e11y/configuration.rb +272 -0
- data/lib/e11y/console.rb +10 -17
- data/lib/e11y/current.rb +53 -1
- data/lib/e11y/debug/pipeline_inspector.rb +96 -0
- data/lib/e11y/documentation/generator.rb +48 -0
- data/lib/e11y/event/base.rb +176 -82
- data/lib/e11y/event/value_sampling_config.rb +1 -5
- data/lib/e11y/events/rails/database/query.rb +1 -4
- data/lib/e11y/events/rails/job/failed.rb +2 -0
- data/lib/e11y/instruments/active_job.rb +44 -12
- data/lib/e11y/instruments/rails_instrumentation.rb +49 -24
- data/lib/e11y/instruments/sidekiq.rb +135 -31
- data/lib/e11y/linters/base.rb +11 -0
- data/lib/e11y/linters/pii/pii_declaration_linter.rb +120 -0
- data/lib/e11y/linters/slo/config_consistency_linter.rb +76 -0
- data/lib/e11y/linters/slo/explicit_declaration_linter.rb +36 -0
- data/lib/e11y/linters/slo/slo_status_from_linter.rb +41 -0
- data/lib/e11y/logger/bridge.rb +26 -7
- data/lib/e11y/metrics/cardinality_protection.rb +10 -15
- data/lib/e11y/metrics/cardinality_tracker.rb +16 -6
- data/lib/e11y/metrics/registry.rb +3 -5
- data/lib/e11y/metrics/test_backend.rb +62 -0
- data/lib/e11y/metrics.rb +56 -10
- data/lib/e11y/middleware/adapter_resolver.rb +40 -0
- data/lib/e11y/middleware/audit_signing.rb +43 -6
- data/lib/e11y/middleware/baggage_protection.rb +75 -0
- data/lib/e11y/middleware/dev_log_source.rb +24 -0
- data/lib/e11y/middleware/event_slo.rb +23 -9
- data/lib/e11y/middleware/otel_span.rb +23 -0
- data/lib/e11y/middleware/pii_filter.rb +104 -75
- data/lib/e11y/middleware/rate_limiting.rb +54 -27
- data/lib/e11y/middleware/request.rb +70 -23
- data/lib/e11y/middleware/routing.rb +78 -21
- data/lib/e11y/middleware/sampling.rb +66 -17
- data/lib/e11y/middleware/self_monitoring_emit.rb +39 -0
- data/lib/e11y/middleware/trace_context.rb +45 -10
- data/lib/e11y/middleware/track_latency.rb +34 -0
- data/lib/e11y/middleware/validation.rb +7 -16
- data/lib/e11y/middleware/versioning.rb +26 -22
- data/lib/e11y/opentelemetry/semantic_conventions.rb +109 -0
- data/lib/e11y/opentelemetry/span_creator.rb +142 -0
- data/lib/e11y/pii/patterns.rb +12 -1
- data/lib/e11y/pipeline/builder.rb +4 -4
- data/lib/e11y/presets/audit_event.rb +13 -2
- data/lib/e11y/railtie.rb +52 -14
- data/lib/e11y/registry.rb +306 -0
- data/lib/e11y/reliability/circuit_breaker.rb +19 -21
- data/lib/e11y/reliability/dlq/base.rb +71 -0
- data/lib/e11y/reliability/dlq/file_adapter.rb +301 -0
- data/lib/e11y/reliability/dlq/file_storage.rb +63 -34
- data/lib/e11y/reliability/dlq/filter.rb +37 -54
- data/lib/e11y/reliability/retry_handler.rb +26 -29
- data/lib/e11y/reliability/retry_rate_limiter.rb +3 -11
- data/lib/e11y/sampling/error_spike_detector.rb +0 -2
- data/lib/e11y/sampling/load_monitor.rb +5 -9
- data/lib/e11y/sampling/stratified_tracker.rb +18 -0
- data/lib/e11y/self_monitoring/buffer_monitor.rb +2 -0
- data/lib/e11y/self_monitoring/performance_monitor.rb +19 -61
- data/lib/e11y/self_monitoring/reliability_monitor.rb +4 -74
- data/lib/e11y/slo/config_loader.rb +40 -0
- data/lib/e11y/slo/config_validator.rb +58 -0
- data/lib/e11y/slo/dashboard_generator.rb +122 -0
- data/lib/e11y/slo/event_driven.rb +8 -0
- data/lib/e11y/slo/tracker.rb +31 -4
- data/lib/e11y/testing/have_tracked_event_matcher.rb +190 -0
- data/lib/e11y/testing/rspec_matchers.rb +21 -0
- data/lib/e11y/testing/snapshot_matcher.rb +86 -0
- data/lib/e11y/trace_context/sampler.rb +35 -0
- data/lib/e11y/tracing/faraday_middleware.rb +31 -0
- data/lib/e11y/tracing/net_http_patch.rb +33 -0
- data/lib/e11y/tracing/propagator.rb +144 -0
- data/lib/e11y/tracing.rb +47 -0
- data/lib/e11y/version.rb +1 -1
- data/lib/e11y/versioning/version_extractor.rb +32 -0
- data/lib/e11y.rb +123 -266
- data/lib/generators/e11y/event/event_generator.rb +22 -0
- data/lib/generators/e11y/event/templates/event.rb.tt +16 -0
- data/lib/generators/e11y/grafana_dashboard/grafana_dashboard_generator.rb +30 -0
- data/lib/generators/e11y/grafana_dashboard/templates/e11y_dashboard.json +81 -0
- data/lib/generators/e11y/install/install_generator.rb +34 -0
- data/lib/generators/e11y/install/templates/e11y.rb +239 -0
- data/lib/generators/e11y/prometheus_alerts/prometheus_alerts_generator.rb +29 -0
- data/lib/generators/e11y/prometheus_alerts/templates/e11y_alerts.yml +28 -0
- data/lib/tasks/e11y_docs.rake +30 -0
- data/lib/tasks/e11y_events.rake +71 -0
- data/lib/tasks/e11y_lint.rake +91 -0
- data/lib/tasks/e11y_slo.rake +29 -0
- metadata +186 -39
- data/docs/ADR-003-slo-observability.md +0 -3337
- data/docs/ADR-010-developer-experience.md +0 -2166
- data/docs/API-REFERENCE-L28.md +0 -914
- data/docs/COMPREHENSIVE-CONFIGURATION.md +0 -2366
- data/docs/CONTRIBUTING.md +0 -312
- data/docs/IMPLEMENTATION_NOTES.md +0 -2804
- data/docs/IMPLEMENTATION_PLAN.md +0 -1971
- data/docs/IMPLEMENTATION_PLAN_ARCHITECTURE.md +0 -586
- data/docs/PLAN.md +0 -148
- data/docs/README.md +0 -296
- data/docs/design/00-memory-optimization.md +0 -593
- data/docs/guides/MIGRATION-L27-L28.md +0 -692
- data/docs/guides/PERFORMANCE-BENCHMARKS.md +0 -434
- data/docs/guides/README.md +0 -44
- data/docs/use_cases/UC-003-pattern-based-metrics.md +0 -1627
- data/lib/e11y/adapters/registry.rb +0 -141
|
@@ -50,12 +50,15 @@ module E11y
|
|
|
50
50
|
# @option config [Float] :max_delay_ms Maximum delay in milliseconds (default: 5000)
|
|
51
51
|
# @option config [Float] :jitter_factor Jitter factor (0.0-1.0, default: 0.1)
|
|
52
52
|
# @option config [Boolean] :fail_on_error Raise error after max retries (default: true)
|
|
53
|
-
|
|
53
|
+
# @param rate_limiter [RetryRateLimiter, nil] Optional rate limiter for thundering herd prevention (C06)
|
|
54
|
+
# @param retry_rate_limiter [RetryRateLimiter, nil] Alias for rate_limiter (backward compatibility)
|
|
55
|
+
def initialize(config: {}, rate_limiter: nil, retry_rate_limiter: nil)
|
|
54
56
|
@max_attempts = config[:max_attempts] || 3
|
|
55
57
|
@base_delay_ms = config[:base_delay_ms] || 100.0
|
|
56
58
|
@max_delay_ms = config[:max_delay_ms] || 5000.0
|
|
57
59
|
@jitter_factor = config[:jitter_factor] || 0.1
|
|
58
60
|
@fail_on_error = config.fetch(:fail_on_error, true)
|
|
61
|
+
@rate_limiter = rate_limiter || retry_rate_limiter
|
|
59
62
|
end
|
|
60
63
|
|
|
61
64
|
# Execute block with retry logic.
|
|
@@ -98,6 +101,22 @@ module E11y
|
|
|
98
101
|
delay_ms = calculate_backoff_delay(attempt)
|
|
99
102
|
on_retry_attempt(adapter, event, e, attempt, delay_ms)
|
|
100
103
|
|
|
104
|
+
# C06: Thundering herd prevention — check rate limiter before sleeping
|
|
105
|
+
if @rate_limiter && !@rate_limiter.allow?(adapter.class.name, event)
|
|
106
|
+
# Rate limit exceeded: stop retrying to prevent thundering herd
|
|
107
|
+
# With :delay strategy, sleep first to spread out retry load
|
|
108
|
+
on_limit = @rate_limiter.instance_variable_get(:@on_limit_exceeded)
|
|
109
|
+
if on_limit == :delay
|
|
110
|
+
window_sec = @rate_limiter.instance_variable_get(:@window)
|
|
111
|
+
jitter = rand(0..(delay_ms * 0.2))
|
|
112
|
+
sleep(((window_sec * 1000) + jitter) / 1000.0)
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
raise RetryExhaustedError.new(e, retry_count: attempt) if @fail_on_error
|
|
116
|
+
|
|
117
|
+
return nil
|
|
118
|
+
end
|
|
119
|
+
|
|
101
120
|
# Sleep with backoff
|
|
102
121
|
sleep(delay_ms / 1000.0)
|
|
103
122
|
end
|
|
@@ -147,17 +166,17 @@ module E11y
|
|
|
147
166
|
|
|
148
167
|
# Handle successful execution.
|
|
149
168
|
def on_success(adapter, _event, attempt)
|
|
150
|
-
|
|
169
|
+
E11y::Metrics.increment("e11y.retry.success", adapter: adapter.class.name, attempts: attempt)
|
|
151
170
|
|
|
152
171
|
# Log if retry was needed
|
|
153
172
|
return unless attempt > 1
|
|
154
173
|
|
|
155
|
-
|
|
174
|
+
E11y::Metrics.increment("e11y.retry.recovered", adapter: adapter.class.name, attempts: attempt)
|
|
156
175
|
end
|
|
157
176
|
|
|
158
177
|
# Handle permanent failure (non-retriable error).
|
|
159
178
|
def on_permanent_failure(adapter, _event, error, attempt)
|
|
160
|
-
|
|
179
|
+
E11y::Metrics.increment(
|
|
161
180
|
"e11y.retry.permanent_failure",
|
|
162
181
|
adapter: adapter.class.name,
|
|
163
182
|
error: error.class.name,
|
|
@@ -167,7 +186,7 @@ module E11y
|
|
|
167
186
|
|
|
168
187
|
# Handle max retries exhausted (all attempts failed).
|
|
169
188
|
def on_max_retries_exhausted(adapter, _event, error, attempt)
|
|
170
|
-
|
|
189
|
+
E11y::Metrics.increment(
|
|
171
190
|
"e11y.retry.exhausted",
|
|
172
191
|
adapter: adapter.class.name,
|
|
173
192
|
error: error.class.name,
|
|
@@ -176,35 +195,13 @@ module E11y
|
|
|
176
195
|
end
|
|
177
196
|
|
|
178
197
|
# Handle retry attempt.
|
|
179
|
-
def on_retry_attempt(adapter, _event, error, attempt,
|
|
180
|
-
|
|
198
|
+
def on_retry_attempt(adapter, _event, error, attempt, _delay_ms)
|
|
199
|
+
E11y::Metrics.increment(
|
|
181
200
|
"e11y.retry.attempt",
|
|
182
201
|
adapter: adapter.class.name,
|
|
183
202
|
error: error.class.name,
|
|
184
203
|
attempt: attempt
|
|
185
204
|
)
|
|
186
|
-
|
|
187
|
-
# Track backoff delay histogram
|
|
188
|
-
track_histogram("e11y.retry.backoff_delay_ms", delay_ms, adapter: adapter.class.name)
|
|
189
|
-
end
|
|
190
|
-
|
|
191
|
-
# Increment retry metric.
|
|
192
|
-
#
|
|
193
|
-
# @param metric_name [String] Metric name
|
|
194
|
-
# @param tags [Hash] Additional tags
|
|
195
|
-
def increment_metric(metric_name, tags = {})
|
|
196
|
-
# TODO: Integrate with Yabeda metrics
|
|
197
|
-
# E11y::Metrics.increment(metric_name, tags)
|
|
198
|
-
end
|
|
199
|
-
|
|
200
|
-
# Track histogram metric.
|
|
201
|
-
#
|
|
202
|
-
# @param metric_name [String] Metric name
|
|
203
|
-
# @param value [Numeric] Value to track
|
|
204
|
-
# @param tags [Hash] Additional tags
|
|
205
|
-
def track_histogram(metric_name, value, tags = {})
|
|
206
|
-
# TODO: Integrate with Yabeda metrics
|
|
207
|
-
# E11y::Metrics.histogram(metric_name, value, tags)
|
|
208
205
|
end
|
|
209
206
|
end
|
|
210
207
|
end
|
|
@@ -46,7 +46,6 @@ module E11y
|
|
|
46
46
|
false
|
|
47
47
|
else
|
|
48
48
|
@retry_counts[adapter_name] << Time.now
|
|
49
|
-
increment_metric("e11y.retry_rate_limiter.allowed", adapter: adapter_name)
|
|
50
49
|
true
|
|
51
50
|
end
|
|
52
51
|
end
|
|
@@ -93,25 +92,18 @@ module E11y
|
|
|
93
92
|
|
|
94
93
|
# Handle limit exceeded based on configured strategy.
|
|
95
94
|
def on_limit_exceeded(adapter_name, _event_data)
|
|
96
|
-
|
|
95
|
+
E11y::Metrics.increment(:e11y_retry_rate_limiter_total, adapter: adapter_name, event: "exceeded", delay_sec: "")
|
|
97
96
|
|
|
98
97
|
case @on_limit_exceeded
|
|
99
98
|
when :delay
|
|
100
99
|
# Calculate delay with jitter
|
|
101
100
|
delay_sec = @window + rand((-@jitter_range * @window)..(@jitter_range * @window))
|
|
102
|
-
|
|
101
|
+
E11y::Metrics.increment(:e11y_retry_rate_limiter_total, adapter: adapter_name, event: "delayed", delay_sec: delay_sec.round(1).to_s)
|
|
103
102
|
# Caller should sleep(delay_sec) before retry
|
|
104
103
|
when :dlq
|
|
105
|
-
|
|
106
|
-
increment_metric("e11y.retry_rate_limiter.dlq", adapter: adapter_name)
|
|
104
|
+
E11y::Metrics.increment(:e11y_retry_rate_limiter_total, adapter: adapter_name, event: "dlq", delay_sec: "")
|
|
107
105
|
end
|
|
108
106
|
end
|
|
109
|
-
|
|
110
|
-
# Increment retry rate limiter metric.
|
|
111
|
-
def increment_metric(metric_name, tags = {})
|
|
112
|
-
# TODO: Integrate with Yabeda metrics
|
|
113
|
-
# E11y::Metrics.increment(metric_name, tags)
|
|
114
|
-
end
|
|
115
107
|
end
|
|
116
108
|
end
|
|
117
109
|
end
|
|
@@ -66,7 +66,6 @@ module E11y
|
|
|
66
66
|
# Check if currently in error spike state
|
|
67
67
|
#
|
|
68
68
|
# @return [Boolean] true if error spike detected
|
|
69
|
-
# rubocop:disable Metrics/MethodLength
|
|
70
69
|
# Error spike detection requires checking active spike, expiration, and new spike detection
|
|
71
70
|
def error_spike?
|
|
72
71
|
@mutex.synchronize do
|
|
@@ -94,7 +93,6 @@ module E11y
|
|
|
94
93
|
false
|
|
95
94
|
end
|
|
96
95
|
end
|
|
97
|
-
# rubocop:enable Metrics/MethodLength
|
|
98
96
|
|
|
99
97
|
# Record an event for error rate tracking
|
|
100
98
|
#
|
|
@@ -92,21 +92,17 @@ module E11y
|
|
|
92
92
|
def load_level
|
|
93
93
|
rate = current_rate
|
|
94
94
|
|
|
95
|
-
# Check thresholds in descending order
|
|
96
|
-
#
|
|
97
|
-
# Values between normal and high thresholds intentionally mapped to :high
|
|
95
|
+
# Check thresholds in descending order.
|
|
96
|
+
# rate <= normal → :normal; rate > normal and < high → :high; etc.
|
|
98
97
|
if rate >= @thresholds[:overload]
|
|
99
98
|
:overload
|
|
100
99
|
elsif rate >= @thresholds[:very_high]
|
|
101
100
|
:very_high
|
|
102
|
-
elsif rate >= @thresholds[:high]
|
|
103
|
-
:high
|
|
104
|
-
elsif rate >= @thresholds[:normal]
|
|
105
|
-
:high # Between normal and high threshold
|
|
101
|
+
elsif rate >= @thresholds[:high] || rate > @thresholds[:normal]
|
|
102
|
+
:high # rate > normal (includes rate >= high)
|
|
106
103
|
else
|
|
107
|
-
:normal
|
|
104
|
+
:normal # rate <= normal (inclusive of exact threshold)
|
|
108
105
|
end
|
|
109
|
-
# rubocop:enable Lint/DuplicateBranch
|
|
110
106
|
end
|
|
111
107
|
|
|
112
108
|
# Get recommended sample rate for current load
|
|
@@ -1,7 +1,25 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
# Stratified sampling for SLO accuracy (FEAT-4851, C11 Resolution).
|
|
4
|
+
# Tracks sampling statistics per severity stratum for sampling correction.
|
|
3
5
|
module E11y
|
|
6
|
+
# Stratified sampling module — provides StratifiedTracker for SLO correction.
|
|
4
7
|
module Sampling
|
|
8
|
+
# Module-level singleton for StratifiedTracker (C11 Resolution).
|
|
9
|
+
# Used by Sampling middleware and EventSlo for sampling correction.
|
|
10
|
+
#
|
|
11
|
+
# @return [StratifiedTracker]
|
|
12
|
+
def self.stratified_tracker
|
|
13
|
+
@stratified_tracker ||= StratifiedTracker.new
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
# Reset stratified tracker (for testing).
|
|
17
|
+
# @return [void]
|
|
18
|
+
def self.reset_stratified_tracker!
|
|
19
|
+
@stratified_tracker&.reset!
|
|
20
|
+
@stratified_tracker = nil
|
|
21
|
+
end
|
|
22
|
+
|
|
5
23
|
# Stratified Sampling Tracker for SLO accuracy (FEAT-4851, C11 Resolution)
|
|
6
24
|
#
|
|
7
25
|
# Tracks sampling statistics per severity stratum to enable sampling correction
|
|
@@ -6,45 +6,34 @@ module E11y
|
|
|
6
6
|
module SelfMonitoring
|
|
7
7
|
# Performance monitoring for E11y internal operations.
|
|
8
8
|
#
|
|
9
|
-
# Tracks
|
|
10
|
-
# - Event
|
|
11
|
-
# -
|
|
12
|
-
# - Adapter writes
|
|
13
|
-
# - Buffer flushes
|
|
9
|
+
# Tracks:
|
|
10
|
+
# - Event.track() latency (via TrackLatency middleware)
|
|
11
|
+
# - Adapter send latency (used by Base adapter)
|
|
14
12
|
#
|
|
15
13
|
# @see ADR-016 §3.1 (Performance Metrics)
|
|
16
14
|
# @example
|
|
17
|
-
# E11y::SelfMonitoring::PerformanceMonitor.track_latency(0.5, event_class: '
|
|
15
|
+
# E11y::SelfMonitoring::PerformanceMonitor.track_latency(0.5, event_class: 'Events::OrderPaid', severity: 'info', result: :success)
|
|
16
|
+
# E11y::SelfMonitoring::PerformanceMonitor.track_adapter_latency('E11y::Adapters::Loki', 42)
|
|
18
17
|
module PerformanceMonitor
|
|
19
|
-
|
|
20
|
-
#
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
# @param severity [Symbol] Event severity
|
|
24
|
-
# @return [void]
|
|
25
|
-
def self.track_latency(duration_ms, event_class:, severity:)
|
|
26
|
-
E11y::Metrics.histogram(
|
|
27
|
-
:e11y_track_duration_seconds,
|
|
28
|
-
duration_ms / 1000.0,
|
|
29
|
-
{
|
|
30
|
-
event_class: event_class,
|
|
31
|
-
severity: severity
|
|
32
|
-
},
|
|
33
|
-
buckets: [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1] # 0.1ms to 100ms
|
|
34
|
-
)
|
|
35
|
-
end
|
|
18
|
+
TRACK_BUCKETS = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1].freeze # 0.1ms to 100ms
|
|
19
|
+
ADAPTER_BUCKETS = [0.001, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0].freeze # 1ms to 5s
|
|
20
|
+
RESULT_SUCCESS = "success"
|
|
21
|
+
RESULT_DROPPED = "dropped"
|
|
36
22
|
|
|
37
|
-
# Track
|
|
23
|
+
# Track Event.track() pipeline latency (from entry to exit).
|
|
38
24
|
#
|
|
39
|
-
# @param middleware_name [String] Middleware class name
|
|
40
25
|
# @param duration_ms [Numeric] Duration in milliseconds
|
|
26
|
+
# @param event_class [String] Event class name (e.g. 'Events::OrderPaid')
|
|
27
|
+
# @param severity [String] Severity (e.g. 'info', 'error')
|
|
28
|
+
# @param result [Symbol] :success or :dropped
|
|
41
29
|
# @return [void]
|
|
42
|
-
def self.
|
|
30
|
+
def self.track_latency(duration_ms, event_class:, severity:, result:)
|
|
31
|
+
result_str = result == :success ? RESULT_SUCCESS : RESULT_DROPPED
|
|
43
32
|
E11y::Metrics.histogram(
|
|
44
|
-
:
|
|
33
|
+
:e11y_track_duration_seconds,
|
|
45
34
|
duration_ms / 1000.0,
|
|
46
|
-
{
|
|
47
|
-
buckets:
|
|
35
|
+
{ event_class: event_class, severity: severity, result: result_str },
|
|
36
|
+
buckets: TRACK_BUCKETS
|
|
48
37
|
)
|
|
49
38
|
end
|
|
50
39
|
|
|
@@ -58,40 +47,9 @@ module E11y
|
|
|
58
47
|
:e11y_adapter_send_duration_seconds,
|
|
59
48
|
duration_ms / 1000.0,
|
|
60
49
|
{ adapter: adapter_name },
|
|
61
|
-
buckets:
|
|
50
|
+
buckets: ADAPTER_BUCKETS
|
|
62
51
|
)
|
|
63
52
|
end
|
|
64
|
-
|
|
65
|
-
# Track buffer flush latency.
|
|
66
|
-
#
|
|
67
|
-
# @param duration_ms [Numeric] Duration in milliseconds
|
|
68
|
-
# @param event_count [Integer] Number of events flushed
|
|
69
|
-
# @return [void]
|
|
70
|
-
def self.track_flush_latency(duration_ms, event_count)
|
|
71
|
-
E11y::Metrics.histogram(
|
|
72
|
-
:e11y_buffer_flush_duration_seconds,
|
|
73
|
-
duration_ms / 1000.0,
|
|
74
|
-
{ event_count_bucket: bucket_event_count(event_count) },
|
|
75
|
-
buckets: [0.001, 0.01, 0.05, 0.1, 0.5, 1.0]
|
|
76
|
-
)
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
# Convert event count to a low-cardinality bucket label.
|
|
80
|
-
#
|
|
81
|
-
# @param count [Integer] Event count
|
|
82
|
-
# @return [String] Bucket label
|
|
83
|
-
# @api private
|
|
84
|
-
def self.bucket_event_count(count)
|
|
85
|
-
case count
|
|
86
|
-
when 0..10 then "1-10"
|
|
87
|
-
when 11..50 then "11-50"
|
|
88
|
-
when 51..100 then "51-100"
|
|
89
|
-
when 101..500 then "101-500"
|
|
90
|
-
else "500+"
|
|
91
|
-
end
|
|
92
|
-
end
|
|
93
|
-
|
|
94
|
-
private_class_method :bucket_event_count
|
|
95
53
|
end
|
|
96
54
|
end
|
|
97
55
|
end
|
|
@@ -6,61 +6,12 @@ module E11y
|
|
|
6
6
|
module SelfMonitoring
|
|
7
7
|
# Reliability monitoring for E11y internal operations.
|
|
8
8
|
#
|
|
9
|
-
# Tracks success/failure rates for
|
|
10
|
-
# - Event tracking
|
|
11
|
-
# - Adapter writes
|
|
12
|
-
# - Buffer operations
|
|
13
|
-
# - DLQ saves
|
|
9
|
+
# Tracks success/failure rates for adapter writes and circuit breaker state.
|
|
14
10
|
#
|
|
15
11
|
# @see ADR-016 §3.2 (Reliability Metrics)
|
|
16
12
|
# @example
|
|
17
|
-
# E11y::SelfMonitoring::ReliabilityMonitor.
|
|
13
|
+
# E11y::SelfMonitoring::ReliabilityMonitor.track_adapter_success(adapter_name: 'E11y::Adapters::Loki')
|
|
18
14
|
module ReliabilityMonitor
|
|
19
|
-
# Track successful event tracking.
|
|
20
|
-
#
|
|
21
|
-
# @param event_type [String] Event type/name
|
|
22
|
-
# @return [void]
|
|
23
|
-
def self.track_event_success(event_type:)
  # One increment of the tracked-events counter, tagged as a success for this event type.
  labels = { event_type: event_type, status: "success" }
  E11y::Metrics.increment(:e11y_events_tracked_total, labels)
end
|
|
32
|
-
|
|
33
|
-
# Track failed event tracking.
|
|
34
|
-
#
|
|
35
|
-
# @param event_type [String] Event type/name
|
|
36
|
-
# @param reason [String] Failure reason (e.g., 'validation_error', 'adapter_error')
|
|
37
|
-
# @return [void]
|
|
38
|
-
def self.track_event_failure(event_type:, reason:)
  # One increment of the tracked-events counter, tagged as a failure together with its reason.
  labels = { event_type: event_type, status: "failure", reason: reason }
  E11y::Metrics.increment(:e11y_events_tracked_total, labels)
end
|
|
48
|
-
|
|
49
|
-
# Track dropped event (rate limited, sampled out, etc).
|
|
50
|
-
#
|
|
51
|
-
# @param event_type [String] Event type/name
|
|
52
|
-
# @param reason [String] Drop reason (e.g., 'rate_limited', 'sampled_out')
|
|
53
|
-
# @return [void]
|
|
54
|
-
def self.track_event_dropped(event_type:, reason:)
  # Dropped events are counted on their own counter, labelled by event type and drop reason.
  labels = { event_type: event_type, reason: reason }
  E11y::Metrics.increment(:e11y_events_dropped_total, labels)
end
|
|
63
|
-
|
|
64
15
|
# Track adapter write success.
|
|
65
16
|
#
|
|
66
17
|
# @param adapter_name [String] Adapter class name
|
|
@@ -70,7 +21,8 @@ module E11y
|
|
|
70
21
|
:e11y_adapter_writes_total,
|
|
71
22
|
{
|
|
72
23
|
adapter: adapter_name,
|
|
73
|
-
status: "success"
|
|
24
|
+
status: "success",
|
|
25
|
+
error_class: "" # Prometheus requires consistent label signature with track_adapter_failure
|
|
74
26
|
}
|
|
75
27
|
)
|
|
76
28
|
end
|
|
@@ -91,28 +43,6 @@ module E11y
|
|
|
91
43
|
)
|
|
92
44
|
end
|
|
93
45
|
|
|
94
|
-
# Track DLQ save operation.
|
|
95
|
-
#
|
|
96
|
-
# @param reason [String] Reason for DLQ save (e.g., 'adapter_error', 'rate_limited')
|
|
97
|
-
# @return [void]
|
|
98
|
-
def self.track_dlq_save(reason:)
  # Counts one DLQ save, labelled only by the reason it was saved.
  labels = { reason: reason }
  E11y::Metrics.increment(:e11y_dlq_saves_total, labels)
end
|
|
104
|
-
|
|
105
|
-
# Track DLQ replay operation.
|
|
106
|
-
#
|
|
107
|
-
# @param status [String] Replay status ('success' or 'failure')
|
|
108
|
-
# @return [void]
|
|
109
|
-
def self.track_dlq_replay(status:)
  # Counts one DLQ replay, labelled only by its outcome status.
  labels = { status: status }
  E11y::Metrics.increment(:e11y_dlq_replays_total, labels)
end
|
|
115
|
-
|
|
116
46
|
# Track circuit breaker state change.
|
|
117
47
|
#
|
|
118
48
|
# @param adapter_name [String] Adapter class name
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "yaml"
|
|
4
|
+
|
|
5
|
+
module E11y
  module SLO
    # Loads SLO configuration from slo.yml files in configurable search paths.
    #
    # Directories are probed in order and the first one containing a regular
    # file named +slo.yml+ wins; later directories are not consulted.
    class ConfigLoader
      class << self
        # Parses the first slo.yml found in +search_paths+.
        #
        # @param search_paths [Array<#to_s>] directories to probe, in priority order
        # @return [Object, nil] the parsed YAML document (+{}+ when the file is
        #   empty or parses to a falsy value), or +nil+ when no slo.yml exists
        # @raise [Psych::Exception] when the file is not valid/safe YAML
        def load(search_paths: default_search_paths)
          path = search_paths
                 .map { |dir| File.join(dir.to_s, "slo.yml") }
                 .find { |candidate| File.file?(candidate) }
          return nil unless path

          YAML.safe_load(File.read(path)) || {}
        end

        # Returns true when e11y_self_monitoring.enabled is true in slo.yml.
        #
        # A missing file, a non-mapping document root or a non-mapping
        # +e11y_self_monitoring+ section all count as "not enabled" instead of
        # raising, so a malformed file cannot break the caller.
        #
        # @return [Boolean]
        def self_monitoring_enabled?
          config = load
          return false unless config.is_a?(Hash)

          section = config["e11y_self_monitoring"]
          section.is_a?(Hash) && section["enabled"] == true
        end

        private

        # Default probe order: <base>/config, <base>/config/e11y, then the
        # current working directory. <base> is the Rails root when one is
        # available, otherwise the current working directory.
        #
        # @return [Array<String>]
        def default_search_paths
          base = rails_root || Dir.pwd
          [File.join(base, "config"), File.join(base, "config", "e11y"), Dir.pwd]
        end

        # Rails.root as a String, or nil when Rails is absent or has no root.
        # The Rails constant can be defined without a booted application, in
        # which case Rails.root is nil; treating that as "" would make the
        # loader probe "/config" at the filesystem root.
        #
        # @return [String, nil]
        def rails_root
          return nil unless defined?(Rails) && Rails.respond_to?(:root)

          root = Rails.root
          root.nil? ? nil : root.to_s
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module E11y
  module SLO
    # Validates slo.yml schema: version, endpoints (controller/pattern), app_wide.aggregated_slo.
    #
    # Validation never raises on malformed structure (wrong container types,
    # nil entries, scalars where mappings are expected); such problems are
    # reported as error strings like every other finding.
    class ConfigValidator
      class << self
        # Validates a parsed slo.yml document.
        #
        # @param config [Hash, nil] parsed configuration with String keys
        # @return [Array<String>] human-readable errors; empty when valid
        def validate(config)
          return ["Config is nil or empty"] if blank?(config)
          return ["Config must be a mapping"] unless config.is_a?(Hash)

          [
            validate_version(config),
            validate_endpoints(config["endpoints"]),
            validate_app_wide(config["app_wide"]),
            validate_e11y_self_monitoring(config["e11y_self_monitoring"])
          ].flatten
        end

        private

        # True for nil and for any container/string that reports itself empty.
        def blank?(value)
          value.nil? || (value.respond_to?(:empty?) && value.empty?)
        end

        # The only requirement on +version+ is that the key is present.
        def validate_version(config)
          return ["Missing required key: version"] unless config.key?("version")

          []
        end

        # Every endpoint entry needs a non-empty +controller+ or +pattern+.
        # A missing or empty +endpoints+ section is allowed.
        def validate_endpoints(endpoints)
          return [] if blank?(endpoints)
          return ["endpoints: must be a list"] unless endpoints.is_a?(Array)

          endpoints.each_with_index.each_with_object([]) do |(endpoint, index), errors|
            # Non-mapping entries (nil, scalars) carry neither key.
            entry = endpoint.is_a?(Hash) ? endpoint : {}
            next unless entry["controller"].to_s.empty? && entry["pattern"].to_s.empty?

            errors << "endpoints[#{index}]: missing controller or pattern"
          end
        end

        # +aggregated_slo+ is only checked when it is enabled; then it needs a
        # strategy and a non-empty list of components.
        def validate_app_wide(app_wide)
          return [] if app_wide.nil?
          return ["app_wide: must be a mapping"] unless app_wide.is_a?(Hash)

          agg = app_wide["aggregated_slo"]
          return [] if agg.nil?
          return ["app_wide.aggregated_slo: must be a mapping"] unless agg.is_a?(Hash)
          return [] unless agg["enabled"]

          errors = []
          errors << "app_wide.aggregated_slo: strategy required when enabled" if agg["strategy"].to_s.empty?
          errors.concat(validate_components(agg["components"]))
          errors
        end

        # Components must be a non-empty list.
        def validate_components(components)
          return ["app_wide.aggregated_slo: components required"] if blank?(components)
          return ["app_wide.aggregated_slo: components must be a list"] unless components.is_a?(Array)

          []
        end

        # Only the container type is checked; when enabled, the targets
        # structure is optional and not validated for now.
        def validate_e11y_self_monitoring(e11y_self_monitoring)
          return [] if e11y_self_monitoring.nil?
          return ["e11y_self_monitoring: must be a mapping"] unless e11y_self_monitoring.is_a?(Hash)

          []
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module E11y
  module SLO
    # Generates Grafana dashboard JSON from SLO config (endpoints, event SLOs, app-wide).
    #
    # Panel ids are fixed per section: 1 for endpoints, 10+ for custom event
    # SLOs, 100 for the app-wide aggregate and 200 for self-monitoring.
    class DashboardGenerator
      class << self
        # Builds the dashboard document.
        #
        # @param config [Hash, nil] parsed slo.yml with String keys
        # @return [String] pretty-printed JSON, or "{}" for a nil/empty config
        def generate(config)
          return "{}" if config.nil? || config.empty?

          panels = []
          panels.concat(build_endpoint_panels(config["endpoints"]))
          panels.concat(build_app_wide_panels(config["app_wide"]))
          panels.concat(build_event_slo_panels(config["custom_slos"])) if config["custom_slos"]
          panels.concat(build_self_monitoring_panels(config["e11y_self_monitoring"]))

          dashboard = {
            title: "E11y SLO Dashboard",
            panels: panels,
            schemaVersion: 38,
            version: 1
          }
          JSON.pretty_generate(dashboard)
        end

        private

        # One availability panel covering all endpoints, grouped by
        # controller/action; emitted only when at least one endpoint is configured.
        def build_endpoint_panels(endpoints)
          return [] if endpoints.to_a.empty?

          [{
            id: 1,
            title: "HTTP Availability (Per-Endpoint)",
            type: "timeseries",
            targets: [{
              expr: "sum(rate(e11y_slo_http_requests_total{status=~\"2..|3..\"}[30d])) " \
                    "by (controller, action) / sum(rate(e11y_slo_http_requests_total[30d])) by (controller, action)",
              # Single quotes on purpose: `#{{action}}` must reach Grafana verbatim.
              legendFormat: '{{controller}}#{{action}}'
            }]
          }]
        end

        # Reliability panel, emitted only when the section is a mapping with a
        # truthy "enabled". Non-mapping values are ignored instead of raising.
        def build_self_monitoring_panels(e11y_self_monitoring)
          return [] unless e11y_self_monitoring.is_a?(Hash) && e11y_self_monitoring["enabled"]

          [{
            id: 200,
            title: "E11y Self-Monitoring Reliability",
            type: "timeseries",
            targets: [{
              # NOTE(review): confirm the exported metric name and that its outcome
              # label is really `result` (not e.g. `status`) before relying on this panel.
              expr: 'sum(rate(e11y_e11y_events_tracked_total{result="success"}[30d])) / sum(rate(e11y_e11y_events_tracked_total[30d]))',
              legendFormat: "reliability"
            }]
          }]
        end

        # One success-rate panel per custom SLO. Entries without a name (or
        # that are not mappings) get the positional name "event_slo_<index>".
        def build_event_slo_panels(custom_slos)
          custom_slos.each_with_index.map do |slo, i|
            name = (slo.is_a?(Hash) && slo["name"]) || "event_slo_#{i}"
            # The name lands inside a quoted PromQL label value, so quote it safely.
            label = escape_label_value(name)
            {
              id: 10 + i,
              title: "Event SLO: #{name}",
              type: "timeseries",
              targets: [{
                expr: "sum(rate(e11y_slo_event_result_total{slo_name=\"#{label}\",slo_status=\"success\"}[30d])) / " \
                      "sum(rate(e11y_slo_event_result_total{slo_name=\"#{label}\"}[30d]))",
                legendFormat: "success_rate"
              }]
            }
          end
        end

        # Aggregate panel, emitted only when aggregated_slo is enabled and has
        # a non-empty list of components. Strategy "min" takes the worst
        # component; any other strategy is treated as a weighted sum.
        def build_app_wide_panels(app_wide)
          return [] unless app_wide.is_a?(Hash)

          agg = app_wide["aggregated_slo"]
          return [] unless agg.is_a?(Hash) && agg["enabled"]

          components = agg["components"]
          return [] unless components.is_a?(Array) && !components.empty?

          window = agg["window"] || "30d"

          expr = case agg["strategy"].to_s
                 when "min"
                   build_min_expr(components, window)
                 else
                   build_weighted_expr(components, window)
                 end

          [{
            id: 100,
            title: "App-Wide Aggregated SLO",
            type: "timeseries",
            targets: [{ expr: expr, legendFormat: "aggregated" }],
            fieldConfig: { defaults: { min: 0.99, max: 1.0 } }
          }]
        end

        # Weighted sum of the component expressions. Components without an
        # explicit weight share the total equally (1 / number of components).
        def build_weighted_expr(components, window)
          parts = components.map do |c|
            weight = c["weight"] || (1.0 / components.size)
            "(#{weight} * (#{apply_window(c['metric'], window)}))"
          end
          parts.join(" + ")
        end

        # Minimum across the component expressions.
        #
        # PromQL's `min` is an aggregation operator that accepts exactly one
        # vector, so `min(a, b)` does not parse. For several components each
        # one is tagged with a distinct `slo_component` label and the results
        # are unioned with `or`, giving `min` a single vector to reduce.
        # NOTE(review): label_replace needs instant-vector inputs — confirm
        # component metrics are vector (not scalar) expressions.
        def build_min_expr(components, window)
          parts = components.map { |c| apply_window(c["metric"], window) }
          return "min(#{parts.first})" if parts.size == 1

          tagged = parts.each_with_index.map do |metric, index|
            %(label_replace(#{metric}, "slo_component", "#{index}", "", ""))
          end
          "min(#{tagged.join(' or ')})"
        end

        # Rewrites every day-based range selector ("[30d]") in +metric+ to the
        # configured window and trims surrounding whitespace. A nil metric
        # becomes an empty string.
        def apply_window(metric, window)
          (metric || "").gsub(/\[\d+d\]/, "[#{window}]").strip
        end

        # Escapes backslash, double quote and newline for use inside a
        # double-quoted PromQL string literal.
        def escape_label_value(value)
          value.to_s.gsub(/[\\"\n]/) { |ch| ch == "\n" ? "\\n" : "\\#{ch}" }
        end
      end
    end
  end
end
|