rails_health_checks 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +47 -2
- data/app/controllers/rails_health_checks/application_controller.rb +8 -0
- data/app/controllers/rails_health_checks/groups_controller.rb +14 -0
- data/app/controllers/rails_health_checks/health_controller.rb +0 -26
- data/app/controllers/rails_health_checks/live_controller.rb +14 -0
- data/app/controllers/rails_health_checks/metrics_controller.rb +12 -0
- data/config/routes.rb +4 -3
- data/lib/rails_health_checks/check.rb +1 -0
- data/lib/rails_health_checks/check_registry.rb +26 -2
- data/lib/rails_health_checks/configuration.rb +2 -1
- data/lib/rails_health_checks/prometheus_formatter.rb +28 -0
- data/lib/rails_health_checks/version.rb +1 -1
- data/lib/rails_health_checks.rb +1 -0
- metadata +5 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 010ea2d8734800da5baaaa805b0671ac685f3c5a26a11967ca64f00cfd03500c
|
|
4
|
+
data.tar.gz: 7f81bbd9940d17d7b67f779da47b6fe42796ab6542080fcf2a790976c9e83be5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f6da67674aa9871b760450a01834a09b1bdebcbacbd21c330c60d2a6c98725b331321c3e187e7741269f414b11562b1dcb48ea99ec8d8face847677c678bbdd0
|
|
7
|
+
data.tar.gz: f5cf9df5e54de04defd67440c8d838f7c7a085f2712351b1e79920975d64d5c957c43d02cf5de3497ef25ae4ee77ac8a6d8e52d32c682fb0158e753d958a683b
|
data/README.md
CHANGED
|
@@ -15,6 +15,8 @@ A Rails engine providing structured, pluggable health check endpoints for monito
|
|
|
15
15
|
- [Configuration](#configuration)
|
|
16
16
|
- [Authentication](#authentication)
|
|
17
17
|
- [Built-in Checks](#built-in-checks)
|
|
18
|
+
- [Notifications](#notifications)
|
|
19
|
+
- [Prometheus Metrics](#prometheus-metrics)
|
|
18
20
|
- [Per-Environment Toggling](#per-environment-toggling)
|
|
19
21
|
- [Check Groups](#check-groups)
|
|
20
22
|
- [Custom Checks](#custom-checks)
|
|
@@ -53,8 +55,9 @@ mount RailsHealthChecks::Engine => "/health"
|
|
|
53
55
|
|----------|--------|----------|
|
|
54
56
|
| `GET /health` | JSON | Monitoring dashboards, detailed diagnostics |
|
|
55
57
|
| `GET /health/live` | Plain text | Load balancer liveness probes |
|
|
58
|
+
| `GET /health/metrics` | Prometheus text | Prometheus / OpenMetrics scraping |
|
|
56
59
|
|
|
57
|
-
HTTP status is `200 OK` when all checks pass, `503 Service Unavailable` otherwise.
|
|
60
|
+
HTTP status is `200 OK` when all checks pass, `503 Service Unavailable` otherwise (except `/metrics` which always returns `200`).
|
|
58
61
|
|
|
59
62
|
### JSON response shape
|
|
60
63
|
|
|
@@ -142,6 +145,44 @@ The block receives the `ActionDispatch::Request` object and must return a truthy
|
|
|
142
145
|
|
|
143
146
|
---
|
|
144
147
|
|
|
148
|
+
## Notifications
|
|
149
|
+
|
|
150
|
+
Every health check run publishes an `ActiveSupport::Notifications` event:
|
|
151
|
+
|
|
152
|
+
```ruby
|
|
153
|
+
ActiveSupport::Notifications.subscribe("health_check.rails_health_checks") do |*args|
|
|
154
|
+
event = ActiveSupport::Notifications::Event.new(*args)
|
|
155
|
+
Rails.logger.info "Health check: #{event.payload[:status]} (#{event.duration.round}ms)"
|
|
156
|
+
# event.payload[:checks] => { database: { status: "ok", latency_ms: 3 }, ... }
|
|
157
|
+
end
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
The payload includes `status` (overall: `ok`/`degraded`/`critical`) and `checks` (per-check hash with `status`, `latency_ms`, and `message` when present). Duration is measured over the entire parallel check run.
|
|
161
|
+
|
|
162
|
+
[↑ Back to top](#table-of-contents)
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## Prometheus Metrics
|
|
167
|
+
|
|
168
|
+
`GET /health/metrics` returns Prometheus text exposition format (`text/plain; version=0.0.4`). This endpoint always returns HTTP 200 — Prometheus convention is that scrape targets should always respond successfully, with check state encoded in metric values.
|
|
169
|
+
|
|
170
|
+
```
|
|
171
|
+
# HELP rails_health_check_status Health check status (0=ok, 1=degraded, 2=critical)
|
|
172
|
+
# TYPE rails_health_check_status gauge
|
|
173
|
+
rails_health_check_status{check="database"} 0
|
|
174
|
+
rails_health_check_status{check="cache"} 0
|
|
175
|
+
|
|
176
|
+
# HELP rails_health_check_latency_ms Health check latency in milliseconds
|
|
177
|
+
# TYPE rails_health_check_latency_ms gauge
|
|
178
|
+
rails_health_check_latency_ms{check="database"} 4
|
|
179
|
+
rails_health_check_latency_ms{check="cache"} 2
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
[↑ Back to top](#table-of-contents)
|
|
183
|
+
|
|
184
|
+
---
|
|
185
|
+
|
|
145
186
|
## Per-Environment Toggling
|
|
146
187
|
|
|
147
188
|
Disable specific checks in specific environments:
|
|
@@ -199,7 +240,11 @@ RailsHealthChecks.configure do |config|
|
|
|
199
240
|
end
|
|
200
241
|
```
|
|
201
242
|
|
|
202
|
-
`config.register` automatically adds the check to the active checks list.
|
|
243
|
+
`config.register` automatically adds the check to the active checks list. Pass `timeout:` to override the global timeout for this check only:
|
|
244
|
+
|
|
245
|
+
```ruby
|
|
246
|
+
config.register :slow_api, MyApiCheck.new, timeout: 10
|
|
247
|
+
```
Use `pass`, `warn_with`, and `fail_with` (inherited from `Check`) to set status, and `measure { }` to record latency.
|
|
203
248
|
|
|
204
249
|
[↑ Back to top](#table-of-contents)
|
|
205
250
|
|
|
@@ -4,5 +4,13 @@ module RailsHealthChecks
|
|
|
4
4
|
class ApplicationController < ActionController::API
|
|
5
5
|
include Authentication
|
|
6
6
|
before_action :authenticate!
|
|
7
|
+
|
|
8
|
+
private
|
|
9
|
+
|
|
10
|
+
def run_checks(check_names)
|
|
11
|
+
config = RailsHealthChecks.configuration
|
|
12
|
+
checks = CheckRegistry.build(check_names)
|
|
13
|
+
CheckRegistry.run(checks, timeout: config.timeout)
|
|
14
|
+
end
|
|
7
15
|
end
|
|
8
16
|
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RailsHealthChecks
|
|
4
|
+
class GroupsController < ApplicationController
|
|
5
|
+
def show
|
|
6
|
+
group_name = params[:id].to_sym
|
|
7
|
+
check_names = RailsHealthChecks.configuration.groups[group_name]
|
|
8
|
+
return render json: { error: "Group '#{group_name}' not found" }, status: :not_found unless check_names
|
|
9
|
+
|
|
10
|
+
builder = ResponseBuilder.new(run_checks(check_names))
|
|
11
|
+
render json: builder.to_json, status: builder.http_status
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
@@ -6,31 +6,5 @@ module RailsHealthChecks
|
|
|
6
6
|
builder = ResponseBuilder.new(run_checks(RailsHealthChecks.configuration.checks))
|
|
7
7
|
render json: builder.to_json, status: builder.http_status
|
|
8
8
|
end
|
|
9
|
-
|
|
10
|
-
def live
|
|
11
|
-
builder = ResponseBuilder.new(run_checks(RailsHealthChecks.configuration.checks))
|
|
12
|
-
if builder.overall_status == "ok"
|
|
13
|
-
render plain: "OK", status: :ok
|
|
14
|
-
else
|
|
15
|
-
render plain: "Service Unavailable", status: :service_unavailable
|
|
16
|
-
end
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
def group
|
|
20
|
-
group_name = params[:group].to_sym
|
|
21
|
-
check_names = RailsHealthChecks.configuration.groups[group_name]
|
|
22
|
-
return render json: { error: "Group '#{group_name}' not found" }, status: :not_found unless check_names
|
|
23
|
-
|
|
24
|
-
builder = ResponseBuilder.new(run_checks(check_names))
|
|
25
|
-
render json: builder.to_json, status: builder.http_status
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
private
|
|
29
|
-
|
|
30
|
-
def run_checks(check_names)
|
|
31
|
-
config = RailsHealthChecks.configuration
|
|
32
|
-
checks = CheckRegistry.build(check_names)
|
|
33
|
-
CheckRegistry.run(checks, timeout: config.timeout)
|
|
34
|
-
end
|
|
35
9
|
end
|
|
36
10
|
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RailsHealthChecks
|
|
4
|
+
class LiveController < ApplicationController
|
|
5
|
+
def show
|
|
6
|
+
builder = ResponseBuilder.new(run_checks(RailsHealthChecks.configuration.checks))
|
|
7
|
+
if builder.overall_status == "ok"
|
|
8
|
+
render plain: "OK", status: :ok
|
|
9
|
+
else
|
|
10
|
+
render plain: "Service Unavailable", status: :service_unavailable
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RailsHealthChecks
|
|
4
|
+
class MetricsController < ApplicationController
|
|
5
|
+
def show
|
|
6
|
+
results = run_checks(RailsHealthChecks.configuration.checks)
|
|
7
|
+
render plain: PrometheusFormatter.new(results).to_text,
|
|
8
|
+
content_type: "text/plain; version=0.0.4",
|
|
9
|
+
status: :ok
|
|
10
|
+
end
|
|
11
|
+
end
|
|
12
|
+
end
|
data/config/routes.rb
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
RailsHealthChecks::Engine.routes.draw do
|
|
4
|
-
get "/",
|
|
5
|
-
get "/live",
|
|
6
|
-
get "
|
|
4
|
+
get "/", to: "health#show", as: :health
|
|
5
|
+
get "/live", to: "live#show", as: :health_live
|
|
6
|
+
get "/metrics", to: "metrics#show", as: :health_metrics
|
|
7
|
+
get "/:id", to: "groups#show", as: :health_group
|
|
7
8
|
end
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "timeout"
|
|
4
|
+
require "concurrent"
|
|
4
5
|
|
|
5
6
|
module RailsHealthChecks
|
|
6
7
|
class CheckRegistry
|
|
@@ -38,7 +39,22 @@ module RailsHealthChecks
|
|
|
38
39
|
end
|
|
39
40
|
|
|
40
41
|
def self.run(checks, timeout:)
|
|
41
|
-
|
|
42
|
+
results = {}
|
|
43
|
+
ActiveSupport::Notifications.instrument("health_check.rails_health_checks") do |payload|
|
|
44
|
+
futures = checks.transform_values do |check|
|
|
45
|
+
t = check.timeout || timeout
|
|
46
|
+
Concurrent::Future.execute { run_check(check, timeout: t) }
|
|
47
|
+
end
|
|
48
|
+
checks.each do |name, check|
|
|
49
|
+
t = check.timeout || timeout
|
|
50
|
+
results[name] = futures[name].value(t + 1) || mark_critical(check, "timed out")
|
|
51
|
+
end
|
|
52
|
+
payload[:status] = overall_status(results)
|
|
53
|
+
payload[:checks] = results.transform_values do |c|
|
|
54
|
+
{ status: c.status, message: c.message, latency_ms: c.latency_ms }.compact
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
results
|
|
42
58
|
end
|
|
43
59
|
|
|
44
60
|
def self.run_check(check, timeout:)
|
|
@@ -56,6 +72,14 @@ module RailsHealthChecks
|
|
|
56
72
|
check
|
|
57
73
|
end
|
|
58
74
|
|
|
59
|
-
|
|
75
|
+
def self.overall_status(results)
|
|
76
|
+
statuses = results.values.map(&:status)
|
|
77
|
+
if statuses.include?("critical") then "critical"
|
|
78
|
+
elsif statuses.include?("degraded") then "degraded"
|
|
79
|
+
else "ok"
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
private_class_method :run_check, :mark_critical, :overall_status
|
|
60
84
|
end
|
|
61
85
|
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RailsHealthChecks
|
|
4
|
+
class PrometheusFormatter
|
|
5
|
+
STATUS_CODES = { "ok" => 0, "degraded" => 1, "critical" => 2 }.freeze
|
|
6
|
+
|
|
7
|
+
def initialize(results)
|
|
8
|
+
@results = results
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def to_text
|
|
12
|
+
lines = []
|
|
13
|
+
|
|
14
|
+
lines << "# HELP rails_health_check_status Health check status (0=ok, 1=degraded, 2=critical)"
|
|
15
|
+
lines << "# TYPE rails_health_check_status gauge"
|
|
16
|
+
@results.each { |name, check| lines << "rails_health_check_status{check=\"#{name}\"} #{STATUS_CODES[check.status]}" }
|
|
17
|
+
|
|
18
|
+
lines << ""
|
|
19
|
+
lines << "# HELP rails_health_check_latency_ms Health check latency in milliseconds"
|
|
20
|
+
lines << "# TYPE rails_health_check_latency_ms gauge"
|
|
21
|
+
@results.each do |name, check|
|
|
22
|
+
lines << "rails_health_check_latency_ms{check=\"#{name}\"} #{check.latency_ms}" if check.latency_ms
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
lines.join("\n") + "\n"
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
data/lib/rails_health_checks.rb
CHANGED
|
@@ -16,6 +16,7 @@ require "rails_health_checks/checks/memory_check"
|
|
|
16
16
|
require "rails_health_checks/checks/http_check"
|
|
17
17
|
require "rails_health_checks/check_registry"
|
|
18
18
|
require "rails_health_checks/response_builder"
|
|
19
|
+
require "rails_health_checks/prometheus_formatter"
|
|
19
20
|
|
|
20
21
|
module RailsHealthChecks
|
|
21
22
|
class << self
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rails_health_checks
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.0
|
|
4
|
+
version: 0.6.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Chuck Smith
|
|
@@ -35,7 +35,10 @@ files:
|
|
|
35
35
|
- README.md
|
|
36
36
|
- Rakefile
|
|
37
37
|
- app/controllers/rails_health_checks/application_controller.rb
|
|
38
|
+
- app/controllers/rails_health_checks/groups_controller.rb
|
|
38
39
|
- app/controllers/rails_health_checks/health_controller.rb
|
|
40
|
+
- app/controllers/rails_health_checks/live_controller.rb
|
|
41
|
+
- app/controllers/rails_health_checks/metrics_controller.rb
|
|
39
42
|
- app/jobs/rails_health_checks/application_job.rb
|
|
40
43
|
- app/mailers/rails_health_checks/application_mailer.rb
|
|
41
44
|
- app/models/rails_health_checks/application_record.rb
|
|
@@ -55,6 +58,7 @@ files:
|
|
|
55
58
|
- lib/rails_health_checks/checks/solid_queue_check.rb
|
|
56
59
|
- lib/rails_health_checks/configuration.rb
|
|
57
60
|
- lib/rails_health_checks/engine.rb
|
|
61
|
+
- lib/rails_health_checks/prometheus_formatter.rb
|
|
58
62
|
- lib/rails_health_checks/response_builder.rb
|
|
59
63
|
- lib/rails_health_checks/version.rb
|
|
60
64
|
- lib/tasks/rails_health_checks_tasks.rake
|