allgood 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +23 -1
- data/app/controllers/allgood/healthcheck_controller.rb +48 -6
- data/examples/allgood.rb +53 -3
- data/lib/allgood/cache_store.rb +52 -0
- data/lib/allgood/configuration.rb +103 -0
- data/lib/allgood/version.rb +1 -1
- data/lib/allgood.rb +1 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 45bac1780ef5cb92516f0a6f02cd7aa7b476213d6595a937252c600dc3e870e0
|
4
|
+
data.tar.gz: b1f8f7e8e30609d67c6c28fb66cd651d13414f9161f74b3e3880f02113408c10
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b169c7d38987605312e2e013f645814e12fa23f2b0574793fa560f9f9ec9f97491eea5347c625e8b6d0909f3ec32c912a744925906f9e66b710486eb365cf35f
|
7
|
+
data.tar.gz: 0573a0b4918ec2e6e4a0ce40831d3e57afed34db5e8ada879d0db4450764dd0e72a8ffe4b3e779bf58758cf3d29546728600f9823437cdf1618627a48648fe5a
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
## [0.3.0] - 2024-10-27
|
2
|
+
|
3
|
+
- Added rate limiting for expensive checks with the `run: "N times per day/hour"` option
|
4
|
+
- Added a cache mechanism to store check results and error states, which allows for rate limiting and avoiding redundant runs when checks fail
|
5
|
+
- Added automatic cache key expiration
|
6
|
+
- Added error handling and feedback for rate-limited checks
|
7
|
+
|
1
8
|
## [0.2.0] - 2024-10-26
|
2
9
|
|
3
10
|
- Improved the `allgood` DSL by adding optional conditionals on when individual checks are run
|
data/README.md
CHANGED
@@ -4,11 +4,13 @@
|
|
4
4
|
|
5
5
|
Add quick, simple, and beautiful health checks to your Rails application via a `/healthcheck` page.
|
6
6
|
|
7
|
+
Use it for smoke testing, to make sure your app is healthy and functioning as expected.
|
8
|
+
|
7
9
|
![Example dashboard of the Allgood health check page](allgood.jpeg)
|
8
10
|
|
9
11
|
## How it works
|
10
12
|
|
11
|
-
`allgood` allows you to define custom health checks (as in: can the Rails app connect to the DB, are there any new users in the past 24 hours, are they actually using the app, etc.) in a very intuitive way that reads just like English.
|
13
|
+
`allgood` allows you to define custom health checks / smoke tests (as in: can the Rails app connect to the DB, are there any new users in the past 24 hours, are they actually using the app, etc.) in a very intuitive way that reads just like English.
|
12
14
|
|
13
15
|
It provides a `/healthcheck` endpoint that displays the results in a beautiful page.
|
14
16
|
|
@@ -180,6 +182,26 @@ check "Complex check",
|
|
180
182
|
end
|
181
183
|
```
|
182
184
|
|
185
|
+
### Rate Limiting Expensive Checks
|
186
|
+
|
187
|
+
For expensive operations (like testing paid APIs), you can limit how often checks run:
|
188
|
+
|
189
|
+
```ruby
|
190
|
+
# Run expensive checks a limited number of times
|
191
|
+
check "OpenAI is responding with a valid LLM message", run: "2 times per day" do
|
192
|
+
# expensive API call
|
193
|
+
end
|
194
|
+
|
195
|
+
check "Analytics can be processed", run: "4 times per hour" do
|
196
|
+
# expensive operation
|
197
|
+
end
|
198
|
+
```
|
199
|
+
|
200
|
+
Important notes:
|
201
|
+
- Rate limits reset at the start of each period (hour/day)
|
202
|
+
- The error state persists between rate-limited runs
|
203
|
+
- Rate-limited checks show clear feedback about remaining runs and next reset time
|
204
|
+
|
183
205
|
When a check is skipped due to its conditions not being met, it will appear in the healthcheck page with a skip emoji (⏭️) and a clear explanation of why it was skipped.
|
184
206
|
|
185
207
|
![Example dashboard of the Allgood health check page with skipped checks](allgood_skipped.webp)
|
data/app/controllers/allgood/healthcheck_controller.rb
CHANGED
@@ -44,20 +44,62 @@ module Allgood
|
|
44
44
|
end
|
45
45
|
|
46
46
|
def run_single_check(check)
|
47
|
+
last_result_key = "allgood:last_result:#{check[:name].parameterize}"
|
48
|
+
last_result = Allgood::CacheStore.instance.read(last_result_key)
|
49
|
+
|
50
|
+
unless Allgood.configuration.should_run_check?(check)
|
51
|
+
message = check[:skip_reason]
|
52
|
+
if last_result
|
53
|
+
status_info = "Last check #{last_result[:success] ? 'passed' : 'failed'} #{time_ago_in_words(last_result[:time])} ago: #{last_result[:message]}"
|
54
|
+
message = "#{message}. #{status_info}"
|
55
|
+
end
|
56
|
+
|
57
|
+
return {
|
58
|
+
name: check[:name],
|
59
|
+
success: last_result ? last_result[:success] : true,
|
60
|
+
skipped: true,
|
61
|
+
message: message,
|
62
|
+
duration: 0
|
63
|
+
}
|
64
|
+
end
|
65
|
+
|
47
66
|
start_time = Time.now
|
48
67
|
result = { success: false, message: "Check timed out after #{check[:timeout]} seconds" }
|
68
|
+
error_key = "allgood:error:#{check[:name].parameterize}"
|
49
69
|
|
50
70
|
begin
|
51
71
|
Timeout.timeout(check[:timeout]) do
|
52
72
|
check_result = Allgood.configuration.run_check(&check[:block])
|
53
73
|
result = { success: check_result[:success], message: check_result[:message] }
|
74
|
+
|
75
|
+
if result[:success]
|
76
|
+
# Clear error state and store successful result
|
77
|
+
Allgood::CacheStore.instance.write(error_key, nil)
|
78
|
+
Allgood::CacheStore.instance.write(last_result_key, {
|
79
|
+
success: true,
|
80
|
+
message: result[:message],
|
81
|
+
time: Time.current
|
82
|
+
})
|
83
|
+
end
|
84
|
+
end
|
85
|
+
rescue Timeout::Error, Allgood::CheckFailedError, StandardError => e
|
86
|
+
error_message = case e
|
87
|
+
when Timeout::Error
|
88
|
+
"Check timed out after #{check[:timeout]} seconds"
|
89
|
+
when Allgood::CheckFailedError
|
90
|
+
e.message
|
91
|
+
else
|
92
|
+
"Error: #{e.message}"
|
54
93
|
end
|
55
|
-
|
56
|
-
#
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
94
|
+
|
95
|
+
# Store error state and failed result
|
96
|
+
Allgood::CacheStore.instance.write(error_key, error_message)
|
97
|
+
Allgood::CacheStore.instance.write(last_result_key, {
|
98
|
+
success: false,
|
99
|
+
message: error_message,
|
100
|
+
time: Time.current
|
101
|
+
})
|
102
|
+
result = { success: false, message: error_message }
|
61
103
|
end
|
62
104
|
|
63
105
|
{
|
data/examples/allgood.rb
CHANGED
@@ -106,7 +106,10 @@ end
|
|
106
106
|
|
107
107
|
check "ActiveStorage can store images, retrieve them, and purge them" do
|
108
108
|
blob = ActiveStorage::Blob.create_and_upload!(io: StringIO.new(TEST_IMAGE), filename: "allgood-test-image-#{Time.now.to_i}.jpg", content_type: "image/jpeg")
|
109
|
-
|
109
|
+
blob_key = blob.key
|
110
|
+
make_sure blob.persisted? && blob.service.exist?(blob_key)
|
111
|
+
blob.purge
|
112
|
+
make_sure !blob.service.exist?(blob_key), "Image needs to be successfully stored, retrieved, and purged from #{ActiveStorage::Blob.service.name} (#{ActiveStorage::Blob.service.class.name})"
|
110
113
|
end
|
111
114
|
|
112
115
|
# --- CACHE ---
|
@@ -143,18 +146,65 @@ check "The percentage of failed jobs in the last 24 hours is less than 1%", only
|
|
143
146
|
end
|
144
147
|
end
|
145
148
|
|
149
|
+
# --- ACTION CABLE ---
|
150
|
+
|
151
|
+
check "ActionCable is configured and running" do
|
152
|
+
make_sure ActionCable.server.present?, "ActionCable server should be running"
|
153
|
+
end
|
154
|
+
|
155
|
+
check "ActionCable is configured to accept connections with a valid adapter" do
|
156
|
+
make_sure ActionCable.server.config.allow_same_origin_as_host, "ActionCable server should be configured to accept connections"
|
157
|
+
|
158
|
+
adapter = ActionCable.server.config.cable["adapter"]
|
159
|
+
|
160
|
+
if Rails.env.production?
|
161
|
+
make_sure adapter.in?(["solid_cable", "redis"]), "ActionCable running #{adapter} adapter in #{Rails.env.to_s}"
|
162
|
+
else
|
163
|
+
make_sure adapter.in?(["solid_cable", "async"]), "ActionCable running #{adapter} adapter in #{Rails.env.to_s}"
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
check "ActionCable can broadcast messages and store them in SolidCable" do
|
168
|
+
test_message = "allgood_test_#{Time.now.to_i}"
|
169
|
+
|
170
|
+
begin
|
171
|
+
ActionCable.server.broadcast("allgood_test_channel", { message: test_message })
|
172
|
+
|
173
|
+
# Verify message was stored in SolidCable
|
174
|
+
message = SolidCable::Message.where(channel: "allgood_test_channel")
|
175
|
+
.order(created_at: :desc)
|
176
|
+
.first
|
177
|
+
|
178
|
+
make_sure message.present?, "Message should be stored in SolidCable"
|
179
|
+
make_sure message.payload.include?(test_message) && message.destroy, "Message payload should contain our test message"
|
180
|
+
rescue => e
|
181
|
+
make_sure false, "Failed to broadcast/verify message: #{e.message}"
|
182
|
+
end
|
183
|
+
end
|
184
|
+
|
146
185
|
# --- SYSTEM ---
|
147
186
|
|
148
|
-
check "Disk space usage is below 90%" do
|
187
|
+
check "Disk space usage is below 90%", only: :production do
|
149
188
|
usage = `df -h / | tail -1 | awk '{print $5}' | sed 's/%//'`.to_i
|
150
189
|
expect(usage).to_be_less_than(90)
|
151
190
|
end
|
152
191
|
|
153
|
-
check "Memory usage is below 90%" do
|
192
|
+
check "Memory usage is below 90%", only: :production do
|
154
193
|
usage = `free | grep Mem | awk '{print $3/$2 * 100.0}' | cut -d. -f1`.to_i
|
155
194
|
expect(usage).to_be_less_than(90)
|
156
195
|
end
|
157
196
|
|
197
|
+
# --- SITEMAP ---
|
198
|
+
|
199
|
+
check "The sitemap generator is available" do
|
200
|
+
make_sure SitemapGenerator.present?
|
201
|
+
end
|
202
|
+
|
203
|
+
check "sitemap.xml.gz exists", only: :production do
|
204
|
+
make_sure File.exist?(Rails.public_path.join("sitemap.xml.gz"))
|
205
|
+
end
|
206
|
+
|
207
|
+
|
158
208
|
# --- USAGE-DEPENDENT CHECKS ---
|
159
209
|
|
160
210
|
check "SolidQueue has processed jobs in the last 24 hours", only: :production do
|
data/lib/allgood/cache_store.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Allgood
  # Process-wide key/value store for check results and rate-limit counters.
  #
  # Values are written to Rails.cache when it is usable and to an in-process
  # Hash otherwise. The Hash fallback has no expiry and is not shared between
  # processes.
  class CacheStore
    SECONDS_PER_HOUR = 3_600
    SECONDS_PER_DAY = 86_400

    # Shape of the stamp used for daily rate-limit periods ("2024-10-27").
    # Hourly stamps carry an extra "-HH" suffix and therefore do not match.
    DAY_STAMP = /\A\d{4}-\d{2}-\d{2}\z/

    # Key used to probe that Rails.cache can round-trip a value.
    PROBE_KEY = "allgood_rails_cache_test_ok"

    # @return [Allgood::CacheStore] the shared instance
    def self.instance
      @instance ||= new
    end

    def initialize
      @memory_store = {}
    end

    # @param key [String]
    # @return [Object, nil] the stored value, or nil when absent
    def read(key)
      rails_cache_available? ? Rails.cache.read(key) : @memory_store[key]
    end

    # Stores +value+ under +key+.
    #
    # @param key [String]
    # @param value [Object]
    # @param expires_in [Numeric, nil] lifetime in seconds for Rails.cache;
    #   when nil it is derived from the key/value (see #default_expiry)
    # @return [Object] whatever the backing store returns
    def write(key, value, expires_in: nil)
      if rails_cache_available?
        Rails.cache.write(key, value, expires_in: expires_in || default_expiry(key, value))
      else
        @memory_store[key] = value
      end
    end

    # Best-effort removal of allgood keys stamped with the date two days ago.
    # Never raises: failures are logged and swallowed.
    def cleanup_old_keys
      return unless rails_cache_available?
      return unless Rails.cache.respond_to?(:delete_matched)

      stale_day = (Time.current - 2.days).strftime('%Y-%m-%d')
      Rails.cache.delete_matched("allgood:*:*:#{stale_day}*")
    rescue StandardError, NotImplementedError => e
      # NotImplementedError is a ScriptError, not a StandardError; cache stores
      # that do not support delete_matched raise it even though they respond
      # to the method.
      log_warning "Allgood: Failed to cleanup old cache keys: #{e.message}"
    end

    private

    # Daily rate-limit state must outlive the hour, otherwise "N times per day"
    # counters silently reset every hour. Such entries are recognised by a
    # day stamp in the last key segment (runs counter) or as the value
    # (current-period marker). Everything else keeps the one-hour lifetime.
    def default_expiry(key, value)
      candidates = [key.to_s.split(":").last, value]
      daily = candidates.any? { |candidate| candidate.is_a?(String) && DAY_STAMP.match?(candidate) }
      daily ? SECONDS_PER_DAY : SECONDS_PER_HOUR
    end

    # True when Rails.cache exists and can round-trip a probe value.
    def rails_cache_available?
      return false unless defined?(Rails) && Rails.respond_to?(:cache)

      cache = Rails.cache
      return false unless cache.respond_to?(:read) && cache.respond_to?(:write)

      cache.write(PROBE_KEY, "true") && cache.read(PROBE_KEY) == "true"
    rescue StandardError => e
      log_warning "Allgood: Rails.cache not available (#{e.message}), falling back to memory store"
      false
    end

    # Logs through Rails.logger when present, otherwise to stderr, so that
    # logging itself can never raise from inside a rescue.
    def log_warning(message)
      logger = Rails.logger if defined?(Rails) && Rails.respond_to?(:logger)
      logger ? logger.warn(message) : Kernel.warn(message)
    end
  end
end
|
data/lib/allgood/configuration.rb
CHANGED
@@ -17,6 +17,18 @@ module Allgood
|
|
17
17
|
status: :pending
|
18
18
|
}
|
19
19
|
|
20
|
+
# Handle rate limiting
|
21
|
+
if options[:run]
|
22
|
+
begin
|
23
|
+
check_info[:rate] = parse_run_frequency(options[:run])
|
24
|
+
rescue ArgumentError => e
|
25
|
+
check_info[:status] = :skipped
|
26
|
+
check_info[:skip_reason] = "Invalid run frequency: #{e.message}"
|
27
|
+
@checks << check_info
|
28
|
+
return
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
20
32
|
# Handle environment-specific options
|
21
33
|
if options[:only]
|
22
34
|
environments = Array(options[:only])
|
@@ -66,6 +78,97 @@ module Allgood
|
|
66
78
|
# Evaluates a check body in the context of a fresh CheckRunner, so the DSL
# helpers defined on CheckRunner are callable inside the block.
#
# @yield the check body
# @return [Object] whatever the block returns
# @raise [ArgumentError] when called without a block
def run_check(&block)
  # instance_eval(&nil) would fail with an opaque arity error; fail clearly.
  raise ArgumentError, "run_check requires a block" unless block

  CheckRunner.new.instance_eval(&block)
end
|
81
|
+
|
82
|
+
# Decides whether a check may run now under its rate limit and records the run.
#
# Checks without a :rate always run. For rate-limited checks the per-period
# run counter lives in Allgood::CacheStore. When the check is not allowed to
# run, check[:skip_reason] is set to a human-readable explanation.
#
# @param check [Hash] check definition; reads :name and :rate
#   (:rate is a Hash with :max_runs and :period), writes :skip_reason
# @return [Boolean] true when the check should be executed now
def should_run_check?(check)
  rate = check[:rate]
  return true unless rate

  store = Allgood::CacheStore.instance
  slug = check[:name].parameterize
  period_stamp = current_period(rate)

  last_run_key = "allgood:last_run:#{slug}"
  runs_key = "allgood:runs_count:#{slug}:#{period_stamp}"
  error_key = "allgood:error:#{slug}"
  period_key = "allgood:current_period:#{slug}"

  period_runs = store.read(runs_key).to_i

  # A new period resets the counter and also drops the stored error, so a
  # failed check is retried in the next period as the skip message promises.
  # Without this the error would block the check until the cache entry
  # expired, and indefinitely on the in-memory fallback.
  if store.read(period_key) != period_stamp
    period_runs = 0
    store.write(period_key, period_stamp)
    store.write(runs_key, 0)
    store.write(error_key, nil)
  end

  rate_info = "Rate limited (#{period_runs}/#{rate[:max_runs]} runs this #{rate[:period]})"

  # A failure recorded in this period blocks further runs until the period ends.
  if store.read(error_key)
    retry_at = next_period_start(rate).strftime('%H:%M:%S %Z')
    check[:skip_reason] = "#{rate_info}. Waiting until #{retry_at} to retry failed check"
    return false
  end

  if period_runs < rate[:max_runs]
    # The run is counted before the check executes.
    store.write(last_run_key, Time.current)
    store.write(runs_key, period_runs + 1)
    return true
  end

  next_run_at = next_period_start(rate).strftime('%H:%M:%S %Z')
  check[:skip_reason] = "#{rate_info}. Next check at #{next_run_at}"
  false
end
|
125
|
+
|
126
|
+
private
|
127
|
+
|
128
|
+
# Parses a rate-limit description such as "2 times per day".
#
# The whole string must match (case-insensitive, surrounding whitespace
# allowed). Anchoring matters: an unanchored match would read
# "1.5 times per day" as 5 runs and accept "2 times per daylight".
#
# @param frequency [#to_s] e.g. "4 times per hour"
# @return [Hash] { max_runs: Integer, period: "day" | "hour" }
# @raise [ArgumentError] on an unsupported format or a run count outside 1..1000
def parse_run_frequency(frequency)
  match = /\A\s*(\d+)\s+times?\s+per\s+(day|hour)\s*\z/i.match(frequency.to_s)
  unless match
    raise ArgumentError, "Unsupported frequency format. Use 'N times per day' or 'N times per hour'"
  end

  max_runs = match[1].to_i
  raise ArgumentError, "Number of runs must be positive" if max_runs <= 0
  raise ArgumentError, "Maximum 1000 runs per period allowed" if max_runs > 1000

  { max_runs: max_runs, period: match[2].downcase }
end
|
143
|
+
|
144
|
+
# Returns the stamp identifying the rate-limit period that contains now.
#
# @param rate [Hash] reads :period ("day" or "hour"; a Symbol is accepted too)
# @return [String] "YYYY-MM-DD" for daily limits, "YYYY-MM-DD-HH" for hourly
# @raise [ArgumentError] for any other period, matching next_period_start;
#   a nil stamp would put every run into one bucket that never resets
def current_period(rate)
  case rate[:period].to_s
  when 'day'
    Time.current.strftime('%Y-%m-%d')
  when 'hour'
    Time.current.strftime('%Y-%m-%d-%H')
  else
    raise ArgumentError, "Unsupported period: #{rate[:period]}"
  end
end
|
152
|
+
|
153
|
+
# Tells whether +last_run+ falls in an earlier rate-limit period than now.
#
# @param last_run [Time] time of the previous run (must respond to #to_date
#   and #strftime)
# @param rate [Hash] reads :period ("day" or "hour")
# @return [Boolean, nil] nil when the period is neither "day" nor "hour"
def new_period?(last_run, rate)
  case rate[:period]
  when 'day'
    # Compare by value: Date#equal? tests object identity and is never true
    # for two separately built dates, which made every run look like a new day.
    last_run.to_date != Time.current.to_date
  when 'hour'
    last_run.strftime('%Y-%m-%d-%H') != Time.current.strftime('%Y-%m-%d-%H')
  end
end
|
161
|
+
|
162
|
+
# Returns the moment the next rate-limit period begins, i.e. when the run
# counter resets.
#
# @param rate [Hash] reads :period ("day" or "hour"; a Symbol is accepted too)
# @return [Time] start of the next day or of the next hour
# @raise [ArgumentError] for any other period
def next_period_start(rate)
  now = Time.current

  case rate[:period].to_s
  when 'day'
    now.beginning_of_day + 1.day
  when 'hour'
    now.beginning_of_hour + 1.hour
  else
    raise ArgumentError, "Unsupported period: #{rate[:period]}"
  end
end
|
69
172
|
end
|
70
173
|
|
71
174
|
class CheckRunner
|
data/lib/allgood/version.rb
CHANGED
data/lib/allgood.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: allgood
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- rameerez
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-11-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -48,6 +48,7 @@ files:
|
|
48
48
|
- config/routes.rb
|
49
49
|
- examples/allgood.rb
|
50
50
|
- lib/allgood.rb
|
51
|
+
- lib/allgood/cache_store.rb
|
51
52
|
- lib/allgood/configuration.rb
|
52
53
|
- lib/allgood/engine.rb
|
53
54
|
- lib/allgood/version.rb
|
@@ -75,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
75
76
|
- !ruby/object:Gem::Version
|
76
77
|
version: '0'
|
77
78
|
requirements: []
|
78
|
-
rubygems_version: 3.5.
|
79
|
+
rubygems_version: 3.5.22
|
79
80
|
signing_key:
|
80
81
|
specification_version: 4
|
81
82
|
summary: Add quick, simple, and beautiful health checks to your Rails application.
|