solid_observer 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -0
- data/README.md +195 -82
- data/app/assets/javascripts/solid_observer/live_poll.js +3 -1
- data/app/controllers/solid_observer/application_controller.rb +1 -0
- data/app/controllers/solid_observer/cable_dashboard_controller.rb +52 -0
- data/app/controllers/solid_observer/cable_operations_controller.rb +16 -0
- data/app/controllers/solid_observer/cache_dashboard_controller.rb +52 -0
- data/app/controllers/solid_observer/cache_operations_controller.rb +24 -0
- data/app/controllers/solid_observer/dashboard_controller.rb +38 -1
- data/app/controllers/solid_observer/storages_controller.rb +1 -1
- data/app/helpers/solid_observer/application_helper.rb +268 -5
- data/app/helpers/solid_observer/dashboard_helper.rb +30 -11
- data/app/models/solid_observer/cable_event.rb +13 -0
- data/app/models/solid_observer/cable_metric.rb +12 -0
- data/app/models/solid_observer/cache_event.rb +15 -0
- data/app/models/solid_observer/cache_metric.rb +13 -0
- data/app/models/solid_observer/storage_info.rb +4 -1
- data/app/views/layouts/solid_observer/application.html.erb +157 -19
- data/app/views/solid_observer/cable_dashboard/_charts.html.erb +31 -0
- data/app/views/solid_observer/cable_dashboard/_recent_events.html.erb +34 -0
- data/app/views/solid_observer/cable_dashboard/_summary.html.erb +34 -0
- data/app/views/solid_observer/cable_dashboard/index.html.erb +118 -0
- data/app/views/solid_observer/cache_dashboard/_charts.html.erb +40 -0
- data/app/views/solid_observer/cache_dashboard/_recent_events.html.erb +34 -0
- data/app/views/solid_observer/cache_dashboard/_summary.html.erb +39 -0
- data/app/views/solid_observer/cache_dashboard/index.html.erb +62 -0
- data/app/views/solid_observer/cache_operations/_confirm_clear.html.erb +6 -0
- data/app/views/solid_observer/cache_operations/index.html.erb +60 -0
- data/app/views/solid_observer/dashboard/_queue_table.html.erb +1 -0
- data/app/views/solid_observer/dashboard/index.html.erb +32 -5
- data/app/views/solid_observer/events/index.html.erb +1 -0
- data/app/views/solid_observer/jobs/index.html.erb +1 -0
- data/app/views/solid_observer/jobs/show.html.erb +3 -3
- data/app/views/solid_observer/storages/show.html.erb +90 -32
- data/config/routes.rb +7 -0
- data/db/migrate/20260601000001_create_solid_observer_cache_events.rb +22 -0
- data/db/migrate/20260601000002_create_solid_observer_cache_metrics.rb +18 -0
- data/db/migrate/20260602000001_add_component_to_solid_observer_storage_infos.rb +8 -0
- data/db/migrate/20260612000001_add_event_type_recorded_at_index_to_cache_events.rb +21 -0
- data/db/migrate/20260619000001_create_solid_observer_cable_events.rb +22 -0
- data/db/migrate/20260619000002_create_solid_observer_cable_metrics.rb +17 -0
- data/lib/generators/solid_observer/install_generator.rb +8 -1
- data/lib/generators/solid_observer/templates/initializer.rb.tt +20 -4
- data/lib/solid_observer/base_event.rb +1 -1
- data/lib/solid_observer/base_metric.rb +1 -1
- data/lib/solid_observer/base_record.rb +8 -0
- data/lib/solid_observer/cable_event_buffer.rb +28 -0
- data/lib/solid_observer/cable_metric_buffer.rb +230 -0
- data/lib/solid_observer/cable_subscriber.rb +57 -0
- data/lib/solid_observer/cache_event_buffer.rb +28 -0
- data/lib/solid_observer/cache_metric_buffer.rb +229 -0
- data/lib/solid_observer/cache_subscriber.rb +47 -0
- data/lib/solid_observer/chart_buffer.rb +84 -27
- data/lib/solid_observer/cli/storage.rb +16 -13
- data/lib/solid_observer/configuration.rb +67 -5
- data/lib/solid_observer/engine.rb +70 -15
- data/lib/solid_observer/event_buffer_core.rb +218 -0
- data/lib/solid_observer/queue_event_buffer.rb +9 -201
- data/lib/solid_observer/services/cable_operations.rb +74 -0
- data/lib/solid_observer/services/cable_stats.rb +385 -0
- data/lib/solid_observer/services/cache_operations.rb +115 -0
- data/lib/solid_observer/services/cache_stats.rb +346 -0
- data/lib/solid_observer/services/cleanup_storage.rb +98 -47
- data/lib/solid_observer/services/database_size.rb +13 -8
- data/lib/solid_observer/services/flush_cable_event_buffer.rb +54 -0
- data/lib/solid_observer/services/flush_cable_metrics.rb +54 -0
- data/lib/solid_observer/services/flush_cache_event_buffer.rb +54 -0
- data/lib/solid_observer/services/flush_cache_metrics.rb +56 -0
- data/lib/solid_observer/services/record_cable_event.rb +114 -0
- data/lib/solid_observer/services/record_cable_metric.rb +73 -0
- data/lib/solid_observer/services/record_cache_event.rb +165 -0
- data/lib/solid_observer/services/record_cache_metric.rb +66 -0
- data/lib/solid_observer/services/storage_info_snapshot.rb +216 -0
- data/lib/solid_observer/version.rb +1 -1
- data/lib/solid_observer.rb +36 -11
- data/lib/tasks/solid_observer.rake +111 -21
- metadata +47 -5
- data/bin/console +0 -11
- data/bin/quality_gate +0 -95
- data/bin/setup +0 -8
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SolidObserver
|
|
4
|
+
module Services
|
|
5
|
+
# Runs operator-triggered maintenance (clear / prune) against the application
# cache store when that store is a SolidCache store.
#
# Every operation returns a result hash of the form
# `{ok: true|false, message: String}`. Errors raised while an operation runs
# are logged and reported through that hash instead of being raised.
class CacheOperations
  # User-facing copy, keyed by operation and then by message kind.
  # `:unavailable` is a single string rather than a nested hash.
  MESSAGES = {
    clear: {
      confirmation: "Clear all SolidCache entries? This evicts cached application data and may slow requests while the cache rebuilds. This cannot be undone.",
      success: "Cache cleared successfully.",
      failure: "Cache clear failed. SolidCache is unavailable or rejected the operation."
    }.freeze,
    prune: {
      success: "Expired cache entries pruned successfully.",
      failure: "Cache prune failed. SolidCache is unavailable or rejected the operation."
    }.freeze,
    unavailable: "Cache controls are unavailable because SolidCache is not enabled or not detected."
  }.freeze

  class << self
    # @return [Boolean] see {CacheOperations#available?}
    def available?
      new.available?
    end

    # @return [Hash] see {CacheOperations#clear}
    def clear
      new.clear
    end

    # @return [Hash] see {CacheOperations#prune}
    def prune
      new.prune
    end

    # Looks up a message from MESSAGES.
    #
    # @param operation [Symbol] :clear, :prune or :unavailable
    # @param key [Symbol, nil] message kind (e.g. :success); ignored for :unavailable
    # @return [String]
    # @raise [KeyError] when the operation or key is not present in MESSAGES
    def message(operation, key = nil)
      return MESSAGES.fetch(:unavailable) if operation == :unavailable

      MESSAGES.fetch(operation).fetch(key)
    end

    # @return [String] the message used when cache controls cannot be offered
    def unavailable_message
      message(:unavailable)
    end
  end

  # Whether cache controls can be offered: the SolidCache integration is
  # enabled in the SolidObserver config and Rails.cache is a SolidCache store.
  #
  # @return [Boolean] always strictly true or false, never nil
  def available?
    return false unless SolidObserver.config.solid_cache_enabled?

    compatible_store?
  end

  # Clears the cache store.
  #
  # @return [Hash] `{ok:, message:}` result
  def clear
    run(:clear) { cache_store.clear }
  end

  # Prunes the cache store via `cleanup`, falling back to SolidCache's entry
  # expiry when the store does not implement `cleanup`.
  #
  # @return [Hash] `{ok:, message:}` result
  def prune
    run(:prune) { prune_with_fallback }
  end

  private

  # Shared entry point for clear/prune: short-circuits with the unavailable
  # result, otherwise runs the block and maps the outcome to a result hash.
  # The availability check stays outside perform_operation so an error raised
  # by it is not converted into an operation failure.
  def run(operation, &work)
    return {ok: false, message: self.class.unavailable_message} unless available?

    perform_operation(
      operation,
      success_message: self.class.message(operation, :success),
      failure_message: self.class.message(operation, :failure),
      &work
    )
  end

  # `defined?` yields nil (not false) for an unknown constant, so it is
  # checked in a guard to keep this predicate strictly boolean.
  def compatible_store?
    return false unless defined?(::SolidCache::Store)

    cache_store.is_a?(::SolidCache::Store)
  end

  def cache_store
    Rails.cache
  end

  # Runs the block and converts any StandardError into a logged failure result.
  def perform_operation(name, success_message:, failure_message:)
    yield
    {ok: true, message: success_message}
  rescue => error
    log_failure(name, error)
    {ok: false, message: failure_message}
  end

  # NotImplementedError is not a StandardError, so it needs this explicit
  # rescue; a bare `rescue` would not catch it.
  def prune_with_fallback
    cache_store.cleanup
  rescue NotImplementedError
    prune_with_solid_cache_fallback
  end

  # Expires entries through SolidCache::Entry using the store's own limits.
  # A NameError (which includes NoMethodError) is re-raised as a RuntimeError
  # so perform_operation reports it as a failed prune.
  def prune_with_solid_cache_fallback
    cache_store.with_each_connection do
      ::SolidCache::Entry.expire(
        cache_store.expiry_batch_size,
        max_age: cache_store.max_age,
        max_entries: cache_store.max_entries,
        max_size: cache_store.max_size
      )
    end
  rescue NameError
    raise "cleanup unsupported"
  end

  # Only the error class is written to the log; the error message is omitted.
  def log_failure(name, error)
    Rails.logger&.warn("[SolidObserver] Cache #{name} failed: #{error.class}")
  end
end
|
|
114
|
+
end
|
|
115
|
+
end
|
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SolidObserver
|
|
4
|
+
module Services
|
|
5
|
+
# Builds the cache dashboard payload for a trailing time window from
# SolidObserver::CacheMetric rows (totals and bucketed trends) and
# SolidObserver::CacheEvent rows (error / slow-operation stability summary).
#
# `call` never raises a StandardError: on failure it logs and returns a
# zeroed payload carrying an `:error` message.
class CacheStats
  # Selectable dashboard ranges, keyed by the string accepted by parse_range.
  RANGES = {
    "15m" => 15.minutes,
    "30m" => 30.minutes,
    "1h" => 1.hour,
    "7h" => 7.hours,
    "1d" => 1.day,
    "7d" => 7.days,
    "14d" => 14.days
  }.freeze
  DEFAULT_RANGE = "15m"
  # Shape of the activity-trend payload when there is nothing to chart.
  ACTIVITY_TREND_EMPTY = {
    available: false,
    hit_rate: [],
    operations: [],
    errors: []
  }.freeze
  # Shape of the stability payload when event data cannot be read.
  STABILITY_EMPTY = {
    available: false,
    state: :stable,
    error_count: 0,
    slow_count: 0,
    latest_recorded_at: nil
  }.freeze
  # Pairs of [largest window in seconds, bucket size in seconds]; the first
  # pair whose limit covers the window wins (see TrendData#bucket_seconds).
  BUCKET_RULES = [
    [2.hours.to_i, 1.minute.to_i],
    [1.day.to_i, 15.minutes.to_i],
    [7.days.to_i, 2.hours.to_i]
  ].freeze

  # Groups metric rows into fixed-size time buckets and exposes them as
  # chart series of `{t: bucket_start_epoch, v: value}` points.
  #
  # Rows are arrays laid out as
  # [period_start, operations_count, hits_count, misses_count, errors_count, ...].
  class TrendData
    # Running totals for one time bucket.
    class BucketSnapshot
      attr_reader :operations_count, :hits_count, :misses_count, :errors_count

      def initialize
        @operations_count = 0
        @hits_count = 0
        @misses_count = 0
        @errors_count = 0
      end

      # Adds one metric row (see TrendData for the column layout).
      def add(row)
        @operations_count += row[1].to_i
        @hits_count += row[2].to_i
        @misses_count += row[3].to_i
        @errors_count += row[4].to_i
      end

      # @return [Float] hits / (hits + misses), or 0.0 when there were no reads
      def hit_rate
        read_outcomes = hits_count + misses_count
        return 0.0 if read_outcomes.zero?

        hits_count.to_f / read_outcomes
      end

      # @param key [Symbol] name of one of the public readers
      def value_for(key)
        public_send(key)
      end
    end

    # @param metric_rows [Array<Array>] rows as described on the class
    # @param window [#to_i] window length; `to_i` is read as seconds
    # @param current_time [Time] end of the window
    def initialize(metric_rows:, window:, current_time:)
      @metric_rows = metric_rows
      @window = window
      @current_time = current_time
    end

    # @return [Hash] `{available:, hit_rate:, operations:, errors:}`; the
    #   empty shape (available: false) when there are no rows
    def to_h
      # transform_values(&:dup) hands out fresh arrays; a plain Hash#dup would
      # share the constant's unfrozen arrays with every caller.
      return CacheStats::ACTIVITY_TREND_EMPTY.transform_values(&:dup) if metric_rows.empty?

      buckets = blank_buckets
      metric_rows.each do |row|
        # Rows that align outside the prepared buckets are ignored.
        buckets[align_bucket(row[0].to_i)]&.add(row)
      end

      {
        available: true,
        hit_rate: hit_rate_series(buckets),
        operations: count_series(buckets, :operations_count),
        errors: count_series(buckets, :errors_count)
      }
    end

    private

    attr_reader :metric_rows, :window, :current_time

    # One empty snapshot per bucket from the window start to the current time,
    # so buckets without data still appear in the series.
    def blank_buckets
      start_bucket = align_bucket((current_time - window).to_i)
      end_bucket = align_bucket(current_time.to_i)

      start_bucket.step(end_bucket, bucket_seconds).each_with_object({}) do |timestamp, buckets|
        buckets[timestamp] = BucketSnapshot.new
      end
    end

    def hit_rate_series(buckets)
      buckets.map do |timestamp, totals|
        {t: timestamp, v: totals.hit_rate}
      end
    end

    def count_series(buckets, key)
      buckets.map do |timestamp, totals|
        {t: timestamp, v: totals.value_for(key)}
      end
    end

    # Bucket size in seconds for this window; windows larger than every rule
    # use 4 hours. Memoized because align_bucket calls it for every row.
    def bucket_seconds
      @bucket_seconds ||= begin
        seconds = window.to_i
        CacheStats::BUCKET_RULES.find { |limit, _bucket| seconds <= limit }&.last || 4.hours.to_i
      end
    end

    # Rounds an epoch value down to the start of its bucket.
    def align_bucket(value)
      (value / bucket_seconds) * bucket_seconds
    end
  end

  # Summarises error and slow cache events inside the window.
  class StabilityData
    # Counter of error / slow events plus the most recent tracked timestamp.
    class EventCounts
      attr_reader :error_count, :slow_count, :latest_recorded_at

      def initialize(error_count: 0, slow_count: 0, latest_recorded_at: nil)
        @error_count = error_count.to_i
        @slow_count = slow_count.to_i
        @latest_recorded_at = latest_recorded_at
      end

      # Counts one event if it is an error or slow; other events are ignored.
      def record(recorded_at:, error_class:, duration:)
        kind = event_kind(error_class: error_class, duration: duration)
        return unless kind

        @latest_recorded_at = [latest_recorded_at, recorded_at].compact.max
        @error_count += 1 if kind == :error
        @slow_count += 1 if kind == :slow
      end

      # @return [Symbol] :critical with any error, :degraded with any slow
      #   event, otherwise :stable
      def state
        return :critical if error_count.positive?
        return :degraded if slow_count.positive?

        :stable
      end

      def to_h
        {
          available: true,
          state: state,
          error_count: error_count,
          slow_count: slow_count,
          latest_recorded_at: latest_recorded_at
        }
      end

      private

      # An error takes precedence over slowness; slowness is judged against
      # the configured cache_slow_threshold.
      def event_kind(error_class:, duration:)
        return :error if error_class.present?
        return :slow if duration.to_f >= SolidObserver.config.cache_slow_threshold.to_f

        nil
      end
    end

    # @param window [ActiveSupport::Duration, Numeric] window length
    # @param current_time [Time] end of the window
    def initialize(window:, current_time:)
      @window = window
      @current_time = current_time
    end

    # @return [Hash] stability payload, or the empty shape when the query
    #   raises ActiveRecord::StatementInvalid
    def to_h
      event_counts.to_h
    rescue ActiveRecord::StatementInvalid
      CacheStats::STABILITY_EMPTY.dup
    end

    private

    attr_reader :window, :current_time

    # Computes both counts and the latest tracked timestamp in one query.
    def event_counts
      error_count, slow_count, latest_recorded_at = SolidObserver::CacheEvent.where(recorded_at: window_range).pick(
        Arel.sql("COUNT(CASE WHEN #{error_condition_sql} THEN 1 END)"),
        Arel.sql("COUNT(CASE WHEN #{slow_condition_sql} THEN 1 END)"),
        Arel.sql("MAX(CASE WHEN #{tracked_condition_sql} THEN recorded_at END)")
      )

      EventCounts.new(
        error_count: error_count,
        slow_count: slow_count,
        latest_recorded_at: cast_recorded_at(latest_recorded_at)
      )
    end

    # The MAX(...) expression is not a model attribute, so adapters that do
    # not report a type for expression columns can return it as a raw String.
    # Strings are deserialized with the recorded_at attribute type; values the
    # adapter already converted are returned untouched.
    def cast_recorded_at(raw)
      return raw unless raw.is_a?(String)

      SolidObserver::CacheEvent.type_for_attribute("recorded_at").deserialize(raw)
    end

    def window_range
      (current_time - window)..current_time
    end

    def tracked_condition_sql
      "(#{error_condition_sql}) OR (#{slow_condition_sql})"
    end

    def error_condition_sql
      "error_class IS NOT NULL AND TRIM(error_class) != ''"
    end

    # slow_threshold is forced through to_f before interpolation, so only a
    # numeric literal is ever embedded in the SQL.
    def slow_condition_sql
      "(error_class IS NULL OR TRIM(error_class) = '') AND duration >= #{slow_threshold}"
    end

    def slow_threshold
      SolidObserver.config.cache_slow_threshold.to_f
    end
  end

  class << self
    # @param value [#to_s] requested range key
    # @param fallback [String] returned when the key is not in RANGES
    # @return [String] a RANGES key, or the fallback as given
    def parse_range(value, fallback: DEFAULT_RANGE)
      range_key = value.to_s
      RANGES.key?(range_key) ? range_key : fallback
    end

    # @return [ActiveSupport::Duration] duration for the parsed range
    # @raise [KeyError] when the fallback is used and is not a RANGES key
    def range_duration(value, fallback: DEFAULT_RANGE)
      RANGES.fetch(parse_range(value, fallback: fallback))
    end

    # @param window [ActiveSupport::Duration, Numeric] trailing window length
    # @return [Hash] dashboard payload (see #call)
    def call(window:)
      new.call(window: window)
    end
  end

  # @param window [ActiveSupport::Duration, Numeric] trailing window length
  # @return [Hash] dashboard payload; on any StandardError a zeroed payload
  #   with an :error key
  def call(window:)
    current_time = Time.current
    dashboard_response(window: window, current_time: current_time)
  rescue => error
    Rails.logger&.error("[SolidObserver] CacheStats call failed: #{error.class} #{error.message}") if defined?(Rails)
    error_response
  end

  private

  def dashboard_response(window:, current_time:)
    time_window = (current_time - window)..current_time
    metric_rows = metric_rows(time_window: time_window)

    build_response(
      window: window,
      totals: metric_totals(time_window: time_window),
      dashboard_data: dashboard_data(window: window, current_time: current_time, metric_rows: metric_rows)
    )
  end

  def build_response(window:, totals:, dashboard_data:)
    operations_count, hits_count, misses_count, errors_count, duration_total = totals.values_at(
      :operations_count,
      :hits_count,
      :misses_count,
      :errors_count,
      :duration_total
    )
    read_outcomes_count = hits_count + misses_count
    # Throughput is per minute; windows shorter than a minute count as one.
    window_minutes = [window.to_f / 60.0, 1.0].max

    {
      hit_rate: ratio(hits_count, read_outcomes_count),
      throughput: operations_count.to_f / window_minutes,
      error_rate: ratio(errors_count, operations_count),
      avg_duration: ratio(duration_total, operations_count),
      operations_count: operations_count,
      hits_count: hits_count,
      misses_count: misses_count,
      errors_count: errors_count,
      duration_total: duration_total,
      activity_trends: dashboard_data[:activity_trends],
      stability: dashboard_data[:stability]
    }
  end

  def dashboard_data(window:, current_time:, metric_rows:)
    {
      activity_trends: TrendData.new(
        metric_rows: metric_rows,
        window: window,
        current_time: current_time
      ).to_h,
      stability: StabilityData.new(window: window, current_time: current_time).to_h
    }
  end

  # Column order here defines the row layout TrendData relies on.
  def metric_rows(time_window:)
    SolidObserver::CacheMetric.where(period_start: time_window).pluck(
      :period_start,
      :operations_count,
      :hits_count,
      :misses_count,
      :errors_count,
      :duration_total
    )
  end

  # Sums the metric columns in SQL. Raw aggregate results are not cast by
  # model attribute types and some adapters return them as BigDecimal, so they
  # are coerced to the Integer / Float types error_response also uses.
  def metric_totals(time_window:)
    operations_count, hits_count, misses_count, errors_count, duration_total = SolidObserver::CacheMetric.where(
      period_start: time_window
    ).pick(
      Arel.sql("COALESCE(SUM(operations_count), 0)"),
      Arel.sql("COALESCE(SUM(hits_count), 0)"),
      Arel.sql("COALESCE(SUM(misses_count), 0)"),
      Arel.sql("COALESCE(SUM(errors_count), 0)"),
      Arel.sql("COALESCE(SUM(duration_total), 0.0)")
    )

    {
      operations_count: operations_count.to_i,
      hits_count: hits_count.to_i,
      misses_count: misses_count.to_i,
      errors_count: errors_count.to_i,
      duration_total: duration_total.to_f
    }
  end

  # @return [Float] numerator / denominator, or 0.0 when the denominator
  #   truncates to zero
  def ratio(numerator, denominator)
    return 0.0 if denominator.to_i.zero?

    numerator.to_f / denominator.to_f
  end

  def error_response
    {
      hit_rate: 0.0,
      throughput: 0.0,
      error_rate: 0.0,
      avg_duration: 0.0,
      operations_count: 0,
      hits_count: 0,
      misses_count: 0,
      errors_count: 0,
      duration_total: 0.0,
      # Fresh arrays per response; see TrendData#to_h.
      activity_trends: ACTIVITY_TREND_EMPTY.transform_values(&:dup),
      stability: STABILITY_EMPTY.dup,
      error: "Service temporarily unavailable"
    }
  end
end
|
|
345
|
+
end
|
|
346
|
+
end
|
|
@@ -1,10 +1,18 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative "database_size"
|
|
4
|
+
require_relative "storage_info_snapshot"
|
|
4
5
|
|
|
5
6
|
module SolidObserver
|
|
6
7
|
module Services
|
|
7
8
|
# Deletes telemetry older than the configured retention, records storage
# snapshots, runs adapter-specific table maintenance and logs the outcome.
class CleanupStorage
  # Maps a lower-cased adapter name to a lambda that turns a list of
  # (already quoted) table names into maintenance SQL statements.
  MAINTENANCE_STATEMENT_BUILDERS = {
    "sqlite" => ->(_tables) { ["VACUUM"] },
    "postgresql" => ->(tables) { tables.map { |table_name| "VACUUM ANALYZE #{table_name}" } },
    "mysql2" => ->(tables) { ["OPTIMIZE TABLE #{tables.join(", ")}"] },
    "trilogy" => ->(tables) { ["OPTIMIZE TABLE #{tables.join(", ")}"] }
  }.freeze
  # Used for adapters without a known maintenance statement.
  NO_MAINTENANCE = ->(_tables) { [] }
  # Formatting options shared by every size shown in the storage warning.
  HUMAN_SIZE_OPTIONS = {precision: 1, significant: false, strip_insignificant_zeros: false}.freeze

  # @return [Integer] see #call
  def self.call
    new.call
  end

  # Runs the cleanup unless realtime mode is enabled.
  #
  # @return [Integer] total number of deleted rows (0 in realtime mode)
  # @raise [StandardError] any cleanup error, re-raised after being logged
  def call
    return 0 if SolidObserver.config.realtime_mode?

    post_cleanup(cleanup_counts)
  rescue => error
    handle_cleanup_failure(error)
  end

  private

  # Deletes expired rows, then records storage snapshots; returns the
  # per-table delete counts.
  def cleanup_counts
    perform_cleanup.tap { record_snapshot_after_cleanup }
  end

  # Logs and re-raises. The error is raised explicitly rather than through a
  # bare `raise`, so this helper does not depend on the caller's `$!`.
  def handle_cleanup_failure(error)
    Rails.logger.error "[SolidObserver] Cleanup failed: #{error.message}"
    raise error
  end

  # @return [Hash{Symbol=>Integer}] rows deleted per telemetry table
  def perform_cleanup
    config = SolidObserver.config
    event_cutoff = config.event_retention.ago

    {
      queue_events: QueueEvent.transaction do
        QueueEvent.where("recorded_at < ?", event_cutoff).delete_all
      end,
      cache_events: delete_telemetry_records(
        SolidObserver::CacheEvent,
        column: :recorded_at,
        cutoff: event_cutoff
      ),
      cache_metrics: delete_telemetry_records(
        SolidObserver::CacheMetric,
        column: :period_start,
        cutoff: config.metrics_retention.ago
      )
    }
  end

  # Maintenance, storage warning and logging; returns the summed counts.
  def post_cleanup(cleanup_counts)
    vacuum_database
    check_storage_warnings
    log_results(cleanup_counts)
    cleanup_counts.values.sum
  end

  # Deletes rows older than the cutoff, or returns 0 when the model's table
  # is not available.
  def delete_telemetry_records(model, column:, cutoff:)
    return 0 unless data_source_available?(model)

    model.where("#{column} < ?", cutoff).delete_all
  end

  def record_snapshot_after_cleanup
    snapshots = StorageInfoSnapshot.call

    # StorageInfo.db_size_bytes is NOT NULL; record_snapshot coerces nil to 0.
    StorageInfo.record_snapshot(db_size: current_database_size, event_count: QueueEvent.count)

    snapshots.each do |snapshot|
      record_component_snapshot(snapshot)
    end
  end

  # Records one component snapshot. Unavailable snapshots are skipped, and so
  # is "queue_observer", which record_snapshot_after_cleanup records itself.
  def record_component_snapshot(snapshot)
    return unless snapshot[:available]
    component = snapshot[:component]
    return if component == "queue_observer"

    StorageInfo.record_snapshot(
      component: component,
      db_size: snapshot[:db_size_bytes],
      event_count: snapshot[:event_count]
    )
  end

  # Best effort: a maintenance failure is logged as a warning, not raised.
  def vacuum_database
    maintenance_statements.each do |statement|
      QueueEvent.connection.execute(statement)
    end
  rescue => error
    Rails.logger.warn "[SolidObserver] Database maintenance failed: #{error.message}"
  end

  # Warns once the database size exceeds max_db_size * warning_threshold.
  def check_storage_warnings
    current_size = current_database_size
    return unless current_size
    return unless current_size > (SolidObserver.config.max_db_size * SolidObserver.config.warning_threshold)

    Rails.logger.warn(storage_warning_message(current_size))
  end

  def storage_warning_message(current_size)
    max_size = SolidObserver.config.max_db_size
    percentage = ((current_size.to_f / max_size) * 100).round(1)
    current_size_human = human_size(current_size)
    max_size_human = human_size(max_size)
    "[SolidObserver] Queue DB approaching limit: #{current_size_human} / #{max_size_human} (#{percentage}%)"
  end

  def human_size(bytes)
    ActiveSupport::NumberHelper.number_to_human_size(bytes, **HUMAN_SIZE_OPTIONS)
  end

  # Memoized with defined? so a nil size is not looked up again.
  def current_database_size
    return @current_database_size if defined?(@current_database_size)

    @current_database_size = DatabaseSize.call(connection: QueueEvent.connection)
  end

  def log_results(cleanup_counts)
    Rails.logger.info(
      "[SolidObserver] Cleaned #{cleanup_counts[:queue_events]} queue events, " \
      "#{cleanup_counts[:cache_events]} cache events, " \
      "#{cleanup_counts[:cache_metrics]} cache metrics"
    )
  end

  # Builds the maintenance SQL for the telemetry tables that exist. Table
  # names are quoted by the connection before being interpolated into SQL.
  def maintenance_statements
    connection = QueueEvent.connection
    tables = [QueueEvent, SolidObserver::CacheEvent, SolidObserver::CacheMetric].filter_map do |model|
      connection.quote_table_name(model.table_name) if data_source_available?(model)
    end
    return [] if tables.empty?

    MAINTENANCE_STATEMENT_BUILDERS.fetch(connection.adapter_name.downcase, NO_MAINTENANCE).call(tables)
  end

  # Whether the model's table exists; connection errors count as "no".
  def data_source_available?(model)
    table_name = model.table_name.to_s
    return false if table_name.empty?

    model.connection.data_source_exists?(table_name)
  rescue *StorageInfoSnapshot::CONNECTION_ERRORS, TypeError
    false
  end
end
|
|
111
162
|
end
|