solid_observer 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -0
- data/README.md +195 -82
- data/app/assets/javascripts/solid_observer/live_poll.js +3 -1
- data/app/controllers/solid_observer/application_controller.rb +1 -0
- data/app/controllers/solid_observer/cable_dashboard_controller.rb +52 -0
- data/app/controllers/solid_observer/cable_operations_controller.rb +16 -0
- data/app/controllers/solid_observer/cache_dashboard_controller.rb +52 -0
- data/app/controllers/solid_observer/cache_operations_controller.rb +24 -0
- data/app/controllers/solid_observer/dashboard_controller.rb +38 -1
- data/app/controllers/solid_observer/storages_controller.rb +1 -1
- data/app/helpers/solid_observer/application_helper.rb +268 -5
- data/app/helpers/solid_observer/dashboard_helper.rb +30 -11
- data/app/models/solid_observer/cable_event.rb +13 -0
- data/app/models/solid_observer/cable_metric.rb +12 -0
- data/app/models/solid_observer/cache_event.rb +15 -0
- data/app/models/solid_observer/cache_metric.rb +13 -0
- data/app/models/solid_observer/storage_info.rb +4 -1
- data/app/views/layouts/solid_observer/application.html.erb +157 -19
- data/app/views/solid_observer/cable_dashboard/_charts.html.erb +31 -0
- data/app/views/solid_observer/cable_dashboard/_recent_events.html.erb +34 -0
- data/app/views/solid_observer/cable_dashboard/_summary.html.erb +34 -0
- data/app/views/solid_observer/cable_dashboard/index.html.erb +118 -0
- data/app/views/solid_observer/cache_dashboard/_charts.html.erb +40 -0
- data/app/views/solid_observer/cache_dashboard/_recent_events.html.erb +34 -0
- data/app/views/solid_observer/cache_dashboard/_summary.html.erb +39 -0
- data/app/views/solid_observer/cache_dashboard/index.html.erb +62 -0
- data/app/views/solid_observer/cache_operations/_confirm_clear.html.erb +6 -0
- data/app/views/solid_observer/cache_operations/index.html.erb +60 -0
- data/app/views/solid_observer/dashboard/_queue_table.html.erb +1 -0
- data/app/views/solid_observer/dashboard/index.html.erb +32 -5
- data/app/views/solid_observer/events/index.html.erb +1 -0
- data/app/views/solid_observer/jobs/index.html.erb +1 -0
- data/app/views/solid_observer/jobs/show.html.erb +3 -3
- data/app/views/solid_observer/storages/show.html.erb +90 -32
- data/config/routes.rb +7 -0
- data/db/migrate/20260601000001_create_solid_observer_cache_events.rb +22 -0
- data/db/migrate/20260601000002_create_solid_observer_cache_metrics.rb +18 -0
- data/db/migrate/20260602000001_add_component_to_solid_observer_storage_infos.rb +8 -0
- data/db/migrate/20260612000001_add_event_type_recorded_at_index_to_cache_events.rb +21 -0
- data/db/migrate/20260619000001_create_solid_observer_cable_events.rb +22 -0
- data/db/migrate/20260619000002_create_solid_observer_cable_metrics.rb +17 -0
- data/lib/generators/solid_observer/install_generator.rb +8 -1
- data/lib/generators/solid_observer/templates/initializer.rb.tt +20 -4
- data/lib/solid_observer/base_event.rb +1 -1
- data/lib/solid_observer/base_metric.rb +1 -1
- data/lib/solid_observer/base_record.rb +8 -0
- data/lib/solid_observer/cable_event_buffer.rb +28 -0
- data/lib/solid_observer/cable_metric_buffer.rb +230 -0
- data/lib/solid_observer/cable_subscriber.rb +57 -0
- data/lib/solid_observer/cache_event_buffer.rb +28 -0
- data/lib/solid_observer/cache_metric_buffer.rb +229 -0
- data/lib/solid_observer/cache_subscriber.rb +47 -0
- data/lib/solid_observer/chart_buffer.rb +84 -27
- data/lib/solid_observer/cli/storage.rb +16 -13
- data/lib/solid_observer/configuration.rb +67 -5
- data/lib/solid_observer/engine.rb +70 -15
- data/lib/solid_observer/event_buffer_core.rb +218 -0
- data/lib/solid_observer/queue_event_buffer.rb +9 -201
- data/lib/solid_observer/services/cable_operations.rb +74 -0
- data/lib/solid_observer/services/cable_stats.rb +385 -0
- data/lib/solid_observer/services/cache_operations.rb +115 -0
- data/lib/solid_observer/services/cache_stats.rb +346 -0
- data/lib/solid_observer/services/cleanup_storage.rb +98 -47
- data/lib/solid_observer/services/database_size.rb +13 -8
- data/lib/solid_observer/services/flush_cable_event_buffer.rb +54 -0
- data/lib/solid_observer/services/flush_cable_metrics.rb +54 -0
- data/lib/solid_observer/services/flush_cache_event_buffer.rb +54 -0
- data/lib/solid_observer/services/flush_cache_metrics.rb +56 -0
- data/lib/solid_observer/services/record_cable_event.rb +114 -0
- data/lib/solid_observer/services/record_cable_metric.rb +73 -0
- data/lib/solid_observer/services/record_cache_event.rb +165 -0
- data/lib/solid_observer/services/record_cache_metric.rb +66 -0
- data/lib/solid_observer/services/storage_info_snapshot.rb +216 -0
- data/lib/solid_observer/version.rb +1 -1
- data/lib/solid_observer.rb +36 -11
- data/lib/tasks/solid_observer.rake +111 -21
- metadata +47 -5
- data/bin/console +0 -11
- data/bin/quality_gate +0 -95
- data/bin/setup +0 -8
|
@@ -1,221 +1,29 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "singleton"
|
|
4
|
-
|
|
4
|
+
|
|
5
|
+
require_relative "event_buffer_core"
|
|
5
6
|
|
|
6
7
|
module SolidObserver
|
|
7
8
|
# Thread-safe buffer for collecting queue events before batch insertion.
|
|
8
|
-
#
|
|
9
|
-
# Events are buffered in memory and flushed either when:
|
|
10
|
-
# - Buffer size reaches the configured threshold
|
|
11
|
-
# - Flush interval timer expires
|
|
12
|
-
#
|
|
13
|
-
# @example Push an event to the buffer
|
|
14
|
-
# QueueEventBuffer.instance.push(event_data)
|
|
15
9
|
class QueueEventBuffer
|
|
16
10
|
include Singleton
|
|
11
|
+
include EventBufferCore
|
|
17
12
|
|
|
18
|
-
INITIAL_METRICS =
|
|
19
|
-
flush_failures_count: 0,
|
|
20
|
-
drops_count: 0,
|
|
21
|
-
last_flush_at: nil,
|
|
22
|
-
last_flush_duration_ms: nil,
|
|
23
|
-
last_flush_error: nil
|
|
24
|
-
}.freeze
|
|
13
|
+
INITIAL_METRICS = EventBufferCore::INITIAL_METRICS
|
|
25
14
|
|
|
26
15
|
def initialize
|
|
27
|
-
|
|
28
|
-
@metrics_mutex = Mutex.new
|
|
29
|
-
@buffer = []
|
|
30
|
-
@metrics = INITIAL_METRICS.dup
|
|
31
|
-
@timer_task = nil
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
# Adds an event to the buffer and triggers flush if threshold reached.
|
|
35
|
-
#
|
|
36
|
-
# @param event_data [Hash] Event data to buffer
|
|
37
|
-
# @return [void]
|
|
38
|
-
def push(event_data)
|
|
39
|
-
return unless (config = SolidObserver.config).persistence_mode?
|
|
40
|
-
|
|
41
|
-
drops_count, should_flush = sync_push_and_check(event_data, config)
|
|
42
|
-
record_drop(drops_count) if drops_count.positive?
|
|
43
|
-
ensure_timer_running
|
|
44
|
-
flush! if should_flush
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
# Flushes all buffered events to the database.
|
|
48
|
-
#
|
|
49
|
-
# @return [void]
|
|
50
|
-
def flush!
|
|
51
|
-
events_to_flush = nil
|
|
52
|
-
|
|
53
|
-
@mutex.synchronize do
|
|
54
|
-
return if @buffer.empty?
|
|
55
|
-
events_to_flush = @buffer.dup
|
|
56
|
-
@buffer.clear
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
started_at_ms = monotonic_ms
|
|
60
|
-
begin
|
|
61
|
-
Services::FlushEventBuffer.call(events_to_flush)
|
|
62
|
-
rescue => e
|
|
63
|
-
requeue_failed_events(events_to_flush)
|
|
64
|
-
record_flush_failure(e)
|
|
65
|
-
Rails.logger&.error "[SolidObserver] Buffer flush failed: #{e.message}" if defined?(Rails)
|
|
66
|
-
return
|
|
67
|
-
end
|
|
68
|
-
record_flush_success(monotonic_ms - started_at_ms)
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
def size
|
|
72
|
-
@mutex.synchronize { @buffer.size }
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
def clear
|
|
76
|
-
@mutex.synchronize { @buffer.clear }
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
def metrics
|
|
80
|
-
current_size = @mutex.synchronize { @buffer.size }
|
|
81
|
-
snapshot = @metrics_mutex.synchronize { @metrics.dup }
|
|
82
|
-
{
|
|
83
|
-
size: current_size,
|
|
84
|
-
max_buffer_size: SolidObserver.config.max_buffer_size
|
|
85
|
-
}.merge(snapshot)
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
def shutdown
|
|
89
|
-
stop_timer
|
|
90
|
-
flush!
|
|
16
|
+
initialize_event_buffer
|
|
91
17
|
end
|
|
92
18
|
|
|
93
19
|
private
|
|
94
20
|
|
|
95
|
-
def
|
|
96
|
-
|
|
97
|
-
@buffer << event_data
|
|
98
|
-
return 0
|
|
99
|
-
end
|
|
100
|
-
|
|
101
|
-
handle_overflow(event_data, config.buffer_overflow_strategy)
|
|
102
|
-
end
|
|
103
|
-
|
|
104
|
-
def handle_overflow(event_data, overflow_strategy)
|
|
105
|
-
case overflow_strategy
|
|
106
|
-
when :drop_old
|
|
107
|
-
@buffer.shift
|
|
108
|
-
@buffer << event_data
|
|
109
|
-
when :drop_new
|
|
110
|
-
# No-op: drop incoming event, keep oldest buffered events.
|
|
111
|
-
else
|
|
112
|
-
raise ArgumentError, "Unsupported buffer_overflow_strategy: #{overflow_strategy.inspect}"
|
|
113
|
-
end
|
|
114
|
-
|
|
115
|
-
1
|
|
116
|
-
end
|
|
117
|
-
|
|
118
|
-
def requeue_failed_events(events_to_flush)
|
|
119
|
-
return unless events_to_flush
|
|
120
|
-
|
|
121
|
-
dropped_count = sync_requeue_events(events_to_flush, SolidObserver.config.max_buffer_size)
|
|
122
|
-
record_drop(dropped_count) if dropped_count.positive?
|
|
123
|
-
end
|
|
124
|
-
|
|
125
|
-
def trim_events_for_capacity(events, max_buffer_size)
|
|
126
|
-
dropped_count = events.size - max_buffer_size
|
|
127
|
-
return [events, 0] if dropped_count <= 0
|
|
128
|
-
|
|
129
|
-
[events_to_keep(events, max_buffer_size), dropped_count]
|
|
130
|
-
end
|
|
131
|
-
|
|
132
|
-
def ensure_timer_running
|
|
133
|
-
timer_to_start, timer_to_stop = replace_timer_if_stopped
|
|
134
|
-
return unless timer_to_start
|
|
135
|
-
|
|
136
|
-
timer_to_stop&.shutdown
|
|
137
|
-
timer_to_start.execute
|
|
138
|
-
end
|
|
139
|
-
|
|
140
|
-
def replace_timer_if_stopped
|
|
141
|
-
@mutex.synchronize do
|
|
142
|
-
current_timer_task = @timer_task
|
|
143
|
-
return [nil, nil] if timer_running?(current_timer_task)
|
|
144
|
-
|
|
145
|
-
[build_timer_task, current_timer_task]
|
|
146
|
-
end
|
|
147
|
-
end
|
|
148
|
-
|
|
149
|
-
def stop_timer
|
|
150
|
-
timer_to_stop = @mutex.synchronize do
|
|
151
|
-
current_timer = @timer_task
|
|
152
|
-
@timer_task = nil
|
|
153
|
-
current_timer
|
|
154
|
-
end
|
|
155
|
-
|
|
156
|
-
timer_to_stop&.shutdown
|
|
157
|
-
end
|
|
158
|
-
|
|
159
|
-
def record_flush_success(duration_ms)
|
|
160
|
-
@metrics_mutex.synchronize do
|
|
161
|
-
@metrics.merge!(
|
|
162
|
-
last_flush_at: Time.current,
|
|
163
|
-
last_flush_duration_ms: duration_ms,
|
|
164
|
-
last_flush_error: nil
|
|
165
|
-
)
|
|
166
|
-
end
|
|
167
|
-
end
|
|
168
|
-
|
|
169
|
-
def record_flush_failure(error)
|
|
170
|
-
@metrics_mutex.synchronize do
|
|
171
|
-
@metrics[:flush_failures_count] += 1
|
|
172
|
-
@metrics[:last_flush_error] = error.message
|
|
173
|
-
end
|
|
174
|
-
end
|
|
175
|
-
|
|
176
|
-
def record_drop(count = 1)
|
|
177
|
-
@metrics_mutex.synchronize do
|
|
178
|
-
@metrics[:drops_count] += count
|
|
179
|
-
end
|
|
180
|
-
end
|
|
181
|
-
|
|
182
|
-
def sync_push_and_check(event_data, config)
|
|
183
|
-
@mutex.synchronize do
|
|
184
|
-
drops_count = apply_overflow_policy(event_data, config)
|
|
185
|
-
[drops_count, @buffer.size >= config.buffer_size]
|
|
186
|
-
end
|
|
187
|
-
end
|
|
188
|
-
|
|
189
|
-
def sync_requeue_events(events_to_flush, max_buffer_size)
|
|
190
|
-
@mutex.synchronize do
|
|
191
|
-
combined_events = events_to_flush + @buffer
|
|
192
|
-
kept_events, dropped_count = trim_events_for_capacity(combined_events, max_buffer_size)
|
|
193
|
-
@buffer.replace(kept_events)
|
|
194
|
-
dropped_count
|
|
195
|
-
end
|
|
196
|
-
end
|
|
197
|
-
|
|
198
|
-
def events_to_keep(events, max_buffer_size)
|
|
199
|
-
if SolidObserver.config.buffer_overflow_strategy == :drop_old
|
|
200
|
-
events.last(max_buffer_size)
|
|
201
|
-
else
|
|
202
|
-
events.first(max_buffer_size)
|
|
203
|
-
end
|
|
204
|
-
end
|
|
205
|
-
|
|
206
|
-
def timer_running?(timer_task)
|
|
207
|
-
timer_task && !timer_task.shuttingdown?
|
|
208
|
-
end
|
|
209
|
-
|
|
210
|
-
def build_timer_task
|
|
211
|
-
@timer_task = Concurrent::TimerTask.new(
|
|
212
|
-
execution_interval: SolidObserver.config.flush_interval,
|
|
213
|
-
run_now: false
|
|
214
|
-
) { flush! }
|
|
21
|
+
def flush_service
|
|
22
|
+
Services::FlushEventBuffer
|
|
215
23
|
end
|
|
216
24
|
|
|
217
|
-
def
|
|
218
|
-
|
|
25
|
+
def log_label
|
|
26
|
+
"Buffer"
|
|
219
27
|
end
|
|
220
28
|
end
|
|
221
29
|
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SolidObserver
|
|
4
|
+
module Services
|
|
5
|
+
# Runs operator-triggered maintenance against Solid Cable and reports the
# outcome as a plain hash of the form +{ok: Boolean, message: String}+.
#
# Every message handed back comes from MESSAGES. The rescued error's message
# is never returned, and only the error's class name is written to the log.
class CableOperations
  MESSAGES = {
    trim: {
      success: "Expired/trimmable Solid Cable messages trimmed.",
      failure: "Cable trim failed. No raw Cable payloads or adapter details are shown. Use solid_observer:cable:trim if the problem continues."
    }.freeze,
    unavailable: "Cable controls are unavailable because Solid Cable support is disabled or not detected."
  }.freeze

  class << self
    # Convenience wrapper around {CableOperations#available?}.
    #
    # @return [Boolean]
    def available?
      new.available?
    end

    # Convenience wrapper around {CableOperations#trim}.
    #
    # @return [Hash] +{ok:, message:}+
    def trim
      new.trim
    end

    # Looks up a message in MESSAGES.
    #
    # @param operation [Symbol] +:unavailable+, or an operation key such as +:trim+
    # @param key [Symbol, nil] outcome key (+:success+ / +:failure+); not consulted for +:unavailable+
    # @return [String]
    # @raise [KeyError] when +operation+ or +key+ is missing from MESSAGES
    def message(operation, key = nil)
      return MESSAGES.fetch(:unavailable) if operation == :unavailable

      MESSAGES.fetch(operation).fetch(key)
    end

    # @return [String] the message used when Cable controls cannot run
    def unavailable_message
      message(:unavailable)
    end
  end

  # @return [Boolean] true when the configuration reports Solid Cable as
  #   enabled and the ::SolidCable::Message constant is defined
  def available?
    SolidObserver.config.solid_cable_enabled? && !!defined?(::SolidCable::Message)
  end

  # Trims Solid Cable messages when the component is available.
  #
  # @return [Hash] +{ok: true, message:}+ on success; +{ok: false, message:}+
  #   when unavailable or when the trim raised a StandardError
  def trim
    messages = self.class
    return {ok: false, message: messages.unavailable_message} unless available?

    perform_operation(
      :trim,
      success_message: messages.message(:trim, :success),
      failure_message: messages.message(:trim, :failure)
    ) do
      trim_cable_messages
    end
  end

  private

  # Uses ::SolidCable::TrimJob when that constant is defined; otherwise
  # deletes the +trimmable+ messages directly.
  def trim_cable_messages
    if defined?(::SolidCable::TrimJob)
      ::SolidCable::TrimJob.perform_now
    else
      ::SolidCable::Message.trimmable.delete_all
    end
  end

  # Yields, translating the outcome into a result hash. Any StandardError
  # raised by the block is logged and reported as a failure.
  def perform_operation(name, success_message:, failure_message:)
    yield
    {ok: true, message: success_message}
  rescue => error
    log_failure(name, error)
    {ok: false, message: failure_message}
  end

  # Logs only the error class. Guarded with +defined?(Rails)+ so that a
  # missing Rails constant cannot raise from inside the rescue path and
  # replace the failure result with a NameError.
  def log_failure(name, error)
    return unless defined?(Rails)

    Rails.logger&.warn("[SolidObserver] Cable #{name} failed: #{error.class}")
  end
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "storage_info_snapshot"
|
|
4
|
+
|
|
5
|
+
module SolidObserver
|
|
6
|
+
module Services
|
|
7
|
+
# :reek:TooManyConstants
|
|
8
|
+
class CableStats
|
|
9
|
+
# Selectable time ranges: the string token checked by parse_range mapped to
# the duration returned by range_duration.
RANGES = {
  "15m" => 15.minutes,
  "30m" => 30.minutes,
  "1h" => 1.hour,
  "7h" => 7.hours,
  "1d" => 1.day,
  "7d" => 7.days,
  "14d" => 14.days
}.freeze

# Range token used when the requested one is not a key of RANGES.
DEFAULT_RANGE = "15m"

# Trend payload returned when there are no metric rows (and in error_response).
ACTIVITY_TREND_EMPTY = {
  available: false,
  broadcasts: [],
  rejections: []
}.freeze

# Stability payload used by error_response.
STABILITY_EMPTY = {
  available: false,
  state: :stable,
  rejection_count: 0,
  error_count: 0,
  rejection_rate: 0.0,
  backlog_ratio: nil,
  backlog_available: false,
  latest_recorded_at: nil
}.freeze

# Stability payload used when the event query fails; identical to
# STABILITY_EMPTY except for :available and :state.
STABILITY_DEGRADED = STABILITY_EMPTY.merge(available: true, state: :degraded).freeze

# [largest window in seconds, bucket width in seconds] pairs, scanned in
# order; the first pair whose limit covers the window wins.
BUCKET_RULES = [
  [2.hours, 1.minute],
  [1.day, 15.minutes],
  [7.days, 2.hours]
].map { |limit, width| [limit.to_i, width.to_i] }.freeze
|
|
49
|
+
|
|
50
|
+
# Groups Cable metric rows into fixed-width time buckets covering
# +current_time - window+ through +current_time+ and exposes the per-bucket
# broadcast and rejection totals as chart series.
class TrendData
  # Running totals for a single time bucket.
  class BucketSnapshot
    attr_reader :broadcasts_count, :rejections_count

    def initialize
      @broadcasts_count = 0
      @rejections_count = 0
    end

    # Adds one metric row to the totals.
    #
    # @param row [Array] index 1 is read as the broadcasts count and index 4
    #   as the rejections count; both are coerced with +to_i+
    def add(row)
      @broadcasts_count += row[1].to_i
      @rejections_count += row[4].to_i
    end

    # @param key [Symbol] name of a public reader on this object
    # @return [Integer]
    def value_for(key)
      public_send(key)
    end
  end

  # @param metric_rows [Array<Array>] rows whose first element responds to
  #   +to_i+ with an epoch timestamp
  # @param window [#to_i] window length; +to_i+ is compared against the
  #   second-based limits in CableStats::BUCKET_RULES
  # @param current_time [Time] end of the window
  def initialize(metric_rows:, window:, current_time:)
    @metric_rows = metric_rows
    @window = window
    @current_time = current_time
  end

  # @return [Hash] a copy of CableStats::ACTIVITY_TREND_EMPTY when there are
  #   no rows; otherwise +{available: true, broadcasts: [...], rejections: [...]}+
  #   where each series entry is +{t: bucket_start, v: total}+
  def to_h
    return CableStats::ACTIVITY_TREND_EMPTY.dup if metric_rows.empty?

    buckets = blank_buckets
    metric_rows.each do |row|
      # Rows whose aligned timestamp has no pre-built bucket are ignored.
      buckets[align_bucket(row[0].to_i)]&.add(row)
    end

    {
      available: true,
      broadcasts: count_series(buckets, :broadcasts_count),
      rejections: count_series(buckets, :rejections_count)
    }
  end

  private

  attr_reader :metric_rows, :window, :current_time

  # Builds one empty BucketSnapshot per bucket start, in ascending order.
  def blank_buckets
    start_bucket = align_bucket((current_time - window).to_i)
    end_bucket = align_bucket(current_time.to_i)

    start_bucket.step(end_bucket, bucket_seconds).each_with_object({}) do |timestamp, buckets|
      buckets[timestamp] = BucketSnapshot.new
    end
  end

  def count_series(buckets, key)
    buckets.map do |timestamp, totals|
      {t: timestamp, v: totals.value_for(key)}
    end
  end

  # Bucket width in seconds for this window. The value depends only on
  # +window+, so it is computed once instead of re-scanning BUCKET_RULES for
  # every row passed through align_bucket.
  def bucket_seconds
    @bucket_seconds ||= begin
      seconds = window.to_i
      CableStats::BUCKET_RULES.find { |limit, _bucket| seconds <= limit }&.last || 4.hours.to_i
    end
  end

  # Rounds an epoch value down to the start of its bucket.
  def align_bucket(value)
    (value / bucket_seconds) * bucket_seconds
  end
end
|
|
118
|
+
|
|
119
|
+
# Reads the "solid_cable" entry from StorageInfoSnapshot and reduces it to a
# backlog summary: trimmable count and its share of the event count.
class BacklogSnapshot
  class << self
    # @return [Hash] see {BacklogSnapshot#call}
    def call
      new.call
    end
  end

  # @return [Hash] +{available: true, count: Integer, ratio: Float}+, or
  #   +{available: false, count: nil, ratio: nil}+ when the entry is missing,
  #   not flagged available, or one of the rescued errors is raised
  # :reek:TooManyStatements
  def call
    entry = cable_component
    return unavailable unless entry && entry[:available]

    trimmable = entry[:trimmable_count].to_i
    events = entry[:event_count].to_i
    share = events.positive? ? trimmable.to_f / events.to_f : 0.0

    {available: true, count: trimmable, ratio: share}
  rescue *StorageInfoSnapshot::CONNECTION_ERRORS, TypeError, NoMethodError
    unavailable
  end

  private

  # First snapshot entry whose :component is "solid_cable", or nil.
  def cable_component
    StorageInfoSnapshot.call.find do |component|
      component[:component] == "solid_cable"
    end
  end

  def unavailable
    {available: false, count: nil, ratio: nil}
  end
end
|
|
151
|
+
|
|
152
|
+
# Classifies Cable health for a time window by combining CableEvent counts
# queried here with the metric totals and backlog snapshot supplied by the
# caller.
class StabilityData
  # @param window [Object] length subtracted from +current_time+ to obtain the window start
  # @param current_time [Time] end of the window
  # @param backlog_snapshot [Hash] read for +:ratio+ and +:available+
  def initialize(window:, current_time:, backlog_snapshot:)
    @window = window
    @current_time = current_time
    @backlog_snapshot = backlog_snapshot
  end

  # @param metric_broadcasts_count [#to_i] denominator of the rejection rate
  # @param metric_rejections_count [#to_i] numerator of the rejection rate
  # @return [Hash] the stability payload, or a copy of
  #   CableStats::STABILITY_DEGRADED when ActiveRecord::StatementInvalid is raised
  def to_h(metric_broadcasts_count:, metric_rejections_count:)
    compute(
      metric_broadcasts_count: metric_broadcasts_count,
      metric_rejections_count: metric_rejections_count
    )
  rescue ActiveRecord::StatementInvalid
    CableStats::STABILITY_DEGRADED.dup
  end

  private

  attr_reader :window, :current_time, :backlog_snapshot

  # Assembles the payload from the event query, the metric totals and the
  # backlog snapshot.
  # :reek:TooManyStatements
  def compute(metric_broadcasts_count:, metric_rejections_count:)
    counts = query_event_counts
    rejections = counts[:rejection_count]
    errors = counts[:error_count]
    rate = CableStats.ratio(metric_rejections_count, metric_broadcasts_count)
    any_rejection = rejections.to_i.positive? || metric_rejections_count.to_i.positive?
    ratio = backlog_snapshot[:ratio]
    ratio_known = backlog_snapshot[:available]

    state = stability_state(
      error_count: errors,
      rejection_rate: rate,
      rejection_present: any_rejection,
      backlog_ratio: ratio,
      backlog_available: ratio_known
    )

    {
      available: true,
      state: state,
      rejection_count: rejections,
      error_count: errors,
      rejection_rate: rate,
      backlog_ratio: ratio,
      backlog_available: ratio_known,
      latest_recorded_at: counts[:latest_recorded_at]
    }
  end

  # Maps the inputs to :critical, :degraded or :stable. Checks run in the
  # order written, so the first matching condition decides the state.
  # :reek:ControlParameter
  # :reek:LongParameterList
  # :reek:TooManyStatements
  def stability_state(error_count:, rejection_rate:, rejection_present:, backlog_ratio:, backlog_available:)
    config = SolidObserver.config

    if error_count.to_i > config.cable_error_threshold.to_i ||
        rejection_rate >= config.cable_rejection_threshold.to_f ||
        (backlog_ratio && backlog_ratio >= 0.5)
      :critical
    elsif (backlog_ratio && backlog_ratio >= config.cable_backlog_threshold.to_f) ||
        rejection_present ||
        !backlog_available
      :degraded
    else
      :stable
    end
  end

  # Runs one aggregate query over CableEvent rows recorded inside the window.
  #
  # @return [Hash] +:rejection_count+, +:error_count+ (Integers) and
  #   +:latest_recorded_at+ (Time or nil)
  def query_event_counts
    rejected = "event_type = 'transmit_subscription_rejection'"
    errored = "error_class IS NOT NULL AND TRIM(error_class) != ''"

    rejections, errors, latest = SolidObserver::CableEvent.where(recorded_at: window_range).pick(
      Arel.sql("COUNT(CASE WHEN #{rejected} THEN 1 END)"),
      Arel.sql("COUNT(CASE WHEN #{errored} THEN 1 END)"),
      Arel.sql("MAX(CASE WHEN #{rejected} OR (#{errored}) THEN recorded_at END)")
    )

    {
      rejection_count: rejections.to_i,
      error_count: errors.to_i,
      latest_recorded_at: parse_latest_recorded_at(latest)
    }
  end

  # Returns Time values untouched and parses anything else from its string
  # form; blank input and unparseable strings yield nil.
  #
  # NOTE(review): Time.parse interprets a string without an explicit zone in
  # the process-local zone. If an adapter hands the MAX(...) value back as a
  # zone-less string stored in UTC, the result would be shifted — confirm.
  def parse_latest_recorded_at(value)
    return nil if value.blank?

    if value.is_a?(Time)
      value
    else
      Time.parse(value.to_s)
    end
  rescue ArgumentError
    nil
  end

  def window_range
    window_start = current_time - window
    window_start..current_time
  end
end
|
|
242
|
+
|
|
243
|
+
class << self
  # Normalizes a requested range token.
  #
  # @param value [#to_s] requested token
  # @param fallback [String] returned when the token is not a key of RANGES
  # @return [String]
  def parse_range(value, fallback: DEFAULT_RANGE)
    candidate = value.to_s
    return candidate if RANGES.key?(candidate)

    fallback
  end

  # @param value [#to_s] requested token
  # @param fallback [String] token used when +value+ is not a key of RANGES
  # @return [Object] the RANGES entry for the resolved token
  # @raise [KeyError] when the resolved token is not a key of RANGES
  def range_duration(value, fallback: DEFAULT_RANGE)
    token = parse_range(value, fallback: fallback)
    RANGES.fetch(token)
  end

  # Float division that answers 0.0 when the denominator's integer part is zero.
  #
  # @return [Float]
  def ratio(numerator, denominator)
    denominator.to_i.zero? ? 0.0 : numerator.to_f / denominator.to_f
  end
end
|
|
259
|
+
|
|
260
|
+
# Class-level entry point: builds an instance and delegates to #call.
#
# @param window [Object] passed through unchanged
# @return [Hash]
def self.call(window:)
  service = new
  service.call(window: window)
end
|
|
263
|
+
|
|
264
|
+
# Builds the dashboard payload for the window ending at Time.current.
# Any StandardError is logged (when Rails is defined) and replaced by
# error_response.
#
# @param window [Object] window length, subtracted from the current time
# @return [Hash]
def call(window:)
  dashboard_response(window: window, current_time: Time.current)
rescue => error
  if defined?(Rails)
    Rails.logger&.error("[SolidObserver] CableStats call failed: #{error.class} #{error.message}")
  end
  error_response
end
|
|
271
|
+
|
|
272
|
+
private
|
|
273
|
+
|
|
274
|
+
# Gathers metric rows, metric totals and the backlog snapshot for the window
# and combines them into the response hash.
#
# @param window [Object] window length
# @param current_time [Time] end of the window
# @return [Hash] see build_response
def dashboard_response(window:, current_time:)
  period = (current_time - window)..current_time
  rows = metric_rows(time_window: period)
  sums = metric_totals(time_window: period)
  backlog = BacklogSnapshot.call

  details = dashboard_data(
    window: window,
    current_time: current_time,
    metric_rows: rows,
    totals: sums,
    backlog_snapshot: backlog
  )

  build_response(totals: sums, backlog_snapshot: backlog, dashboard_data: details)
end
|
|
292
|
+
|
|
293
|
+
# :reek:FeatureEnvy
|
|
294
|
+
# :reek:LongParameterList
|
|
295
|
+
# Flattens totals, trend/stability data and backlog figures into the single
# hash returned to callers.
#
# @param totals [Hash] read for the six *_count keys
# @param backlog_snapshot [Hash] read for +:count+ and +:available+
# @param dashboard_data [Hash] read for +:activity_trends+ and +:stability+
# @return [Hash]
def build_response(totals:, backlog_snapshot:, dashboard_data:)
  count_keys = %i[
    broadcasts_count transmissions_count confirmations_count
    rejections_count perform_actions_count errors_count
  ]
  counts = count_keys.to_h { |key| [key, totals[key]] }

  counts.merge(
    rejection_rate: self.class.ratio(counts[:rejections_count], counts[:broadcasts_count]),
    activity_trends: dashboard_data[:activity_trends],
    stability: dashboard_data[:stability],
    backlog_count: backlog_snapshot[:count],
    backlog_available: backlog_snapshot[:available]
  )
end
|
|
313
|
+
|
|
314
|
+
# :reek:LongParameterList
|
|
315
|
+
# Computes the two derived sections of the dashboard: activity trends (from
# the metric rows) and stability (from totals plus the backlog snapshot).
#
# @return [Hash] +{activity_trends: Hash, stability: Hash}+
def dashboard_data(window:, current_time:, metric_rows:, totals:, backlog_snapshot:)
  trends = TrendData.new(metric_rows: metric_rows, window: window, current_time: current_time).to_h

  stability_source = StabilityData.new(
    window: window,
    current_time: current_time,
    backlog_snapshot: backlog_snapshot
  )
  stability = stability_source.to_h(
    metric_broadcasts_count: totals[:broadcasts_count],
    metric_rejections_count: totals[:rejections_count]
  )

  {activity_trends: trends, stability: stability}
end
|
|
332
|
+
|
|
333
|
+
# Plucks the per-period counters for every CableMetric whose period_start
# falls inside the window.
#
# @param time_window [Range] passed as the period_start condition
# @return [Array<Array>] one array per record: period_start followed by the six counters
def metric_rows(time_window:)
  columns = %i[
    period_start
    broadcasts_count
    transmissions_count
    confirmations_count
    rejections_count
    perform_actions_count
    errors_count
  ]

  SolidObserver::CableMetric.where(period_start: time_window).pluck(*columns)
end
|
|
344
|
+
|
|
345
|
+
# Sums each CableMetric counter over the window in a single aggregate query.
#
# The aggregates are raw SQL (Arel.sql), so the Ruby type of each value is
# whatever the database adapter returns. Every total is coerced with +to_i+
# so the payload always carries Integers, matching the coercion applied in
# query_event_counts and the Integer zeros in error_response. A nil result
# from +pick+ is treated as all zeros.
#
# @param time_window [Range] passed as the period_start condition
# @return [Hash{Symbol => Integer}] totals keyed by counter column name
def metric_totals(time_window:)
  columns = %i[
    broadcasts_count
    transmissions_count
    confirmations_count
    rejections_count
    perform_actions_count
    errors_count
  ]

  sums = SolidObserver::CableMetric.where(period_start: time_window).pick(
    *columns.map { |column| Arel.sql("COALESCE(SUM(#{column}), 0)") }
  )

  columns.zip(Array(sums)).to_h { |column, sum| [column, sum.to_i] }
end
|
|
366
|
+
|
|
367
|
+
# Payload returned when building the dashboard raised: every counter zeroed,
# empty trend/stability sections, no backlog data and an :error message.
#
# @return [Hash]
def error_response
  zeroed = %i[
    broadcasts_count transmissions_count confirmations_count
    rejections_count perform_actions_count errors_count
  ].to_h { |key| [key, 0] }

  zeroed.merge(
    rejection_rate: 0.0,
    activity_trends: ACTIVITY_TREND_EMPTY.dup,
    stability: STABILITY_EMPTY.dup,
    backlog_count: nil,
    backlog_available: false,
    error: "Service temporarily unavailable"
  )
end
|
|
383
|
+
end
|
|
384
|
+
end
|
|
385
|
+
end
|