solid_observer 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +63 -0
- data/README.md +157 -28
- data/app/assets/javascripts/solid_observer/live_poll.js +376 -0
- data/app/controllers/concerns/solid_observer/paginatable.rb +17 -0
- data/app/controllers/concerns/solid_observer/require_persistence_mode.rb +19 -0
- data/app/controllers/concerns/solid_observer/require_solid_queue.rb +19 -0
- data/app/controllers/solid_observer/application_controller.rb +69 -0
- data/app/controllers/solid_observer/dashboard_controller.rb +79 -0
- data/app/controllers/solid_observer/events_controller.rb +50 -0
- data/app/controllers/solid_observer/jobs_controller.rb +85 -0
- data/app/controllers/solid_observer/storages_controller.rb +12 -0
- data/app/helpers/solid_observer/application_helper.rb +95 -0
- data/app/helpers/solid_observer/dashboard_helper.rb +39 -0
- data/app/models/solid_observer/queue_event.rb +134 -0
- data/app/models/solid_observer/queue_metric.rb +1 -1
- data/app/presenters/solid_observer/execution_presenter.rb +50 -0
- data/app/views/layouts/solid_observer/application.html.erb +470 -0
- data/app/views/solid_observer/dashboard/_chart.html.erb +28 -0
- data/app/views/solid_observer/dashboard/_live_state.html.erb +20 -0
- data/app/views/solid_observer/dashboard/_queue_table.html.erb +34 -0
- data/app/views/solid_observer/dashboard/_right_now.html.erb +3 -0
- data/app/views/solid_observer/dashboard/_throughput.html.erb +32 -0
- data/app/views/solid_observer/dashboard/index.html.erb +113 -0
- data/app/views/solid_observer/errors/storage_unavailable.html.erb +27 -0
- data/app/views/solid_observer/events/index.html.erb +53 -0
- data/app/views/solid_observer/events/show.html.erb +47 -0
- data/app/views/solid_observer/jobs/index.html.erb +61 -0
- data/app/views/solid_observer/jobs/show.html.erb +71 -0
- data/app/views/solid_observer/shared/_empty_state.html.erb +5 -0
- data/app/views/solid_observer/shared/_pagination.html.erb +17 -0
- data/app/views/solid_observer/shared/_stat_card.html.erb +9 -0
- data/app/views/solid_observer/storages/show.html.erb +39 -0
- data/bin/quality_gate +95 -0
- data/config/routes.rb +17 -0
- data/db/migrate/20260424000001_add_composite_indexes_to_queue_events.rb +30 -0
- data/lib/generators/solid_observer/install_generator.rb +12 -25
- data/lib/generators/solid_observer/templates/initializer.rb.tt +5 -6
- data/lib/solid_observer/base_metric.rb +1 -1
- data/lib/solid_observer/chart_buffer.rb +83 -0
- data/lib/solid_observer/cli/base.rb +2 -2
- data/lib/solid_observer/cli/jobs.rb +2 -2
- data/lib/solid_observer/cli/status.rb +20 -2
- data/lib/solid_observer/cli/storage.rb +41 -40
- data/lib/solid_observer/configuration.rb +47 -37
- data/lib/solid_observer/correlation_id_resolver.rb +8 -6
- data/lib/solid_observer/engine.rb +72 -17
- data/lib/solid_observer/params/events_filter.rb +37 -0
- data/lib/solid_observer/params/jobs_filter.rb +35 -0
- data/lib/solid_observer/queries/events_query.rb +27 -0
- data/lib/solid_observer/queries/execution_finder.rb +42 -0
- data/lib/solid_observer/queries/job_executions_query.rb +73 -0
- data/lib/solid_observer/queue_event_buffer.rb +163 -25
- data/lib/solid_observer/queue_stats.rb +165 -19
- data/lib/solid_observer/services/cleanup_storage.rb +58 -42
- data/lib/solid_observer/services/database_size.rb +86 -0
- data/lib/solid_observer/services/flush_event_buffer.rb +31 -15
- data/lib/solid_observer/services/install_migrations.rb +49 -0
- data/lib/solid_observer/services/record_event.rb +51 -14
- data/lib/solid_observer/services/ui_auth_check.rb +65 -0
- data/lib/solid_observer/subscriber.rb +15 -8
- data/lib/solid_observer/version.rb +1 -1
- data/lib/solid_observer.rb +7 -0
- data/lib/tasks/solid_observer.rake +10 -2
- metadata +55 -1
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SolidObserver
  module Queries
    # Looks up SolidQueue execution records by id.
    #
    # Execution classes are referenced by name and resolved with
    # +safe_constantize+ at call time, so every finder returns +nil+ (rather
    # than raising NameError) when the named class cannot be resolved.
    class ExecutionFinder
      # Status label => execution class name. Single source of truth for the
      # execution types this finder knows about.
      STATUS_TO_EXECUTION_TYPE = {
        "ready" => "SolidQueue::ReadyExecution",
        "scheduled" => "SolidQueue::ScheduledExecution",
        "claimed" => "SolidQueue::ClaimedExecution",
        "failed" => "SolidQueue::FailedExecution"
      }.freeze

      # Execution class names in the order +find_any+ probes them
      # (ready, scheduled, claimed, failed).
      EXECUTION_TYPES = STATUS_TO_EXECUTION_TYPE.values.freeze

      class << self
        # Probes every execution type in turn and returns the first record
        # whose id matches.
        #
        # @param id [Object] execution id to look for
        # @return [Object, nil] the first matching execution, or nil
        def find_any(id)
          # Avoid one lookup per execution type when there is nothing to match.
          return nil if id.nil?

          EXECUTION_TYPES.each do |const_name|
            execution = lookup(const_name, id)
            return execution if execution
          end
          nil
        end

        # Looks the id up in the single execution type named by +status+.
        #
        # @param id [Object] execution id to look for
        # @param status [#to_s] status label; matched case-insensitively
        # @return [Object, nil] the execution, or nil for an unknown status,
        #   an unresolvable class, or no match
        def find_by_status(id, status)
          const_name = STATUS_TO_EXECUTION_TYPE[status.to_s.downcase]
          const_name && lookup(const_name, id)
        end

        # Looks the id up among failed executions only.
        #
        # @param id [Object] execution id to look for
        # @return [Object, nil] the failed execution, or nil
        def find_failed(id)
          lookup(STATUS_TO_EXECUTION_TYPE.fetch("failed"), id)
        end

        private

        # Resolves +const_name+ and asks the class for the record.
        # Presumably these are ActiveRecord models, so +find_by(id:)+ returns
        # nil when no row matches -- confirm against SolidQueue.
        def lookup(const_name, id)
          return nil if id.nil?

          const_name.safe_constantize&.find_by(id: id)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SolidObserver
  module Queries
    # Builds the list of SolidQueue executions matching a jobs filter.
    #
    # The filter object must respond to +status+, +queue_name+ and
    # +job_class+.
    class JobExecutionsQuery
      ALL_ACTIVE_STATUSES = %w[ready scheduled claimed failed].freeze

      # Status label => lambda returning the base scope. Lambdas defer the
      # SolidQueue constant lookup until the query actually runs.
      STATUS_SCOPES = {
        "ready" => -> { SolidQueue::ReadyExecution.all },
        "scheduled" => -> { SolidQueue::ScheduledExecution.all },
        "claimed" => -> { SolidQueue::ClaimedExecution.all },
        "failed" => -> { SolidQueue::FailedExecution.all }
      }.freeze

      # Default number of newest records taken from each status when listing
      # "all_active".
      PER_STATUS_LIMIT = 50

      # Statuses whose queue filter is applied through the joined job row.
      # Presumably these execution tables have no queue_name column of their
      # own -- confirm against the SolidQueue schema.
      JOB_QUEUE_STATUSES = %w[failed claimed].freeze

      # @param filter [#status, #queue_name, #job_class] filter values
      # @param per_status_limit [Integer] records taken per status for the
      #   "all_active" listing
      def initialize(filter, per_status_limit: PER_STATUS_LIMIT)
        @filter = filter
        @per_status_limit = per_status_limit
      end

      # Runs the query.
      #
      # @return [Object, Array] for a single status, the filtered scope
      #   ordered newest first; for "all_active", an Array of records merged
      #   across all statuses, newest first, with the +:job+ association
      #   preloaded
      def call
        status = @filter.status
        return all_active_executions if status == "all_active"

        newest_first(filtered_scope(status))
      end

      private

      def all_active_executions
        records = all_active_records.sort_by(&:created_at).reverse
        preload_jobs(records)
      end

      # Takes the newest records of every status; the caller merges and
      # re-sorts them.
      def all_active_records
        ALL_ACTIVE_STATUSES.flat_map do |status|
          newest_first(filtered_scope(status)).limit(@per_status_limit).to_a
        end
      end

      def newest_first(scope)
        scope.order(created_at: :desc)
      end

      # Loads +:job+ for the already-materialized records, then returns them.
      def preload_jobs(records)
        ActiveRecord::Associations::Preloader.new(records: records, associations: :job).call
        records
      end

      def filtered_scope(status)
        scope = status_scope(status)
        scope = apply_queue_filter(scope, status)
        apply_job_class_filter(scope)
      end

      # Unknown statuses fall back to the "ready" scope.
      def status_scope(status)
        STATUS_SCOPES.fetch(status, STATUS_SCOPES["ready"]).call
      end

      def apply_job_class_filter(scope)
        job_class = @filter.job_class
        return scope if job_class.blank?

        scope.joins(:job).where(solid_queue_jobs: {class_name: job_class})
      end

      def apply_queue_filter(scope, status)
        queue_name = @filter.queue_name
        return scope if queue_name.blank?

        if JOB_QUEUE_STATUSES.include?(status)
          scope.joins(:job).where(solid_queue_jobs: {queue_name: queue_name})
        else
          scope.where(queue_name: queue_name)
        end
      end
    end
  end
end
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "singleton"
|
|
4
|
+
require "concurrent/timer_task"
|
|
4
5
|
|
|
5
6
|
module SolidObserver
|
|
6
7
|
# Thread-safe buffer for collecting queue events before batch insertion.
|
|
@@ -14,10 +15,20 @@ module SolidObserver
|
|
|
14
15
|
class QueueEventBuffer
|
|
15
16
|
include Singleton
|
|
16
17
|
|
|
18
|
+
INITIAL_METRICS = {
|
|
19
|
+
flush_failures_count: 0,
|
|
20
|
+
drops_count: 0,
|
|
21
|
+
last_flush_at: nil,
|
|
22
|
+
last_flush_duration_ms: nil,
|
|
23
|
+
last_flush_error: nil
|
|
24
|
+
}.freeze
|
|
25
|
+
|
|
17
26
|
# Sets up an empty buffer, fresh metric counters and no timer.
# @mutex is taken around @buffer and @timer_task access elsewhere in this
# class; @metrics_mutex is taken around @metrics access.
def initialize
  @buffer = []
  @metrics = INITIAL_METRICS.dup
  @timer_task = nil
  @mutex = Mutex.new
  @metrics_mutex = Mutex.new
end
|
|
22
33
|
|
|
23
34
|
# Adds an event to the buffer and triggers flush if threshold reached.
|
|
@@ -25,17 +36,11 @@ module SolidObserver
|
|
|
25
36
|
# @param event_data [Hash] Event data to buffer
|
|
26
37
|
# @return [void]
|
|
27
38
|
def push(event_data)
  config = SolidObserver.config
  # Buffering only happens in persistence mode.
  return unless config.persistence_mode?

  dropped, threshold_reached = sync_push_and_check(event_data, config)
  record_drop(dropped) if dropped.positive?
  ensure_timer_running
  # Flush outside the buffer lock once the configured batch size is reached.
  flush! if threshold_reached
end
|
|
41
46
|
|
|
@@ -51,10 +56,16 @@ module SolidObserver
|
|
|
51
56
|
@buffer.clear
|
|
52
57
|
end
|
|
53
58
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
59
|
+
started_at_ms = monotonic_ms
|
|
60
|
+
begin
|
|
61
|
+
Services::FlushEventBuffer.call(events_to_flush)
|
|
62
|
+
rescue => e
|
|
63
|
+
requeue_failed_events(events_to_flush)
|
|
64
|
+
record_flush_failure(e)
|
|
65
|
+
Rails.logger&.error "[SolidObserver] Buffer flush failed: #{e.message}" if defined?(Rails)
|
|
66
|
+
return
|
|
67
|
+
end
|
|
68
|
+
record_flush_success(monotonic_ms - started_at_ms)
|
|
58
69
|
end
|
|
59
70
|
|
|
60
71
|
def size
|
|
@@ -65,19 +76,146 @@ module SolidObserver
|
|
|
65
76
|
@mutex.synchronize { @buffer.clear }
|
|
66
77
|
end
|
|
67
78
|
|
|
79
|
+
# Returns a snapshot Hash of buffer health: current size, the configured
# max_buffer_size, and a copy of the internal counters.
# Each lock is held only long enough to read its own state.
def metrics
  buffered = @mutex.synchronize { @buffer.size }
  counters = @metrics_mutex.synchronize { @metrics.dup }

  report = {size: buffered, max_buffer_size: SolidObserver.config.max_buffer_size}
  report.merge(counters)
end
|
|
87
|
+
|
|
88
|
+
# Stops the periodic flush timer, then flushes whatever is still buffered.
def shutdown
  stop_timer # detach and shut down the timer before the final flush
  flush!
end
|
|
92
|
+
|
|
68
93
|
private
|
|
69
94
|
|
|
70
|
-
def
|
|
71
|
-
@
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
95
|
+
# Appends the event when there is room; otherwise defers to the configured
# overflow strategy. Returns the number of events dropped (0 when appended).
# Called from sync_push_and_check while @mutex is held.
def apply_overflow_policy(event_data, config)
  at_capacity = @buffer.size >= config.max_buffer_size
  return handle_overflow(event_data, config.buffer_overflow_strategy) if at_capacity

  @buffer << event_data
  0
end
|
|
103
|
+
|
|
104
|
+
# Applies the overflow strategy to a full buffer and returns 1, the number
# of events dropped.
#   :drop_old - evicts the oldest buffered event and appends the new one
#   :drop_new - leaves the buffer untouched; the incoming event is dropped
# Raises ArgumentError for any other strategy.
def handle_overflow(event_data, overflow_strategy)
  unless %i[drop_old drop_new].include?(overflow_strategy)
    raise ArgumentError, "Unsupported buffer_overflow_strategy: #{overflow_strategy.inspect}"
  end

  if overflow_strategy == :drop_old
    @buffer.shift
    @buffer << event_data
  end

  1
end
|
|
117
|
+
|
|
118
|
+
# Puts events from a failed flush back into the buffer and counts any that
# no longer fit as drops. Does nothing when there are no events to requeue.
def requeue_failed_events(events_to_flush)
  return unless events_to_flush

  capacity = SolidObserver.config.max_buffer_size
  lost = sync_requeue_events(events_to_flush, capacity)
  record_drop(lost) if lost > 0
end
|
|
124
|
+
|
|
125
|
+
# Cuts +events+ down to +max_buffer_size+ entries.
# Returns [kept_events, dropped_count]; when everything fits, the input
# array itself is returned with a count of 0.
def trim_events_for_capacity(events, max_buffer_size)
  overflow = events.size - max_buffer_size

  if overflow.positive?
    [events_to_keep(events, max_buffer_size), overflow]
  else
    [events, 0]
  end
end
|
|
131
|
+
|
|
132
|
+
# Starts a replacement flush timer when the current one is not running.
# The swap itself happens under the lock in replace_timer_if_stopped; the
# shutdown/execute calls are made here, after that lock is released.
def ensure_timer_running
  fresh_timer, stale_timer = replace_timer_if_stopped

  if fresh_timer
    stale_timer&.shutdown
    fresh_timer.execute
  end
end
|
|
139
|
+
|
|
140
|
+
# Under @mutex, decides whether a new timer is needed.
# Returns [nil, nil] when the current timer counts as running; otherwise
# [new_timer, previous_timer], where previous_timer may be nil.
# build_timer_task also stores the new timer in @timer_task.
def replace_timer_if_stopped
  @mutex.synchronize do
    previous = @timer_task
    timer_running?(previous) ? [nil, nil] : [build_timer_task, previous]
  end
end
|
|
148
|
+
|
|
149
|
+
# Detaches the current timer under @mutex, then shuts it down after the
# lock is released. Does nothing when no timer is set.
def stop_timer
  detached = nil
  @mutex.synchronize do
    detached = @timer_task
    @timer_task = nil
  end

  detached&.shutdown
end
|
|
158
|
+
|
|
159
|
+
# Records a successful flush: stamps the flush time, stores the duration
# and clears any previous flush error.
def record_flush_success(duration_ms)
  @metrics_mutex.synchronize do
    outcome = {
      last_flush_at: Time.current,
      last_flush_duration_ms: duration_ms,
      last_flush_error: nil
    }
    @metrics.update(outcome)
  end
end
|
|
168
|
+
|
|
169
|
+
# Records a failed flush: bumps the failure counter and remembers the
# error's message.
def record_flush_failure(error)
  @metrics_mutex.synchronize do
    @metrics[:flush_failures_count] = @metrics[:flush_failures_count] + 1
    @metrics[:last_flush_error] = error.message
  end
end
|
|
175
|
+
|
|
176
|
+
# Adds +count+ (default 1) to the dropped-events counter.
def record_drop(count = 1)
  @metrics_mutex.synchronize { @metrics[:drops_count] += count }
end
|
|
181
|
+
|
|
182
|
+
# Under @mutex, applies the overflow policy for the incoming event.
# Returns [dropped_count, flush_needed], where flush_needed is true once
# the buffer holds at least config.buffer_size events.
def sync_push_and_check(event_data, config)
  @mutex.synchronize do
    dropped = apply_overflow_policy(event_data, config)
    threshold_reached = @buffer.size >= config.buffer_size
    [dropped, threshold_reached]
  end
end
|
|
188
|
+
|
|
189
|
+
# Under @mutex, puts the failed batch back in front of the events buffered
# since, trims the result to capacity and swaps it into @buffer in place.
# Returns the number of events dropped by the trim.
def sync_requeue_events(events_to_flush, max_buffer_size)
  @mutex.synchronize do
    survivors, lost = trim_events_for_capacity(events_to_flush + @buffer, max_buffer_size)
    @buffer.replace(survivors)
    lost
  end
end
|
|
197
|
+
|
|
198
|
+
# Picks which +max_buffer_size+ events survive a trim: the newest ones under
# the :drop_old strategy, the oldest ones under any other strategy.
def events_to_keep(events, max_buffer_size)
  keep_newest = SolidObserver.config.buffer_overflow_strategy == :drop_old
  keep_newest ? events.last(max_buffer_size) : events.first(max_buffer_size)
end
|
|
205
|
+
|
|
206
|
+
# Falsy when there is no timer or the timer reports shuttingdown?;
# true otherwise.
# NOTE(review): a timer that has already finished shutting down may no
# longer report shuttingdown? -- confirm against concurrent-ruby.
def timer_running?(timer_task)
  return timer_task unless timer_task

  !timer_task.shuttingdown?
end
|
|
209
|
+
|
|
210
|
+
# Creates (without starting) a timer task that calls flush! every
# config.flush_interval, stores it in @timer_task and returns it.
def build_timer_task
  interval = SolidObserver.config.flush_interval
  @timer_task = Concurrent::TimerTask.new(execution_interval: interval, run_now: false) { flush! }
end
|
|
216
|
+
|
|
217
|
+
# Current monotonic clock reading in whole milliseconds; used to time a
# flush without being affected by wall-clock changes.
def monotonic_ms
  clock_id = Process::CLOCK_MONOTONIC
  Process.clock_gettime(clock_id, :millisecond)
end
|
|
82
220
|
end
|
|
83
221
|
end
|
|
@@ -1,20 +1,167 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "chart_buffer"
|
|
4
|
+
|
|
3
5
|
module SolidObserver
|
|
6
|
+
# :reek:TooManyMethods
|
|
4
7
|
class QueueStats
|
|
8
|
+
RANGES = {
|
|
9
|
+
"15m" => 15.minutes,
|
|
10
|
+
"30m" => 30.minutes,
|
|
11
|
+
"1h" => 1.hour,
|
|
12
|
+
"7h" => 7.hours,
|
|
13
|
+
"1d" => 1.day,
|
|
14
|
+
"7d" => 7.days,
|
|
15
|
+
"14d" => 14.days
|
|
16
|
+
}.freeze
|
|
17
|
+
DEFAULT_RANGE = "15m"
|
|
18
|
+
POLL_DEFAULT_RANGE = "15m"
|
|
19
|
+
POLL_EMPTY_SNAPSHOT = {
|
|
20
|
+
ready: 0,
|
|
21
|
+
scheduled: 0,
|
|
22
|
+
claimed: 0,
|
|
23
|
+
workers: 0,
|
|
24
|
+
failed: 0,
|
|
25
|
+
enqueue_rate_per_min: nil
|
|
26
|
+
}.freeze
|
|
27
|
+
TICK_EMPTY_SNAPSHOT = {
|
|
28
|
+
ready: 0,
|
|
29
|
+
scheduled: 0,
|
|
30
|
+
claimed: 0,
|
|
31
|
+
workers: 0,
|
|
32
|
+
failed: 0
|
|
33
|
+
}.freeze
|
|
34
|
+
BUCKET_RULES = [
|
|
35
|
+
[30.minutes.to_i, 30],
|
|
36
|
+
[2.hours.to_i, 60],
|
|
37
|
+
[1.day.to_i, 5.minutes.to_i]
|
|
38
|
+
].freeze
|
|
39
|
+
|
|
5
40
|
class << self
  # Full snapshot for the given range key; delegates to a fresh instance.
  def snapshot(range: DEFAULT_RANGE)
    stats = new
    stats.snapshot(range)
  end

  # Poll snapshot; the range is normalized to a known key first, falling
  # back to POLL_DEFAULT_RANGE.
  def snapshot_for_poll(range:)
    range_key = parse_range(range, fallback: POLL_DEFAULT_RANGE)
    new.snapshot_for_poll(range_key)
  end

  # Counts-only snapshot; delegates to a fresh instance.
  def snapshot_for_tick
    stats = new
    stats.snapshot_for_tick
  end

  # Chart series for the given time window; delegates to a fresh instance.
  def chart_data(window: 15.minutes)
    stats = new
    stats.chart_data(window)
  end

  # True only when both SolidQueue and SolidQueue::Job are defined.
  def solid_queue_available?
    (defined?(SolidQueue) && defined?(SolidQueue::Job)) ? true : false
  end

  # Returns +value+ as a String when it is a key of RANGES, else +fallback+.
  def parse_range(value, fallback: DEFAULT_RANGE)
    range_key = value.to_s
    return range_key if RANGES.key?(range_key)

    fallback
  end

  # Duration for a range key, after the same normalization as parse_range.
  def range_duration(value, fallback: DEFAULT_RANGE)
    range_key = parse_range(value, fallback: fallback)
    RANGES.fetch(range_key)
  end
end
|
|
70
|
+
|
|
71
|
+
# Builds the full snapshot for +range+ (a range key, or nil).
# Returns an error response Hash instead of raising when SolidQueue is not
# available or anything fails while collecting the data.
def snapshot(range = DEFAULT_RANGE)
  stats_class = self.class
  unless stats_class.solid_queue_available?
    return error_response("SolidQueue not available")
  end

  # A nil range is passed through as nil; anything else is normalized.
  range_key = range && stats_class.parse_range(range)
  snapshot_for_mode(range_key)
rescue => e
  error_response(e.message)
end
|
|
79
|
+
|
|
80
|
+
# :reek:TooManyStatements
|
|
81
|
+
# Builds the snapshot returned to the polling endpoint.
#
# Always contains the live counts. In persistence mode it also carries the
# throughput stats for +range+, the per-queue depths and the per-queue
# performed/failed counts.
#
# @param range [String] range key; unknown keys fall back to the default
# @return [Hash] the snapshot, or a copy of POLL_EMPTY_SNAPSHOT when
#   SolidQueue is unavailable or any lookup raises
def snapshot_for_poll(range)
  empty_snapshot = POLL_EMPTY_SNAPSHOT.dup
  stats_class = self.class
  return empty_snapshot unless stats_class.solid_queue_available?

  base = {
    ready: ready_count,
    scheduled: scheduled_count,
    claimed: claimed_count,
    workers: active_workers_count,
    failed: failed_count,
    # Stays nil outside persistence mode. In persistence mode
    # throughput_stats supplies this key below, so it is not queried here
    # as well (the earlier duplicate query was always overwritten).
    enqueue_rate_per_min: nil
  }
  return base unless SolidObserver.config.persistence_mode?

  window = stats_class.range_duration(range, fallback: POLL_DEFAULT_RANGE)
  base.merge!(throughput_stats(range))
  base[:queues] = queue_depths
  base[:performed_by_queue] = QueueEvent.count_by_queue_and_event_type(window: window, event_type: "job_completed")
  base[:failed_by_queue] = QueueEvent.count_by_queue_and_event_type(window: window, event_type: "job_failed")
  base
rescue
  # Best effort: a failed poll yields the zeroed snapshot.
  empty_snapshot
end
|
|
108
|
+
|
|
109
|
+
# Builds the lightweight counts-only snapshot.
#
# @return [Hash] live counts, or a zeroed snapshot when SolidQueue is
#   unavailable or any count raises. The zeroed snapshot is a fresh copy,
#   so callers may mutate the result on every path without touching the
#   frozen TICK_EMPTY_SNAPSHOT constant.
def snapshot_for_tick
  return TICK_EMPTY_SNAPSHOT.dup unless self.class.solid_queue_available?

  {
    ready: ready_count,
    scheduled: scheduled_count,
    claimed: claimed_count,
    workers: active_workers_count,
    failed: failed_count
  }
rescue
  # Best effort: a failed count yields the zeroed snapshot.
  TICK_EMPTY_SNAPSHOT.dup
end
|
|
14
122
|
|
|
15
|
-
|
|
16
|
-
|
|
123
|
+
# :reek:TooManyStatements
|
|
124
|
+
# Returns the chart series for +window+ as {performed:, failed:, ready:}.
# The ready series always comes from ChartBuffer. The performed and failed
# series are bucketed QueueEvent counts, left empty outside persistence mode.
def chart_data(window)
  ready_series = ChartBuffer.recent(window.to_i)
  unless SolidObserver.config.persistence_mode?
    return {performed: [], failed: [], ready: ready_series}
  end

  bucket = derive_bucket_seconds(window)
  performed, failed = %w[job_completed job_failed].map do |event_type|
    QueueEvent.count_by_time_bucket(event_type: event_type, window: window, bucket_seconds: bucket)
  end

  {performed: performed, failed: failed, ready: ready_series}
end
|
|
144
|
+
|
|
145
|
+
private
|
|
146
|
+
|
|
147
|
+
# Picks the chart bucket size in seconds for +window+: the bucket of the
# first BUCKET_RULES entry whose limit covers the window, or 30 minutes
# when the window exceeds every limit.
def derive_bucket_seconds(window)
  span = window.to_i
  BUCKET_RULES.each do |limit, bucket|
    return bucket if span <= limit
  end

  30.minutes.to_i
end
|
|
151
|
+
|
|
152
|
+
# Returns the base snapshot, extended with range-scoped throughput stats and
# per-queue performed/failed counts when persistence mode is on and a range
# key was given.
def snapshot_for_mode(range_key)
  live = snapshot_base
  with_history = SolidObserver.config.persistence_mode? && range_key
  return live unless with_history

  window = self.class.range_duration(range_key)
  combined = live.merge(throughput_stats(range_key))
  combined.merge(
    range: range_key,
    performed_by_queue: QueueEvent.count_by_queue_and_event_type(window: window, event_type: "job_completed"),
    failed_by_queue: QueueEvent.count_by_queue_and_event_type(window: window, event_type: "job_failed")
  )
end
|
|
163
|
+
|
|
164
|
+
def snapshot_base
|
|
18
165
|
{
|
|
19
166
|
ready: ready_count,
|
|
20
167
|
scheduled: scheduled_count,
|
|
@@ -24,26 +171,24 @@ module SolidObserver
|
|
|
24
171
|
queues: queue_depths,
|
|
25
172
|
available: true
|
|
26
173
|
}
|
|
27
|
-
rescue => e
|
|
28
|
-
error_response(e)
|
|
29
174
|
end
|
|
30
175
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def unavailable_response
|
|
176
|
+
# Collects QueueEvent-based throughput figures for the given range key.
def throughput_stats(range_key)
  window = self.class.range_duration(range_key)

  in_range = {
    performed_in_range: QueueEvent.performed_count_last(window),
    failed_in_range: QueueEvent.failed_count_last(window),
    enqueued_in_range: QueueEvent.enqueued_count_last(window),
    avg_duration_in_range: QueueEvent.avg_duration_last(window)
  }

  # The stability figures use fixed rolling windows (24 hours, 1 hour),
  # independent of the selected range.
  stability = {
    failed_last_24h: QueueEvent.failed_count_last(24.hours),
    failed_last_hour: QueueEvent.failed_count_last(1.hour),
    latest_failure_at: QueueEvent.recent_failures(1).first&.recorded_at
  }

  in_range
    .merge(stability)
    .merge(enqueue_rate_per_min: QueueEvent.enqueue_rate_per_minute(window: window))
end
|
|
45
190
|
|
|
46
|
-
def error_response(
|
|
191
|
+
def error_response(message)
|
|
47
192
|
{
|
|
48
193
|
ready: 0,
|
|
49
194
|
scheduled: 0,
|
|
@@ -52,7 +197,8 @@ module SolidObserver
|
|
|
52
197
|
workers: 0,
|
|
53
198
|
queues: {},
|
|
54
199
|
available: false,
|
|
55
|
-
|
|
200
|
+
range: DEFAULT_RANGE,
|
|
201
|
+
error: message
|
|
56
202
|
}
|
|
57
203
|
end
|
|
58
204
|
|