dead_bro 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,403 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

require "time"

module DeadBro
  module Collectors
    # Jobs collector is responsible for building the unified "jobs" payload
    # that the background job queue monitor sends once per interval.
    #
    # It encapsulates queue backend detection (Sidekiq, SolidQueue,
    # Delayed::Job, GoodJob) and gathers best-effort statistics from the
    # detected backend. Every path is defensive: unexpected failures are
    # reported as {error_class:, error_message:} hashes instead of raising.
    module Jobs
      module_function

      # Sidekiq::Stats readers copied verbatim into the payload. Each is
      # fetched independently so one failing reader does not poison the rest.
      SIDEKIQ_STAT_KEYS = %i[
        processed failed enqueued scheduled_size retry_size
        dead_size workers_size processes_size
      ].freeze

      # Public entry point: returns a single Hash suitable for JSON encoding.
      # Always contains :queue_system; adds :queue stats for known backends.
      def collect
        queue_system = detect_queue_system

        payload = {
          queue_system: queue_system
        }

        # Queue backend–specific stats (at most one branch applies).
        case queue_system
        when :sidekiq     then payload[:queue] = collect_sidekiq_stats
        when :solid_queue then payload[:queue] = collect_solid_queue_stats
        when :delayed_job then payload[:queue] = collect_delayed_job_stats
        when :good_job    then payload[:queue] = collect_good_job_stats
        end

        # Optional collectors moved to Monitor

        payload
      rescue => e
        error_payload(e)
      end

      # --- Queue system detection -------------------------------------------------

      # Detection order matters: the first backend whose constant is defined wins.
      def detect_queue_system
        return :sidekiq if defined?(Sidekiq)
        return :solid_queue if defined?(SolidQueue)
        return :delayed_job if defined?(Delayed::Job)
        return :good_job if defined?(GoodJob)
        :unknown
      end

      # --- Sidekiq collector ------------------------------------------------------

      # Collects global Sidekiq stats plus per-queue size and latency.
      # All values default to nil; a partial failure records
      # :error_class/:error_message while keeping whatever was gathered.
      def collect_sidekiq_stats
        return {} unless defined?(Sidekiq)

        stats = SIDEKIQ_STAT_KEYS.each_with_object({}) { |key, h| h[key] = nil }
        stats[:memory_rss_bytes] = nil
        stats[:queues] = []

        begin
          sidekiq_stats = safe_sidekiq_stats
          if sidekiq_stats
            SIDEKIQ_STAT_KEYS.each do |key|
              stats[key] = begin
                sidekiq_stats.public_send(key)
              rescue
                nil
              end
            end
          end

          # Per-queue size and latency. const_get avoids a hard constant
          # reference in case sidekiq/api failed to load.
          queue_class = begin
            Sidekiq.const_get(:Queue)
          rescue
            nil
          end
          if queue_class && queue_class.respond_to?(:all)
            queue_class.all.each do |queue|
              stats[:queues] << {
                name: queue.name,
                size: safe_integer(queue.size),
                latency_s: safe_latency(queue)
              }
            end
          end

          # Process RSS at collection time (best-effort, opt-in via config)
          if DeadBro.configuration.respond_to?(:enable_process_stats) && DeadBro.configuration.enable_process_stats
            stats[:memory_rss_bytes] = begin
              ProcessInfo.rss_bytes
            rescue
              nil
            end
          end
        rescue => e
          stats[:error_class] = e.class.name
          stats[:error_message] = e.message.to_s[0, 500]
        end

        stats
      rescue => e
        error_payload(e)
      end

      # Builds Sidekiq::Stats, returning nil when sidekiq/api cannot be
      # loaded or Stats cannot be constructed (e.g. Redis is unreachable).
      def safe_sidekiq_stats
        require "sidekiq/api"
        Sidekiq::Stats.new
      rescue LoadError, NameError, StandardError
        nil
      end

      # Strict integer coercion that returns nil instead of raising.
      def safe_integer(value)
        Integer(value)
      rescue
        nil
      end

      # Returns the queue's latency as a finite Float, or nil when it is
      # unavailable, non-numeric, NaN, or infinite.
      def safe_latency(queue)
        latency = begin
          queue.latency
        rescue
          nil
        end
        return nil unless latency

        value = begin
          Float(latency)
        rescue
          nil
        end
        return nil unless value && value.finite?

        value
      rescue
        nil
      end

      # --- SolidQueue collector (database-backed) ---------------------------------

      # Queries the solid_queue_* tables directly; requires an established
      # ActiveRecord connection and returns {} when preconditions fail.
      # NOTE(review): NOW() assumes PostgreSQL/MySQL; SQLite would need
      # CURRENT_TIMESTAMP — confirm the supported adapters.
      def collect_solid_queue_stats
        return {} unless defined?(SolidQueue)
        return {} unless defined?(ActiveRecord)
        return {} unless ActiveRecord::Base.respond_to?(:connected?) && ActiveRecord::Base.connected?

        stats = {total_queued: 0, total_busy: 0, queues: {}}

        begin
          conn = ActiveRecord::Base.connection
          return stats unless conn.respond_to?(:table_exists?) && conn.table_exists?("solid_queue_jobs")

          # queued jobs (unfinished), grouped per queue
          result = conn.execute("SELECT queue_name, COUNT(*) as count FROM solid_queue_jobs WHERE finished_at IS NULL GROUP BY queue_name")
          each_queue_count(result) do |queue_name, count|
            stats[:queues][queue_name] = {queued: count, busy: 0, scheduled: 0, retries: 0}
            stats[:total_queued] += count
          end

          # busy jobs (claimed by a worker but not yet finished)
          result = conn.execute("SELECT queue_name, COUNT(*) as count FROM solid_queue_jobs WHERE finished_at IS NULL AND claimed_at IS NOT NULL GROUP BY queue_name")
          each_queue_count(result) do |queue_name, count|
            stats[:queues][queue_name] ||= {queued: 0, busy: 0, scheduled: 0, retries: 0}
            stats[:queues][queue_name][:busy] = count
            stats[:total_busy] += count
          end

          # scheduled jobs (future run time)
          result = conn.execute("SELECT COUNT(*) as count FROM solid_queue_jobs WHERE scheduled_at > NOW()")
          stats[:total_scheduled] = single_count(result)

          # failed jobs
          if conn.table_exists?("solid_queue_failed_jobs")
            result = conn.execute("SELECT COUNT(*) as count FROM solid_queue_failed_jobs")
            stats[:total_failed] = single_count(result)
          end
        rescue => e
          stats[:error_class] = e.class.name
          stats[:error_message] = e.message.to_s[0, 500]
        end

        stats
      rescue => e
        error_payload(e)
      end

      # --- Delayed::Job collector -------------------------------------------------

      # Uses the Delayed::Job model directly; Delayed::Job has no per-queue
      # grouping here, so everything is reported under the "default" queue.
      def collect_delayed_job_stats
        return {} unless defined?(Delayed::Job)
        return {} unless defined?(ActiveRecord)

        stats = {total_queued: 0, total_busy: 0, queues: {}}

        begin
          return stats unless ActiveRecord::Base.connection.table_exists?("delayed_jobs")

          # queued jobs (unlocked, retries remaining)
          queued = Delayed::Job.where("locked_at IS NULL AND attempts < max_attempts").count
          stats[:total_queued] = queued
          stats[:queues]["default"] = {queued: queued, busy: 0, scheduled: 0, retries: 0}

          # busy jobs (locked by a worker)
          busy = Delayed::Job.where("locked_at IS NOT NULL AND locked_by IS NOT NULL").count
          stats[:total_busy] = busy
          stats[:queues]["default"][:busy] = busy

          # failed jobs (retries exhausted)
          failed = Delayed::Job.where("attempts >= max_attempts").count
          stats[:total_failed] = failed
        rescue => e
          stats[:error_class] = e.class.name
          stats[:error_message] = e.message.to_s[0, 500]
        end

        stats
      rescue => e
        error_payload(e)
      end

      # --- GoodJob collector ------------------------------------------------------

      # Queries the good_jobs table directly; same preconditions and
      # structure as the SolidQueue collector.
      def collect_good_job_stats
        return {} unless defined?(GoodJob)
        return {} unless defined?(ActiveRecord)
        return {} unless ActiveRecord::Base.respond_to?(:connected?) && ActiveRecord::Base.connected?

        stats = {total_queued: 0, total_busy: 0, queues: {}}

        begin
          conn = ActiveRecord::Base.connection
          return stats unless conn.respond_to?(:table_exists?) && conn.table_exists?("good_jobs")

          # queued (unfinished), grouped per queue
          result = conn.execute("SELECT queue_name, COUNT(*) as count FROM good_jobs WHERE finished_at IS NULL GROUP BY queue_name")
          each_queue_count(result) do |queue_name, count|
            stats[:queues][queue_name] = {queued: count, busy: 0, scheduled: 0, retries: 0}
            stats[:total_queued] += count
          end

          # busy (picked up by a worker but not finished)
          result = conn.execute("SELECT queue_name, COUNT(*) as count FROM good_jobs WHERE finished_at IS NULL AND performed_at IS NOT NULL GROUP BY queue_name")
          each_queue_count(result) do |queue_name, count|
            stats[:queues][queue_name] ||= {queued: 0, busy: 0, scheduled: 0, retries: 0}
            stats[:queues][queue_name][:busy] = count
            stats[:total_busy] += count
          end

          # scheduled (future run time)
          result = conn.execute("SELECT COUNT(*) as count FROM good_jobs WHERE scheduled_at > NOW()")
          stats[:total_scheduled] = single_count(result)

          # failed (finished with an error recorded)
          result = conn.execute("SELECT COUNT(*) as count FROM good_jobs WHERE finished_at IS NOT NULL AND error IS NOT NULL")
          stats[:total_failed] = single_count(result)
        rescue => e
          stats[:error_class] = e.class.name
          stats[:error_message] = e.message.to_s[0, 500]
        end

        stats
      rescue => e
        error_payload(e)
      end

      # --- Shared helpers ---------------------------------------------------------

      # Formats a caught exception as the standard error payload, with the
      # message truncated to 500 characters.
      def error_payload(error)
        {
          error_class: error.class.name,
          error_message: error.message.to_s[0, 500]
        }
      end

      # Yields [queue_name, count] for each row of a GROUP BY queue_name
      # result, tolerating string- or symbol-keyed rows.
      def each_queue_count(result)
        parse_query_result(result).each do |row|
          queue_name = (row["queue_name"] || row[:queue_name] || "default").to_s
          count = (row["count"] || row[:count] || 0).to_i
          yield queue_name, count
        end
      end

      # Extracts the single COUNT(*) value from a query result, 0 if absent.
      def single_count(result)
        row = parse_query_result(result).first
        (row&.dig("count") || row&.dig(:count) || 0).to_i
      end

      # Normalizes adapter-specific query results (PG::Result, Mysql2 rows,
      # plain arrays of hashes) into an array of hashes keyed by both the
      # string and symbol form of each column name. Returns [] on failure.
      def parse_query_result(result)
        if result.respond_to?(:each)
          if result.respond_to?(:values)
            columns = begin
              begin
                result.fields
              rescue
                result.column_names
              end
            rescue
              []
            end
            result.values.map do |row|
              columns.each_with_index.each_with_object({}) do |(col, idx), hash|
                hash[col.to_s] = row[idx]
                hash[col.to_sym] = row[idx]
              end
            end
          elsif result.is_a?(Array)
            result
          else
            result.to_a
          end
        else
          []
        end
      rescue
        []
      end

      # Best-effort Rails application name; nil outside a Rails process.
      def safe_app_name
        if defined?(Rails) && Rails.respond_to?(:application) && Rails.application
          begin
            Rails.application.class.module_parent_name
          rescue
            Rails.application.class.name
          end
        end
      rescue
        nil
      end

      # Hostname via ProcessInfo when available, otherwise the socket default.
      def process_hostname
        if defined?(ProcessInfo)
          begin
            ProcessInfo.safe_hostname
          rescue
            default_hostname
          end
        else
          default_hostname
        end
      rescue
        default_hostname
      end

      # Plain Socket hostname with an "unknown" fallback.
      def default_hostname
        require "socket"
        Socket.gethostname
      rescue
        "unknown"
      end

      # Runs the given block, converting any StandardError into the
      # standard error payload.
      def safe_collect
        yield
      rescue => e
        {error_class: e.class.name, error_message: e.message.to_s[0, 500]}
      end
    end
  end
end
@@ -0,0 +1,252 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

require "rbconfig"

module DeadBro
  module Collectors
    # Network collector exposes best-effort rx/tx byte counters and
    # per-interval rates. Linux is read from /proc/net/dev; macOS shells
    # out to `netstat -ib`. Other platforms report {available: false}.
    module Network
      module_function

      # Key under which the previous sample is persisted between intervals.
      SAMPLE_KEY = "network"

      # Returns {available: false} on unsupported platforms or when no
      # interfaces are readable; otherwise {available: true, interfaces: [...]}
      # where rates are derived against the previously stored sample.
      def collect
        if linux? && File.readable?("/proc/net/dev")
          current = read_interfaces_linux
        elsif macos?
          current = read_interfaces_macos
        else
          return {available: false}
        end

        return {available: false} if current.empty?

        now = current_time
        prev = SampleStore.load(SAMPLE_KEY)
        # Persist *all* interfaces so rate history survives a change of the
        # busiest interface between intervals...
        SampleStore.save(SAMPLE_KEY, {"timestamp" => now, "interfaces" => current})

        # ...but report only the busiest interface (by cumulative rx + tx).
        top_interface = current.max_by do |_, data|
          (data["rx_bytes"] || 0) + (data["tx_bytes"] || 0)
        end

        filtered_current = {}
        filtered_current[top_interface[0]] = top_interface[1] if top_interface

        {
          available: true,
          interfaces: build_interface_stats(prev, filtered_current, now)
        }
      rescue => e
        {
          error_class: e.class.name,
          error_message: e.message.to_s[0, 500]
        }
      end

      # True when the Ruby build targets Linux.
      def linux?
        host_os = RbConfig::CONFIG["host_os"].to_s.downcase
        host_os.include?("linux")
      rescue
        false
      end

      # True when the Ruby build targets macOS (darwin).
      def macos?
        host_os = RbConfig::CONFIG["host_os"].to_s.downcase
        host_os.include?("darwin")
      rescue
        false
      end

      # Monotonic clock for rate computation, falling back to wall time.
      def current_time
        Process.clock_gettime(Process::CLOCK_MONOTONIC)
      rescue
        Time.now.to_f
      end

      # Interface names excluded from collection; configurable, with a
      # loopback/docker default.
      def ignore_interfaces
        if DeadBro.configuration.respond_to?(:interfaces_ignore)
          DeadBro.configuration.interfaces_ignore || default_ignore
        else
          default_ignore
        end
      rescue
        default_ignore
      end

      # Built-in ignore list: loopback (Linux and macOS names) and docker bridge.
      def default_ignore
        %w[lo lo0 docker0]
      end

      # Parses /proc/net/dev into {"eth0" => {"rx_bytes" => .., "tx_bytes" => ..}}.
      # Format per interface line:
      #   face: |bytes packets errs drop fifo frame compressed multicast|bytes packets ...
      # so rx bytes is field 0 and tx bytes is field 8 after the colon.
      def read_interfaces_linux
        ignored = ignore_interfaces
        interfaces = {}

        File.foreach("/proc/net/dev") do |line|
          next unless line.include?(":")

          name, data = line.split(":", 2)
          name = name.strip
          next if ignored.include?(name)

          fields = data.split
          rx_bytes = begin
            Integer(fields[0])
          rescue
            nil
          end
          tx_bytes = begin
            Integer(fields[8])
          rescue
            nil
          end
          next unless rx_bytes && tx_bytes

          interfaces[name] = {
            "rx_bytes" => rx_bytes,
            "tx_bytes" => tx_bytes
          }
        end

        interfaces
      rescue
        {}
      end

      # Parses `netstat -ib` output. Only <Link#N> rows carry the interface
      # byte counters. Expected column layout:
      #   Name Mtu Network Address Ipkts Ierrs Ibytes Opkts Oerrs Obytes Coll
      # The Address column is absent for interfaces without a MAC (e.g. lo0),
      # which shifts Ibytes/Obytes from indices 6/9 down to 5/8; that case is
      # detected by field 3 being purely numeric (i.e. it is Ipkts, not a MAC).
      def read_interfaces_macos
        ignored = ignore_interfaces
        interfaces = {}

        output = `netstat -ib`
        output.each_line do |line|
          fields = line.split
          next if fields.size < 10 # header or malformed line

          name = fields[0]
          network = fields[2]

          # Only <Link#...> rows contain the byte counters.
          next unless network && network.start_with?("<Link#")
          next if ignored.include?(name)

          idx_ibytes = 6
          idx_obytes = 9
          if /^\d+$/.match?(fields[3]) # field 3 is Ipkts -> Address column missing
            idx_ibytes = 5
            idx_obytes = 8
          end

          rx_bytes = begin
            Integer(fields[idx_ibytes])
          rescue
            nil
          end
          tx_bytes = begin
            Integer(fields[idx_obytes])
          rescue
            nil
          end

          next unless rx_bytes && tx_bytes

          interfaces[name] = {
            "rx_bytes" => rx_bytes,
            "tx_bytes" => tx_bytes
          }
        end

        interfaces
      rescue
        {}
      end

      # Builds the reported per-interface entries. Rates are per second,
      # computed against the previous sample; they stay nil when there is
      # no prior sample, no elapsed time, or a counter went backwards
      # (e.g. counter reset after reboot).
      def build_interface_stats(prev, current, now)
        prev_ts = prev && prev["timestamp"]
        elapsed = prev_ts ? (now - prev_ts.to_f) : nil

        current.map do |name, data|
          prev_data = prev && prev["interfaces"] && prev["interfaces"][name]
          rx_rate = tx_rate = nil

          if elapsed && elapsed > 0 && prev_data
            rx_delta = data["rx_bytes"] - prev_data["rx_bytes"].to_i
            tx_delta = data["tx_bytes"] - prev_data["tx_bytes"].to_i
            rx_rate = (rx_delta / elapsed.to_f).round(2) if rx_delta >= 0
            tx_rate = (tx_delta / elapsed.to_f).round(2) if tx_delta >= 0
          end

          {
            name: name,
            rx_bytes: data["rx_bytes"],
            tx_bytes: data["tx_bytes"],
            rx_bytes_per_s: rx_rate,
            tx_bytes_per_s: tx_rate
          }
        end
      rescue
        []
      end
    end
  end
end