dead_bro 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +126 -0
- data/lib/dead_bro/cache_subscriber.rb +3 -3
- data/lib/dead_bro/client.rb +6 -6
- data/lib/dead_bro/collectors/database.rb +126 -0
- data/lib/dead_bro/collectors/filesystem.rb +94 -0
- data/lib/dead_bro/collectors/jobs.rb +403 -0
- data/lib/dead_bro/collectors/network.rb +252 -0
- data/lib/dead_bro/collectors/process_info.rb +178 -0
- data/lib/dead_bro/collectors/sample_store.rb +108 -0
- data/lib/dead_bro/collectors/system.rb +206 -0
- data/lib/dead_bro/collectors.rb +14 -0
- data/lib/dead_bro/configuration.rb +21 -17
- data/lib/dead_bro/error_middleware.rb +1 -11
- data/lib/dead_bro/http_instrumentation.rb +3 -3
- data/lib/dead_bro/job_sql_tracking_middleware.rb +2 -2
- data/lib/dead_bro/job_subscriber.rb +2 -12
- data/lib/dead_bro/monitor.rb +89 -0
- data/lib/dead_bro/railtie.rb +5 -6
- data/lib/dead_bro/redis_subscriber.rb +3 -3
- data/lib/dead_bro/sql_subscriber.rb +41 -39
- data/lib/dead_bro/sql_tracking_middleware.rb +1 -1
- data/lib/dead_bro/subscriber.rb +1 -9
- data/lib/dead_bro/version.rb +1 -1
- data/lib/dead_bro/view_rendering_subscriber.rb +3 -3
- data/lib/dead_bro.rb +11 -8
- metadata +10 -2
- data/lib/dead_bro/job_queue_monitor.rb +0 -395
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require "rbconfig"
|
|
5
|
+
require "socket"
|
|
6
|
+
|
|
7
|
+
module DeadBro
  module Collectors
    # ProcessInfo collector exposes Ruby / Rails / process level metrics such as
    # RSS, thread count, file descriptor count, GC stats and uptime.
    #
    # All methods are best-effort: on failure they return nil (or an empty
    # hash for GC stats) rather than raising exceptions.
    module ProcessInfo
      module_function

      # Linux pseudo-file that holds the VmRSS and Threads counters.
      PROC_STATUS_PATH = "/proc/self/status"

      # GC.stat keys reported by .gc_stats, in output order.
      GC_STAT_KEYS = %i[
        heap_live_slots
        heap_free_slots
        total_allocated_objects
        major_gc_count
        minor_gc_count
      ].freeze

      # Builds the full process snapshot.
      #
      # @return [Hash] the metrics hash, or a hash with :error_class and
      #   :error_message (truncated to 500 chars) if anything raised.
      def collect
        # Memoise the start time BEFORE sampling `now`; otherwise the very
        # first call would compute uptime against a later start time.
        process_start_time
        now = Time.now.utc

        {
          pid: Process.pid,
          hostname: safe_hostname,
          boot_time: rails_boot_time,
          uptime_s: uptime_seconds(now),
          ruby_version: RUBY_VERSION,
          rails_version: safe_rails_version,
          app_env: DeadBro.env,
          rss_bytes: rss_bytes,
          thread_count: thread_count,
          fd_count: fd_count,
          gc: gc_stats
        }
      rescue => e
        {
          error_class: e.class.name,
          error_message: e.message.to_s[0, 500]
        }
      end

      # @return [Boolean] true when RbConfig reports a Linux host OS.
      def linux?
        RbConfig::CONFIG["host_os"].to_s.downcase.include?("linux")
      rescue
        false
      end

      # @return [Boolean] true when RbConfig reports a Darwin (macOS) host OS.
      def macos?
        RbConfig::CONFIG["host_os"].to_s.downcase.include?("darwin")
      rescue
        false
      end

      # @return [String] the machine hostname, or "unknown" if the lookup fails.
      def safe_hostname
        Socket.gethostname
      rescue
        "unknown"
      end

      # Rails does not expose its boot time directly, so this is approximated
      # with .process_start_time.
      #
      # @return [Time, nil] nil when Rails is not loaded.
      def rails_boot_time
        return nil unless defined?(Rails)

        process_start_time
      rescue
        nil
      end

      # The time this method was first called in the current process (UTC),
      # memoised on the module. It is used as a stand-in for the real process
      # start time.
      #
      # @return [Time]
      def process_start_time
        @process_start_time ||= Time.now.utc
      end

      # Seconds elapsed between .process_start_time and +now+, rounded to two
      # decimals and never negative.
      #
      # @param now [Time] reference time (defaults to the current UTC time)
      # @return [Float, nil] nil if the computation raised.
      def uptime_seconds(now = Time.now.utc)
        elapsed = (now.to_f - process_start_time.to_f).round(2)
        # A `now` sampled before the start time was memoised would otherwise
        # yield a (tiny) negative uptime.
        elapsed.positive? ? elapsed : 0.0
      rescue
        nil
      end

      # @return [String, nil] the Rails version, or nil when Rails is absent.
      def safe_rails_version
        if defined?(Rails) && Rails.respond_to?(:version)
          Rails.version
        elsif defined?(Rails::VERSION) && Rails::VERSION::STRING
          Rails::VERSION::STRING
        end
      rescue
        nil
      end

      # Resident set size of the current process.
      #
      # Reads /proc/self/status on Linux when it is readable, otherwise
      # shells out to `ps`.
      #
      # @return [Integer, nil] bytes, or nil when it cannot be determined.
      def rss_bytes
        if linux? && File.readable?(PROC_STATUS_PATH)
          parse_proc_status_for_rss(PROC_STATUS_PATH)
        else
          rss_from_ps
        end
      rescue
        nil
      end

      # Extracts the VmRSS value (reported in kB) from a /proc status file.
      #
      # @param path [String] path of the status file to read
      # @return [Integer, nil] bytes, or nil when no parsable VmRSS line exists
      #   or the file cannot be read.
      def parse_proc_status_for_rss(path)
        kilobytes = proc_status_value(path, "VmRSS:")
        kilobytes && kilobytes * 1024
      rescue
        nil
      end

      # Returns the integer in the second whitespace-separated column of the
      # first line of +path+ that starts with +prefix+ and parses cleanly.
      #
      # @param path [String] status file to scan
      # @param prefix [String] line prefix to look for, e.g. "Threads:"
      # @return [Integer, nil]
      def proc_status_value(path, prefix)
        File.foreach(path) do |line|
          next unless line.start_with?(prefix)

          value = parse_integer(line.split[1])
          return value if value
        end
        nil
      rescue
        nil
      end

      # Strict integer parse that yields nil instead of raising.
      #
      # @param text [String, nil]
      # @return [Integer, nil]
      def parse_integer(text)
        Integer(text)
      rescue ArgumentError, TypeError
        nil
      end

      # RSS as reported by `ps -o rss=` (kB) for the current pid.
      #
      # @return [Integer, nil] bytes, or nil when ps printed nothing usable.
      def rss_from_ps
        kilobytes = `ps -o rss= -p #{Process.pid}`.to_i
        return nil unless kilobytes.positive?

        kilobytes * 1024
      rescue
        nil
      end

      # Thread count: the "Threads:" field of /proc/self/status on Linux when
      # it is readable, otherwise the number of Ruby threads (Thread.list).
      #
      # @return [Integer, nil]
      def thread_count
        return Thread.list.size unless linux? && File.readable?(PROC_STATUS_PATH)

        proc_status_value(PROC_STATUS_PATH, "Threads:")
      rescue
        nil
      end

      # Number of entries in the per-process fd directory (/proc/self/fd on
      # Linux, /dev/fd on macOS).
      #
      # @return [Integer, nil] nil on other platforms or when the directory
      #   is missing.
      def fd_count
        fd_dir =
          if linux? && File.directory?("/proc/self/fd")
            "/proc/self/fd"
          elsif macos? && File.directory?("/dev/fd")
            "/dev/fd"
          end
        return nil unless fd_dir

        # Dir.children already excludes "." and "..".
        Dir.children(fd_dir).size
      rescue
        nil
      end

      # Selected GC.stat counters (see GC_STAT_KEYS).
      #
      # @return [Hash{Symbol => Integer, nil}] empty hash when GC.stat is
      #   unavailable or raises.
      def gc_stats
        return {} unless defined?(GC) && GC.respond_to?(:stat)

        stats = GC.stat
        GC_STAT_KEYS.each_with_object({}) { |key, out| out[key] = stats[key] }
      rescue
        {}
      end
    end
  end
end
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require "json"
|
|
5
|
+
require "digest"
|
|
6
|
+
|
|
7
|
+
module DeadBro
  module Collectors
    # SampleStore provides a tiny, best-effort persistence layer for
    # time-series samples (CPU, network, etc.) between runs.
    #
    # Reads prefer Redis via Sidekiq when available and otherwise fall back
    # to a JSON file in the temp directory. Writes go to Redis (when
    # available) and are always mirrored to the JSON file. All failures are
    # swallowed and simply result in nil being returned from .load.
    module SampleStore
      module_function

      # Expiry of the Redis copy of a sample (5 minutes).
      REDIS_TTL_SECONDS = 300

      # Loads the sample stored under +key+.
      #
      # @param key [#to_s] sample name
      # @return [Object, nil] the parsed JSON value, or nil when nothing is
      #   stored or any step failed.
      def load(key)
        load_from_redis(key) || load_from_file(key)
      rescue
        nil
      end

      # Stores +data+ under +key+ in Redis (when available) and, regardless
      # of the Redis outcome, in the JSON file.
      #
      # @param key [#to_s] sample name
      # @param data [Object] JSON-serialisable payload
      # @return [Object, nil] whatever .save_to_redis returned.
      def save(key, data)
        redis_result =
          begin
            save_to_redis(key, data)
          rescue
            nil
          end

        begin
          save_to_file(key, data)
        rescue
          # Completely best-effort
        end

        redis_result
      end

      # @param key [#to_s] sample name
      # @return [Object, nil] parsed JSON from Redis; nil when Sidekiq is not
      #   loaded, the key is absent, or anything raised.
      def load_from_redis(key)
        return nil unless defined?(Sidekiq) && Sidekiq.respond_to?(:redis)

        raw = Sidekiq.redis { |r| r.get(redis_key(key)) }
        return nil unless raw

        JSON.parse(raw)
      rescue
        nil
      end

      # Writes the JSON payload to Redis with a REDIS_TTL_SECONDS expiry.
      # Does nothing when Sidekiq is not loaded; errors are swallowed.
      #
      # @param key [#to_s] sample name
      # @param data [Object] JSON-serialisable payload
      def save_to_redis(key, data)
        return unless defined?(Sidekiq) && Sidekiq.respond_to?(:redis)

        Sidekiq.redis do |r|
          r.set(redis_key(key), JSON.dump(data), ex: REDIS_TTL_SECONDS)
        end
      rescue
        # Best-effort only
      end

      # @param key [#to_s] sample name
      # @return [Object, nil] parsed JSON from the sample file; nil when the
      #   file is missing, unreadable or not valid JSON.
      def load_from_file(key)
        path = file_path(key)
        return nil unless File.file?(path)

        File.open(path, "r") do |f|
          # Shared lock: wait for a concurrent writer (which holds LOCK_EX
          # while it truncates and rewrites) instead of reading a partial file.
          f.flock(File::LOCK_SH)
          JSON.parse(f.read)
        end
      rescue
        nil
      end

      # Atomically (with respect to other flock users) replaces the sample
      # file's content with the JSON form of +data+. Errors are swallowed.
      #
      # @param key [#to_s] sample name
      # @param data [Object] JSON-serialisable payload
      def save_to_file(key, data)
        # Serialise first so a dump failure cannot leave a truncated file.
        payload = JSON.dump(data)
        path = file_path(key)
        dir = File.dirname(path)
        Dir.mkdir(dir) unless Dir.exist?(dir)

        File.open(path, File::RDWR | File::CREAT, 0o600) do |f|
          f.flock(File::LOCK_EX)
          f.rewind
          f.truncate(0)
          f.write(payload)
          # Flush while the lock is still held; the lock is released when the
          # block closes the file. Unlocking before the buffered write reaches
          # the file would let readers observe the truncated file.
          f.flush
        end
      rescue
        # Best-effort only
      end

      # @param key [#to_s] sample name
      # @return [String] Redis key namespaced by environment and hostname.
      def redis_key(key)
        env = DeadBro.env
        host = begin
          require "socket"
          Socket.gethostname
        rescue
          "unknown"
        end

        "dead_bro:metrics:#{env}:#{host}:#{key}"
      end

      # @param key [#to_s] sample name
      # @return [String] path of the JSON file for +key+: a digest-named file
      #   under Dir.tmpdir, or a sanitised name under /tmp if that fails.
      def file_path(key)
        # Dir.tmpdir lives in the "tmpdir" stdlib, not in core Ruby; load it
        # here so the primary path is used instead of the rescue fallback.
        require "tmpdir"

        digest = Digest::SHA256.hexdigest(key.to_s)[0, 16]
        File.join(Dir.tmpdir, "dead_bro_metrics_#{digest}.json")
      rescue
        "/tmp/dead_bro_metrics_#{key.to_s.gsub(/[^a-zA-Z0-9]/, "_")}.json"
      end
    end
  end
end
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require "rbconfig"
|
|
5
|
+
|
|
6
|
+
module DeadBro
  module Collectors
    # System collector provides best-effort CPU and memory statistics,
    # reading /proc on Linux and shelling out to top / sysctl / vm_stat on
    # macOS.
    #
    # CPU percentages are normalised to 0..100 across all cores. The first
    # run may not contain a CPU percentage because there is no previous
    # sample to diff against.
    module System
      module_function

      CPU_SAMPLE_KEY = "cpu"
      MEMINFO_PATH = "/proc/meminfo"
      PROC_STAT_PATH = "/proc/stat"
      # Used only when vm_stat's header does not state the page size.
      DEFAULT_MACOS_PAGE_SIZE = 4096

      # @return [Hash] {enabled: false} when system stats are disabled,
      #   otherwise the CPU / memory / disk metrics; on an unexpected error a
      #   hash with :error_class and :error_message (truncated to 500 chars).
      def collect
        return {enabled: false} unless system_enabled?

        {
          cpu_pct: cpu_percentage,
          mem_used_bytes: mem_used_bytes,
          mem_total_bytes: mem_total_bytes,
          mem_available_bytes: mem_available_bytes,
          disk: Filesystem.collect
        }
      rescue => e
        {
          error_class: e.class.name,
          error_message: e.message.to_s[0, 500]
        }
      end

      # @return [Object] the configured enable_system_stats value (truthy when
      #   enabled); false when the setting is missing or reading it raised.
      def system_enabled?
        config = DeadBro.configuration
        config.respond_to?(:enable_system_stats) && config.enable_system_stats
      rescue
        false
      end

      # @return [Boolean] true when RbConfig reports a Linux host OS.
      def linux?
        RbConfig::CONFIG["host_os"].to_s.downcase.include?("linux")
      rescue
        false
      end

      # @return [Boolean] true when RbConfig reports a Darwin (macOS) host OS.
      def macos?
        RbConfig::CONFIG["host_os"].to_s.downcase.include?("darwin")
      rescue
        false
      end

      # CPU percentage normalised to 0..100.
      #
      # @return [Float, nil] nil on unsupported platforms or when unavailable.
      def cpu_percentage
        if linux?
          cpu_percentage_linux
        elsif macos?
          cpu_percentage_macos
        end
      end

      # Diffs the current /proc/stat sample against the one persisted by the
      # previous run, then persists the current one for the next run.
      #
      # @return [Float, nil] nil when there is no usable previous or current
      #   sample.
      def cpu_percentage_linux
        return nil unless File.readable?(PROC_STAT_PATH)

        now = current_time
        current = read_proc_stat
        # A failed read must not overwrite the last good sample.
        return nil if current.empty?

        prev = SampleStore.load(CPU_SAMPLE_KEY)
        SampleStore.save(CPU_SAMPLE_KEY, {"timestamp" => now, "stat" => current})

        return nil unless prev && prev["stat"].is_a?(Hash) && prev["timestamp"]

        cpu_pct_from_samples(prev["stat"], current)
      rescue
        nil
      end

      # Derives CPU usage from the idle figure printed by `top`.
      #
      # @return [Float, nil] nil when the output has no "% idle" figure.
      def cpu_percentage_macos
        output = `top -l 1 -n 0 | grep "CPU usage"`
        # Example: CPU usage: 9.38% user, 10.93% sys, 79.68% idle
        idle = output[/([\d.]+)% idle/, 1]
        return nil unless idle

        (100.0 - idle.to_f).round(2)
      rescue
        nil
      end

      # @return [Float] monotonic clock reading, or wall-clock seconds if the
      #   monotonic clock is unavailable.
      def current_time
        Process.clock_gettime(Process::CLOCK_MONOTONIC)
      rescue
        Time.now.to_f
      end

      # Parses the first aggregate "cpu" line of a /proc/stat style file.
      #
      # @param path [String] file to read (defaults to /proc/stat)
      # @return [Hash] {"total" => Integer, "idle" => Integer} where idle is
      #   idle + iowait; an empty hash when the line is missing, too short,
      #   or the file cannot be read.
      def read_proc_stat(path = PROC_STAT_PATH)
        File.foreach(path) do |line|
          next unless line.start_with?("cpu ")

          # cpu user nice system idle iowait irq softirq steal guest guest_nice
          values = line.split.drop(1).map(&:to_i)
          return {} if values.size < 5

          return {"total" => values.sum, "idle" => values[3] + values[4]}
        end
        {}
      rescue
        {}
      end

      # Computes a CPU percentage from two /proc/stat samples.
      # This is intentionally public so it can be unit tested.
      #
      # @param prev [Hash] earlier sample with "total" and "idle"
      # @param current [Hash] later sample with "total" and "idle"
      # @return [Float, nil] usage clamped to 0.0..100.0 and rounded to two
      #   decimals; nil when either sample is incomplete or the total counter
      #   did not advance.
      def cpu_pct_from_samples(prev, current)
        # An incomplete sample would otherwise be read as zeros and produce a
        # misleading "since boot" figure.
        return nil unless complete_cpu_sample?(prev) && complete_cpu_sample?(current)

        total_delta = current["total"].to_f - prev["total"].to_f
        return nil if total_delta <= 0

        idle_delta = current["idle"].to_f - prev["idle"].to_f
        pct = (total_delta - idle_delta) / total_delta * 100.0
        return nil unless pct.finite?

        # Keep the result inside the documented range even if the idle
        # counter moved more than the total (or went backwards).
        pct.clamp(0.0, 100.0).round(2)
      rescue
        nil
      end

      # @param sample [Object]
      # @return [Boolean] true when +sample+ is a Hash carrying both counters.
      def complete_cpu_sample?(sample)
        sample.is_a?(Hash) && !sample["total"].nil? && !sample["idle"].nil?
      end

      # Parses /proc/meminfo into a name => value map (values are the raw
      # numbers from the file; nil for entries that are not integers).
      #
      # @return [Hash{String => Integer, nil}] empty off Linux or on failure.
      def meminfo
        return {} unless linux? && File.readable?(MEMINFO_PATH)

        File.foreach(MEMINFO_PATH).each_with_object({}) do |line, info|
          name, value, = line.split
          next unless name && value

          info[name.sub(":", "")] =
            begin
              Integer(value)
            rescue
              nil
            end
        end
      rescue
        {}
      end

      # @return [Integer, nil] total physical memory in bytes; nil when it
      #   cannot be determined or the platform is unsupported.
      def mem_total_bytes
        if linux?
          total_kb = meminfo["MemTotal"]
          total_kb && total_kb * 1024
        elsif macos?
          bytes = `sysctl -n hw.memsize`.to_i
          # Empty or garbage output converts to 0, which is not a real size.
          bytes.positive? ? bytes : nil
        end
      rescue
        nil
      end

      # @return [Integer, nil] available memory in bytes (MemAvailable or
      #   MemFree on Linux; free + inactive pages on macOS); nil when unknown.
      def mem_available_bytes
        if linux?
          info = meminfo
          avail_kb = info["MemAvailable"] || info["MemFree"]
          avail_kb && avail_kb * 1024
        elsif macos?
          vm_stat_available_bytes(`vm_stat`)
        end
      rescue
        nil
      end

      # Computes available bytes from `vm_stat` output such as:
      #   Mach Virtual Memory Statistics: (page size of 16384 bytes)
      #   Pages free:                               3632.
      #   Pages inactive:                         134812.
      #
      # The page size is taken from the header line when present, because it
      # is not the same on every Mac; DEFAULT_MACOS_PAGE_SIZE is the fallback.
      #
      # @param output [String, nil] raw vm_stat output
      # @return [Integer, nil] nil when neither page counter is present.
      def vm_stat_available_bytes(output)
        text = output.to_s
        pages_free = text[/Pages free:\s+(\d+)/, 1]
        pages_inactive = text[/Pages inactive:\s+(\d+)/, 1]
        return nil unless pages_free || pages_inactive

        page_size = text[/page size of (\d+) bytes/, 1].to_i
        page_size = DEFAULT_MACOS_PAGE_SIZE unless page_size.positive?

        (pages_free.to_i + pages_inactive.to_i) * page_size
      end

      # @return [Integer, nil] total minus available bytes; nil when either
      #   figure is unknown.
      def mem_used_bytes
        total = mem_total_bytes
        avail = mem_available_bytes
        return nil unless total && avail

        total - avail
      rescue
        nil
      end
    end
  end
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
module DeadBro
  # Namespace for the metric collectors. Each collector constant is
  # registered for autoload, so its file is only required on first reference.
  module Collectors
    {
      Jobs: "dead_bro/collectors/jobs",
      Database: "dead_bro/collectors/database",
      ProcessInfo: "dead_bro/collectors/process_info",
      System: "dead_bro/collectors/system",
      Filesystem: "dead_bro/collectors/filesystem",
      Network: "dead_bro/collectors/network",
      SampleStore: "dead_bro/collectors/sample_store"
    }.each do |const_name, feature|
      autoload const_name, feature
    end
  end
end
|
|
@@ -2,11 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
module DeadBro
|
|
4
4
|
class Configuration
|
|
5
|
-
attr_accessor :api_key, :open_timeout, :read_timeout, :enabled, :ruby_dev, :memory_tracking_enabled,
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
5
|
+
attr_accessor :api_key, :open_timeout, :read_timeout, :enabled, :ruby_dev, :memory_tracking_enabled,
|
|
6
|
+
:allocation_tracking_enabled, :circuit_breaker_enabled, :circuit_breaker_failure_threshold, :circuit_breaker_recovery_timeout,
|
|
7
|
+
:circuit_breaker_retry_timeout, :sample_rate, :excluded_controllers, :excluded_jobs,
|
|
8
|
+
:exclusive_controllers, :exclusive_jobs, :deploy_id, :slow_query_threshold_ms, :explain_analyze_enabled,
|
|
9
|
+
:job_queue_monitoring_enabled, :enable_db_stats, :enable_process_stats, :enable_system_stats,
|
|
10
|
+
:disk_paths, :interfaces_ignore
|
|
10
11
|
|
|
11
12
|
def initialize
|
|
12
13
|
@api_key = nil
|
|
@@ -30,6 +31,11 @@ module DeadBro
|
|
|
30
31
|
@slow_query_threshold_ms = 500 # Default: 500ms
|
|
31
32
|
@explain_analyze_enabled = false # Enable EXPLAIN ANALYZE for slow queries by default
|
|
32
33
|
@job_queue_monitoring_enabled = false # Disabled by default
|
|
34
|
+
@enable_db_stats = false
|
|
35
|
+
@enable_process_stats = false
|
|
36
|
+
@enable_system_stats = false
|
|
37
|
+
@disk_paths = ["/"]
|
|
38
|
+
@interfaces_ignore = %w[lo lo0 docker0]
|
|
33
39
|
end
|
|
34
40
|
|
|
35
41
|
def resolve_deploy_id
|
|
@@ -38,7 +44,7 @@ module DeadBro
|
|
|
38
44
|
|
|
39
45
|
def excluded_controller?(controller_name, action_name = nil)
|
|
40
46
|
return false if @excluded_controllers.empty?
|
|
41
|
-
|
|
47
|
+
|
|
42
48
|
# If action_name is provided, check both controller#action patterns and controller-only patterns
|
|
43
49
|
if action_name
|
|
44
50
|
target = "#{controller_name}##{action_name}"
|
|
@@ -55,7 +61,7 @@ module DeadBro
|
|
|
55
61
|
end
|
|
56
62
|
return false
|
|
57
63
|
end
|
|
58
|
-
|
|
64
|
+
|
|
59
65
|
# When action_name is nil, only check controller-only patterns (no #)
|
|
60
66
|
controller_patterns = @excluded_controllers.reject { |pat| pat.to_s.include?("#") }
|
|
61
67
|
return false if controller_patterns.empty?
|
|
@@ -78,7 +84,6 @@ module DeadBro
|
|
|
78
84
|
@exclusive_controllers.any? { |pat| match_name_or_pattern?(target, pat) }
|
|
79
85
|
end
|
|
80
86
|
|
|
81
|
-
|
|
82
87
|
def should_sample?
|
|
83
88
|
sample_rate = resolve_sample_rate
|
|
84
89
|
return true if sample_rate >= 100
|
|
@@ -87,10 +92,10 @@ module DeadBro
|
|
|
87
92
|
# Generate random number 1-100 and check if it's within sample rate
|
|
88
93
|
rand(1..100) <= sample_rate
|
|
89
94
|
end
|
|
90
|
-
|
|
95
|
+
|
|
91
96
|
def resolve_sample_rate
|
|
92
97
|
return @sample_rate unless @sample_rate.nil?
|
|
93
|
-
|
|
98
|
+
|
|
94
99
|
if ENV["dead_bro_SAMPLE_RATE"]
|
|
95
100
|
env_value = ENV["dead_bro_SAMPLE_RATE"].to_s.strip
|
|
96
101
|
# Validate that it's a valid integer string
|
|
@@ -105,10 +110,10 @@ module DeadBro
|
|
|
105
110
|
100 # default
|
|
106
111
|
end
|
|
107
112
|
end
|
|
108
|
-
|
|
113
|
+
|
|
109
114
|
def resolve_api_key
|
|
110
115
|
return @api_key unless @api_key.nil?
|
|
111
|
-
|
|
116
|
+
|
|
112
117
|
ENV["DEAD_BRO_API_KEY"]
|
|
113
118
|
end
|
|
114
119
|
|
|
@@ -129,20 +134,19 @@ module DeadBro
|
|
|
129
134
|
return false if name.nil? || pattern.nil?
|
|
130
135
|
pat = pattern.to_s
|
|
131
136
|
return !!(name.to_s == pat) unless pat.include?("*")
|
|
132
|
-
|
|
137
|
+
|
|
133
138
|
# For controller action patterns (containing '#'), use .* to match any characters including colons
|
|
134
139
|
# For controller-only patterns, use [^:]* to match namespace segments
|
|
135
|
-
if pat.include?("#")
|
|
140
|
+
regex = if pat.include?("#")
|
|
136
141
|
# Controller action pattern: allow * to match any characters including colons
|
|
137
|
-
|
|
142
|
+
Regexp.new("^" + Regexp.escape(pat).gsub("\\*", ".*") + "$")
|
|
138
143
|
else
|
|
139
144
|
# Controller-only pattern: use [^:]* to match namespace segments
|
|
140
|
-
|
|
145
|
+
Regexp.new("^" + Regexp.escape(pat).gsub("\\*", "[^:]*") + "$")
|
|
141
146
|
end
|
|
142
147
|
!!(name.to_s =~ regex)
|
|
143
148
|
rescue
|
|
144
149
|
false
|
|
145
150
|
end
|
|
146
|
-
|
|
147
151
|
end
|
|
148
152
|
end
|
|
@@ -48,7 +48,7 @@ module DeadBro
|
|
|
48
48
|
referer: truncate(env["HTTP_REFERER"].to_s, 500),
|
|
49
49
|
host: env["HTTP_HOST"]
|
|
50
50
|
},
|
|
51
|
-
rails_env:
|
|
51
|
+
rails_env: DeadBro.env,
|
|
52
52
|
app: safe_app_name,
|
|
53
53
|
pid: Process.pid,
|
|
54
54
|
logs: DeadBro.logger.logs
|
|
@@ -87,16 +87,6 @@ module DeadBro
|
|
|
87
87
|
str[0..(max - 1)]
|
|
88
88
|
end
|
|
89
89
|
|
|
90
|
-
def safe_rails_env
|
|
91
|
-
if defined?(Rails) && Rails.respond_to?(:env)
|
|
92
|
-
Rails.env
|
|
93
|
-
else
|
|
94
|
-
ENV["RAILS_ENV"] || ENV["RACK_ENV"] || "development"
|
|
95
|
-
end
|
|
96
|
-
rescue
|
|
97
|
-
"development"
|
|
98
|
-
end
|
|
99
|
-
|
|
100
90
|
def safe_app_name
|
|
101
91
|
if defined?(Rails) && Rails.respond_to?(:application)
|
|
102
92
|
begin
|
|
@@ -116,17 +116,17 @@ module DeadBro
|
|
|
116
116
|
def self.should_continue_tracking?
|
|
117
117
|
events = Thread.current[THREAD_LOCAL_KEY]
|
|
118
118
|
return false unless events
|
|
119
|
-
|
|
119
|
+
|
|
120
120
|
# Check count limit
|
|
121
121
|
return false if events.length >= MAX_TRACKED_EVENTS
|
|
122
|
-
|
|
122
|
+
|
|
123
123
|
# Check time limit
|
|
124
124
|
start_time = Thread.current[DeadBro::TRACKING_START_TIME_KEY]
|
|
125
125
|
if start_time
|
|
126
126
|
elapsed_seconds = Time.now - start_time
|
|
127
127
|
return false if elapsed_seconds >= DeadBro::MAX_TRACKING_DURATION_SECONDS
|
|
128
128
|
end
|
|
129
|
-
|
|
129
|
+
|
|
130
130
|
true
|
|
131
131
|
end
|
|
132
132
|
end
|
|
@@ -7,10 +7,10 @@ module DeadBro
|
|
|
7
7
|
ActiveSupport::Notifications.subscribe("perform_start.active_job") do |name, started, finished, _unique_id, data|
|
|
8
8
|
# Clear logs for this job
|
|
9
9
|
DeadBro.logger.clear
|
|
10
|
-
|
|
10
|
+
|
|
11
11
|
# Set tracking start time once for all subscribers (before starting any tracking)
|
|
12
12
|
Thread.current[DeadBro::TRACKING_START_TIME_KEY] = Time.now
|
|
13
|
-
|
|
13
|
+
|
|
14
14
|
DeadBro::SqlSubscriber.start_request_tracking
|
|
15
15
|
|
|
16
16
|
# Start lightweight memory tracking for this job
|
|
@@ -75,7 +75,7 @@ module DeadBro
|
|
|
75
75
|
duration_ms: duration_ms,
|
|
76
76
|
status: "completed",
|
|
77
77
|
sql_queries: sql_queries,
|
|
78
|
-
rails_env:
|
|
78
|
+
rails_env: DeadBro.env,
|
|
79
79
|
host: safe_host,
|
|
80
80
|
memory_usage: memory_usage_mb,
|
|
81
81
|
gc_stats: gc_stats,
|
|
@@ -154,7 +154,7 @@ module DeadBro
|
|
|
154
154
|
exception_class: exception&.class&.name,
|
|
155
155
|
message: exception&.message&.to_s&.[](0, 1000),
|
|
156
156
|
backtrace: Array(exception&.backtrace).first(50),
|
|
157
|
-
rails_env:
|
|
157
|
+
rails_env: DeadBro.env,
|
|
158
158
|
host: safe_host,
|
|
159
159
|
memory_usage: memory_usage_mb,
|
|
160
160
|
gc_stats: gc_stats,
|
|
@@ -202,16 +202,6 @@ module DeadBro
|
|
|
202
202
|
[]
|
|
203
203
|
end
|
|
204
204
|
|
|
205
|
-
def self.safe_rails_env
|
|
206
|
-
if defined?(Rails) && Rails.respond_to?(:env)
|
|
207
|
-
Rails.env
|
|
208
|
-
else
|
|
209
|
-
ENV["RACK_ENV"] || ENV["RAILS_ENV"] || "development"
|
|
210
|
-
end
|
|
211
|
-
rescue
|
|
212
|
-
"development"
|
|
213
|
-
end
|
|
214
|
-
|
|
215
205
|
def self.safe_host
|
|
216
206
|
if defined?(Rails) && Rails.respond_to?(:application)
|
|
217
207
|
begin
|