dead_bro 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +42 -43
- data/lib/dead_bro/circuit_breaker.rb +58 -38
- data/lib/dead_bro/client.rb +131 -130
- data/lib/dead_bro/configuration.rb +155 -81
- data/lib/dead_bro/dispatcher.rb +130 -0
- data/lib/dead_bro/error_middleware.rb +1 -1
- data/lib/dead_bro/job_subscriber.rb +36 -13
- data/lib/dead_bro/lightweight_memory_tracker.rb +5 -7
- data/lib/dead_bro/logger.rb +30 -11
- data/lib/dead_bro/memory_details.rb +71 -0
- data/lib/dead_bro/memory_helpers.rb +62 -0
- data/lib/dead_bro/memory_leak_detector.rb +178 -158
- data/lib/dead_bro/memory_tracking_subscriber.rb +12 -36
- data/lib/dead_bro/monitor.rb +18 -5
- data/lib/dead_bro/railtie.rb +6 -6
- data/lib/dead_bro/redis_subscriber.rb +2 -2
- data/lib/dead_bro/sql_subscriber.rb +104 -71
- data/lib/dead_bro/subscriber.rb +41 -17
- data/lib/dead_bro/version.rb +1 -1
- data/lib/dead_bro.rb +87 -96
- metadata +4 -2
|
@@ -2,42 +2,125 @@
|
|
|
2
2
|
|
|
3
3
|
module DeadBro
|
|
4
4
|
class Configuration
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
# Local-only settings (not overwritten by API `settings` payloads).
|
|
6
|
+
# Note: `enabled` may still be updated remotely via apply_remote_settings when the backend
|
|
7
|
+
# returns it in a response; local configure() values apply until the next remote update.
|
|
8
|
+
attr_accessor :api_key, :open_timeout, :read_timeout, :enabled, :ruby_dev,
|
|
9
|
+
:circuit_breaker_enabled, :circuit_breaker_failure_threshold, :circuit_breaker_recovery_timeout,
|
|
10
|
+
:circuit_breaker_retry_timeout, :deploy_id, :disk_paths, :interfaces_ignore
|
|
11
|
+
|
|
12
|
+
# Remote-managed settings (overwritten by backend JSON `settings` on successful API responses)
|
|
13
|
+
attr_accessor :memory_tracking_enabled, :allocation_tracking_enabled,
|
|
14
|
+
:sample_rate, :slow_query_threshold_ms, :explain_analyze_enabled,
|
|
9
15
|
:job_queue_monitoring_enabled, :enable_db_stats, :enable_process_stats, :enable_system_stats,
|
|
10
|
-
:
|
|
16
|
+
:max_sql_queries_to_send, :max_logs_to_send
|
|
17
|
+
|
|
18
|
+
# Readers for exclusion lists. Writers are defined below so we can compile
|
|
19
|
+
# and cache the regex form once, instead of rebuilding it per request.
|
|
20
|
+
attr_reader :excluded_controllers, :excluded_jobs, :exclusive_controllers, :exclusive_jobs
|
|
21
|
+
|
|
22
|
+
# Tracks when we last received settings from the backend (in-memory only)
|
|
23
|
+
attr_accessor :settings_received_at
|
|
24
|
+
|
|
25
|
+
# Last successful heartbeat HTTP response time while disabled (in-memory only)
|
|
26
|
+
attr_accessor :last_heartbeat_at
|
|
27
|
+
|
|
28
|
+
# Throttles heartbeat attempts to HEARTBEAT_INTERVAL (set when a heartbeat request is started)
|
|
29
|
+
attr_accessor :last_heartbeat_attempt_at
|
|
30
|
+
|
|
31
|
+
HEARTBEAT_INTERVAL = 60 # seconds
|
|
32
|
+
|
|
33
|
+
REMOTE_SETTING_KEYS = %w[
|
|
34
|
+
enabled sample_rate memory_tracking_enabled allocation_tracking_enabled
|
|
35
|
+
explain_analyze_enabled slow_query_threshold_ms max_sql_queries_to_send max_logs_to_send
|
|
36
|
+
excluded_controllers excluded_jobs exclusive_controllers exclusive_jobs
|
|
37
|
+
job_queue_monitoring_enabled enable_db_stats enable_process_stats enable_system_stats
|
|
38
|
+
].freeze
|
|
11
39
|
|
|
12
40
|
def initialize
|
|
13
41
|
@api_key = nil
|
|
14
|
-
@endpoint_url = nil
|
|
15
42
|
@open_timeout = 1.0
|
|
16
43
|
@read_timeout = 1.0
|
|
17
44
|
@enabled = true
|
|
18
45
|
@ruby_dev = false
|
|
19
|
-
@memory_tracking_enabled = true
|
|
20
|
-
@allocation_tracking_enabled = false # Disabled by default for performance
|
|
21
46
|
@circuit_breaker_enabled = true
|
|
22
47
|
@circuit_breaker_failure_threshold = 3
|
|
23
|
-
@circuit_breaker_recovery_timeout = 60
|
|
24
|
-
@circuit_breaker_retry_timeout = 300
|
|
25
|
-
@sample_rate = 100
|
|
26
|
-
@excluded_controllers = []
|
|
27
|
-
@excluded_jobs = []
|
|
28
|
-
@exclusive_controllers = []
|
|
29
|
-
@exclusive_jobs = []
|
|
48
|
+
@circuit_breaker_recovery_timeout = 60
|
|
49
|
+
@circuit_breaker_retry_timeout = 300
|
|
30
50
|
@deploy_id = resolve_deploy_id
|
|
31
|
-
@slow_query_threshold_ms = 500 # Default: 500ms
|
|
32
|
-
@explain_analyze_enabled = false # Enable EXPLAIN ANALYZE for slow queries by default
|
|
33
|
-
@job_queue_monitoring_enabled = false # Disabled by default
|
|
34
|
-
@enable_db_stats = false
|
|
35
|
-
@enable_process_stats = false
|
|
36
|
-
@enable_system_stats = false
|
|
37
51
|
@disk_paths = ["/"]
|
|
38
52
|
@interfaces_ignore = %w[lo lo0 docker0]
|
|
39
|
-
|
|
53
|
+
|
|
54
|
+
# Remote-managed defaults (used until backend sends real values)
|
|
55
|
+
@sample_rate = 100
|
|
56
|
+
@memory_tracking_enabled = true
|
|
57
|
+
@allocation_tracking_enabled = false
|
|
58
|
+
@explain_analyze_enabled = false
|
|
59
|
+
@slow_query_threshold_ms = 500
|
|
60
|
+
@max_sql_queries_to_send = 500
|
|
40
61
|
@max_logs_to_send = 100
|
|
62
|
+
self.excluded_controllers = []
|
|
63
|
+
self.excluded_jobs = []
|
|
64
|
+
self.exclusive_controllers = []
|
|
65
|
+
self.exclusive_jobs = []
|
|
66
|
+
@job_queue_monitoring_enabled = false
|
|
67
|
+
@enable_db_stats = false
|
|
68
|
+
@enable_process_stats = false
|
|
69
|
+
@enable_system_stats = false
|
|
70
|
+
|
|
71
|
+
@settings_received_at = nil
|
|
72
|
+
@last_heartbeat_at = nil
|
|
73
|
+
@last_heartbeat_attempt_at = nil
|
|
74
|
+
@settings_mutex = Mutex.new
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def excluded_controllers=(value)
|
|
78
|
+
@excluded_controllers = Array(value).map(&:to_s)
|
|
79
|
+
@compiled_excluded_controllers = compile_patterns(@excluded_controllers)
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def excluded_jobs=(value)
|
|
83
|
+
@excluded_jobs = Array(value).map(&:to_s)
|
|
84
|
+
@compiled_excluded_jobs = compile_patterns(@excluded_jobs)
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
def exclusive_controllers=(value)
|
|
88
|
+
@exclusive_controllers = Array(value).map(&:to_s)
|
|
89
|
+
@compiled_exclusive_controllers = compile_patterns(@exclusive_controllers)
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
def exclusive_jobs=(value)
|
|
93
|
+
@exclusive_jobs = Array(value).map(&:to_s)
|
|
94
|
+
@compiled_exclusive_jobs = compile_patterns(@exclusive_jobs)
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
# Apply a settings hash received from the backend response.
#
# Only keys listed in REMOTE_SETTING_KEYS are applied; unknown keys are
# silently ignored. Writes are serialized through @settings_mutex so that
# concurrent callers do not interleave their updates.
#
# Values are validated before being assigned:
# * numeric settings must parse as a finite number; nil or malformed values
#   are skipped and the current setting is kept. (Previously `value.to_i`
#   turned nil/"abc" into 0, which e.g. switched sampling off entirely.)
# * boolean settings treat false, nil, 0 and the strings "0", "false", "f",
#   "off", "no" (case-insensitive) as false; anything else is true.
#   (Previously `!!value` made the string "false" and the integer 0 truthy.)
# * list settings are coerced to an array of strings.
#
# @param hash [Hash] settings payload (string or symbol keys)
# @return [void] returns nil immediately when +hash+ is not a Hash
def apply_remote_settings(hash)
  return unless hash.is_a?(Hash)

  @settings_mutex.synchronize do
    hash.each do |key, value|
      k = key.to_s
      next unless REMOTE_SETTING_KEYS.include?(k)

      case k
      when "sample_rate", "slow_query_threshold_ms", "max_sql_queries_to_send", "max_logs_to_send"
        number = begin
          Float(value)
        rescue ArgumentError, TypeError
          nil
        end
        # Keep the current value rather than silently writing 0.
        next if number.nil? || !number.finite?

        send(:"#{k}=", number.to_i)
      when "enabled", "memory_tracking_enabled", "allocation_tracking_enabled", "explain_analyze_enabled",
           "job_queue_monitoring_enabled", "enable_db_stats", "enable_process_stats", "enable_system_stats"
        falsey = value.nil? || value == false || value == 0 ||
          (value.is_a?(String) && %w[0 false f off no].include?(value.strip.downcase))
        send(:"#{k}=", !falsey)
      when "excluded_controllers", "excluded_jobs", "exclusive_controllers", "exclusive_jobs"
        send(:"#{k}=", Array(value).map(&:to_s))
      end
    end
  end
end
|
|
120
|
+
|
|
121
|
+
def heartbeat_due?
|
|
122
|
+
return false if api_key.nil?
|
|
123
|
+
last_heartbeat_attempt_at.nil? || (Time.now.utc - last_heartbeat_attempt_at) >= HEARTBEAT_INTERVAL
|
|
41
124
|
end
|
|
42
125
|
|
|
43
126
|
def resolve_deploy_id
|
|
@@ -45,49 +128,51 @@ module DeadBro
|
|
|
45
128
|
end
|
|
46
129
|
|
|
47
130
|
def excluded_controller?(controller_name, action_name = nil)
|
|
48
|
-
|
|
131
|
+
compiled = @compiled_excluded_controllers
|
|
132
|
+
return false if compiled.nil? || compiled.empty?
|
|
49
133
|
|
|
50
|
-
# If action_name is provided, check both controller#action patterns and controller-only patterns
|
|
51
134
|
if action_name
|
|
52
135
|
target = "#{controller_name}##{action_name}"
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
# If the controller itself is excluded, all its actions are excluded
|
|
60
|
-
controller_patterns = @excluded_controllers.reject { |pat| pat.to_s.include?("#") }
|
|
61
|
-
if controller_patterns.any? { |pat| match_name_or_pattern?(controller_name, pat) }
|
|
62
|
-
return true
|
|
136
|
+
compiled.each do |entry|
|
|
137
|
+
if entry[:has_hash]
|
|
138
|
+
return true if match_compiled?(target, entry)
|
|
139
|
+
elsif match_compiled?(controller_name, entry)
|
|
140
|
+
return true
|
|
141
|
+
end
|
|
63
142
|
end
|
|
64
143
|
return false
|
|
65
144
|
end
|
|
66
145
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
146
|
+
compiled.each do |entry|
|
|
147
|
+
next if entry[:has_hash]
|
|
148
|
+
return true if match_compiled?(controller_name, entry)
|
|
149
|
+
end
|
|
150
|
+
false
|
|
71
151
|
end
|
|
72
152
|
|
|
73
153
|
def excluded_job?(job_class_name)
|
|
74
|
-
|
|
75
|
-
|
|
154
|
+
compiled = @compiled_excluded_jobs
|
|
155
|
+
return false if compiled.nil? || compiled.empty?
|
|
156
|
+
compiled.any? { |entry| match_compiled?(job_class_name, entry) }
|
|
76
157
|
end
|
|
77
158
|
|
|
78
159
|
def exclusive_job?(job_class_name)
|
|
79
|
-
|
|
80
|
-
|
|
160
|
+
compiled = @compiled_exclusive_jobs
|
|
161
|
+
return true if compiled.nil? || compiled.empty?
|
|
162
|
+
compiled.any? { |entry| match_compiled?(job_class_name, entry) }
|
|
81
163
|
end
|
|
82
164
|
|
|
83
165
|
def exclusive_controller?(controller_name, action_name)
|
|
84
|
-
|
|
166
|
+
compiled = @compiled_exclusive_controllers
|
|
167
|
+
return true if compiled.nil? || compiled.empty?
|
|
85
168
|
target = "#{controller_name}##{action_name}"
|
|
86
|
-
|
|
169
|
+
compiled.any? { |entry| match_compiled?(target, entry) }
|
|
87
170
|
end
|
|
88
171
|
|
|
89
172
|
def should_sample?
|
|
90
173
|
sample_rate = resolve_sample_rate
|
|
174
|
+
sample_rate = 100 if sample_rate.nil?
|
|
175
|
+
|
|
91
176
|
return true if sample_rate >= 100
|
|
92
177
|
return false if sample_rate <= 0
|
|
93
178
|
|
|
@@ -95,22 +180,9 @@ module DeadBro
|
|
|
95
180
|
rand(1..100) <= sample_rate
|
|
96
181
|
end
|
|
97
182
|
|
|
183
|
+
# Returns the configured sample_rate only (no ENV fallback). Use DeadBro.configure or remote settings.
|
|
98
184
|
def resolve_sample_rate
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
if ENV["dead_bro_SAMPLE_RATE"]
|
|
102
|
-
env_value = ENV["dead_bro_SAMPLE_RATE"].to_s.strip
|
|
103
|
-
# Validate that it's a valid integer string
|
|
104
|
-
if env_value.match?(/^\d+$/)
|
|
105
|
-
parsed = env_value.to_i
|
|
106
|
-
# Ensure it's in valid range (0-100)
|
|
107
|
-
(parsed >= 0 && parsed <= 100) ? parsed : 100
|
|
108
|
-
else
|
|
109
|
-
100 # Invalid format, fall back to default
|
|
110
|
-
end
|
|
111
|
-
else
|
|
112
|
-
100 # default
|
|
113
|
-
end
|
|
185
|
+
@sample_rate
|
|
114
186
|
end
|
|
115
187
|
|
|
116
188
|
def resolve_api_key
|
|
@@ -119,34 +191,36 @@ module DeadBro
|
|
|
119
191
|
ENV["DEAD_BRO_API_KEY"]
|
|
120
192
|
end
|
|
121
193
|
|
|
122
|
-
|
|
123
|
-
# Allow nil to use default/resolved value
|
|
124
|
-
return @sample_rate = nil if value.nil?
|
|
194
|
+
private
|
|
125
195
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
196
|
+
# Turn a list of user-facing patterns into {pattern, has_hash, regex}
|
|
197
|
+
# entries. Regex is nil when the pattern is a plain literal (cheaper eq
|
|
198
|
+
# compare). Compiling up-front removes per-request regex allocation.
|
|
199
|
+
def compile_patterns(patterns)
|
|
200
|
+
Array(patterns).map do |pat|
|
|
201
|
+
s = pat.to_s
|
|
202
|
+
has_hash = s.include?("#")
|
|
203
|
+
regex = if s.include?("*")
|
|
204
|
+
if has_hash
|
|
205
|
+
Regexp.new("\\A" + Regexp.escape(s).gsub("\\*", ".*") + "\\z")
|
|
206
|
+
else
|
|
207
|
+
Regexp.new("\\A" + Regexp.escape(s).gsub("\\*", "[^:]*") + "\\z")
|
|
208
|
+
end
|
|
209
|
+
end
|
|
210
|
+
{pattern: s, has_hash: has_hash, regex: regex}
|
|
129
211
|
end
|
|
130
|
-
|
|
212
|
+
rescue
|
|
213
|
+
[]
|
|
131
214
|
end
|
|
132
215
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
return !!(name.to_s == pat) unless pat.include?("*")
|
|
139
|
-
|
|
140
|
-
# For controller action patterns (containing '#'), use .* to match any characters including colons
|
|
141
|
-
# For controller-only patterns, use [^:]* to match namespace segments
|
|
142
|
-
regex = if pat.include?("#")
|
|
143
|
-
# Controller action pattern: allow * to match any characters including colons
|
|
144
|
-
Regexp.new("^" + Regexp.escape(pat).gsub("\\*", ".*") + "$")
|
|
216
|
+
def match_compiled?(name, entry)
|
|
217
|
+
return false if name.nil? || entry.nil?
|
|
218
|
+
n = name.to_s
|
|
219
|
+
if entry[:regex]
|
|
220
|
+
!!(n =~ entry[:regex])
|
|
145
221
|
else
|
|
146
|
-
|
|
147
|
-
Regexp.new("^" + Regexp.escape(pat).gsub("\\*", "[^:]*") + "$")
|
|
222
|
+
n == entry[:pattern]
|
|
148
223
|
end
|
|
149
|
-
!!(name.to_s =~ regex)
|
|
150
224
|
rescue
|
|
151
225
|
false
|
|
152
226
|
end
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "thread"
|
|
4
|
+
|
|
5
|
+
module DeadBro
  # Background worker pool that runs HTTP posts for Client off the request
  # thread. Replaces the previous `Thread.new` per metric. One shared pool per
  # process; re-initializes after fork (Puma, Unicorn).
  #
  # Jobs are plain blocks pushed onto a bounded SizedQueue. Producers never
  # block: when the queue is full the job is dropped and counted. Errors
  # raised by jobs (StandardError) are swallowed by the worker.
  class Dispatcher
    DEFAULT_QUEUE_SIZE = 500
    DEFAULT_WORKERS = 2
    # Legacy stop sentinel. Workers still exit when they pop it, but #shutdown
    # now closes the queue instead of pushing it (a push can block when the
    # queue is full).
    SHUTDOWN = Object.new

    # Guards lazy creation of the shared instance so concurrent first calls
    # cannot start two pools.
    @instance_mutex = Mutex.new

    class << self
      # @return [Dispatcher] the process-wide shared pool (created lazily)
      def instance
        @instance || @instance_mutex.synchronize { @instance ||= new }
      end

      # Exposed for tests.
      def reset!
        @instance&.shutdown
        @instance = nil
      end

      # Test hook — when true, `dispatch` runs the block inline on the caller
      # thread instead of handing it to a worker. Keeps specs deterministic
      # without having to stub `Thread.new` or poll for queue drain.
      attr_accessor :inline
    end

    # @param queue_size [Integer] maximum number of pending jobs
    # @param workers [Integer] number of worker threads
    def initialize(queue_size: DEFAULT_QUEUE_SIZE, workers: DEFAULT_WORKERS)
      @queue_size = queue_size
      @worker_count = workers
      @mutex = Mutex.new
      @dropped = 0
      @shutting_down = false
      boot_workers(Process.pid)
      install_at_exit_hook
    end

    # Schedule a block for background execution. Never blocks the caller: if the
    # queue is full the job is dropped and `dropped_count` is incremented.
    #
    # @return [Boolean] true when the job was accepted (or run inline), false
    #   when no block was given, the pool is shutting down, or the job was dropped
    def dispatch(&block)
      return false unless block_given?
      return false if @shutting_down

      if self.class.inline
        begin
          block.call
        rescue
          # Match worker semantics — swallow job errors.
        end
        return true
      end

      ensure_workers_alive!
      @queue.push(block, true) # non-blocking: raises ThreadError when full
      true
    rescue ThreadError, ClosedQueueError
      # ThreadError: queue full. ClosedQueueError: lost a race with #shutdown.
      @mutex.synchronize { @dropped += 1 }
      false
    end

    # @return [Integer] number of jobs dropped so far
    def dropped_count
      @mutex.synchronize { @dropped }
    end

    # Stop accepting jobs, let workers drain what is already queued, and wait
    # up to 2 seconds per worker for them to exit. Idempotent.
    #
    # Closing the queue never blocks (unlike pushing a sentinel into a full
    # SizedQueue) and makes `pop` return nil once the queue is empty, which is
    # the workers' signal to stop.
    def shutdown
      return if @shutting_down
      @shutting_down = true

      begin
        @queue.close
      rescue
        # Best effort.
      end

      (@workers || []).each do |t|
        # Joining a thread that died with an exception re-raises it here.
        next unless t.alive?

        begin
          t.join(2)
        rescue
        end
      end
    end

    private

    # Start a fresh queue and a full set of workers owned by +pid+.
    def boot_workers(pid)
      @pid = pid
      @queue = SizedQueue.new(@queue_size)
      @workers = Array.new(@worker_count) { spawn_worker }
    end

    # Start one worker thread bound to the current queue.
    def spawn_worker
      queue = @queue
      t = Thread.new { run(queue) }
      begin
        t.name = "dead_bro-dispatcher"
      rescue
      end
      t.abort_on_exception = false
      t
    end

    # True when the pool belongs to this process and every worker is alive.
    def pool_healthy?
      @pid == Process.pid && @workers && @workers.all?(&:alive?)
    end

    def ensure_workers_alive!
      return if pool_healthy?

      @mutex.synchronize do
        return if pool_healthy?

        if @pid != Process.pid || @workers.nil?
          # Post-fork (new PID): parent threads do not exist here and the
          # inherited queue holds the parent's jobs — start from scratch.
          boot_workers(Process.pid)
          @shutting_down = false
        elsif !@shutting_down
          # Same process, a worker died: replace only the dead threads and
          # keep the queue so already-accepted jobs are not lost.
          @workers = @workers.map { |t| t.alive? ? t : spawn_worker }
        end
      end
    end

    def install_at_exit_hook
      at_exit { shutdown }
    rescue
    end

    # Worker loop: pop jobs until the queue is closed and drained (pop returns
    # nil) or the legacy SHUTDOWN sentinel is received.
    def run(queue = @queue)
      loop do
        job = queue.pop
        break if job.nil? || job.equal?(SHUTDOWN)

        begin
          job.call
        rescue
          # Never let a job crash the worker.
        end
      end
    end
  end
end
|
|
@@ -19,7 +19,7 @@ module DeadBro
|
|
|
19
19
|
# Use the error class name as the event name
|
|
20
20
|
event_name = exception.class.name.to_s
|
|
21
21
|
event_name = EVENT_NAME if event_name.empty?
|
|
22
|
-
@client.post_metric(event_name: event_name, payload: payload)
|
|
22
|
+
@client.post_metric(event_name: event_name, payload: payload, force: true)
|
|
23
23
|
rescue
|
|
24
24
|
# Never let APM reporting interfere with the host app
|
|
25
25
|
end
|
|
@@ -17,15 +17,26 @@ module DeadBro
|
|
|
17
17
|
begin
|
|
18
18
|
job_class_name = data[:job].class.name
|
|
19
19
|
if DeadBro.configuration.excluded_job?(job_class_name)
|
|
20
|
+
drain_job_tracking
|
|
20
21
|
next
|
|
21
22
|
end
|
|
22
23
|
# If exclusive_jobs is defined and not empty, only track matching jobs
|
|
23
24
|
unless DeadBro.configuration.exclusive_job?(job_class_name)
|
|
25
|
+
drain_job_tracking
|
|
24
26
|
next
|
|
25
27
|
end
|
|
26
28
|
rescue
|
|
27
29
|
end
|
|
28
30
|
|
|
31
|
+
# Skip out via sampling before we build any payload — jobs can be chatty
|
|
32
|
+
# enough that even the "cheap" stop/analyze work matters under load.
|
|
33
|
+
# Completions have no exception attached; the exception subscriber below
|
|
34
|
+
# always sends errors with force: true.
|
|
35
|
+
unless DeadBro.configuration.should_sample?
|
|
36
|
+
drain_job_tracking
|
|
37
|
+
next
|
|
38
|
+
end
|
|
39
|
+
|
|
29
40
|
duration_ms = ((finished - started) * 1000.0).round(2)
|
|
30
41
|
|
|
31
42
|
# Ensure tracking was started (fallback if perform_start.active_job didn't fire)
|
|
@@ -34,6 +45,11 @@ module DeadBro
|
|
|
34
45
|
DeadBro.logger.clear
|
|
35
46
|
Thread.current[DeadBro::TRACKING_START_TIME_KEY] = Time.now
|
|
36
47
|
DeadBro::SqlSubscriber.start_request_tracking
|
|
48
|
+
if DeadBro.configuration.allocation_tracking_enabled && defined?(DeadBro::MemoryTrackingSubscriber)
|
|
49
|
+
DeadBro::MemoryTrackingSubscriber.start_request_tracking
|
|
50
|
+
else
|
|
51
|
+
DeadBro::LightweightMemoryTracker.start_request_tracking if defined?(DeadBro::LightweightMemoryTracker)
|
|
52
|
+
end
|
|
37
53
|
end
|
|
38
54
|
|
|
39
55
|
# Get SQL queries executed during this job
|
|
@@ -103,13 +119,18 @@ module DeadBro
|
|
|
103
119
|
|
|
104
120
|
duration_ms = ((finished - started) * 1000.0).round(2)
|
|
105
121
|
exception = data[:exception_object]
|
|
106
|
-
|
|
122
|
+
data[:job].class.name
|
|
107
123
|
|
|
108
124
|
# Ensure tracking was started (fallback if perform_start.active_job didn't fire)
|
|
109
125
|
unless DeadBro::SqlSubscriber.tracking_active?
|
|
110
126
|
DeadBro.logger.clear
|
|
111
127
|
Thread.current[DeadBro::TRACKING_START_TIME_KEY] = Time.now
|
|
112
128
|
DeadBro::SqlSubscriber.start_request_tracking
|
|
129
|
+
if DeadBro.configuration.allocation_tracking_enabled && defined?(DeadBro::MemoryTrackingSubscriber)
|
|
130
|
+
DeadBro::MemoryTrackingSubscriber.start_request_tracking
|
|
131
|
+
else
|
|
132
|
+
DeadBro::LightweightMemoryTracker.start_request_tracking if defined?(DeadBro::LightweightMemoryTracker)
|
|
133
|
+
end
|
|
113
134
|
end
|
|
114
135
|
|
|
115
136
|
# Get SQL queries executed during this job
|
|
@@ -164,12 +185,24 @@ module DeadBro
|
|
|
164
185
|
}
|
|
165
186
|
|
|
166
187
|
event_name = exception&.class&.name || "ActiveJob::Exception"
|
|
167
|
-
client.post_metric(event_name: event_name, payload: payload,
|
|
188
|
+
client.post_metric(event_name: event_name, payload: payload, force: true)
|
|
168
189
|
end
|
|
169
190
|
rescue
|
|
170
191
|
# Never raise from instrumentation install
|
|
171
192
|
end
|
|
172
193
|
|
|
194
|
+
# Release job-side thread-local tracking state when we've decided not to
|
|
195
|
+
# build a payload (excluded job / sampled out). Matches Subscriber.drain_request_tracking.
|
|
196
|
+
def self.drain_job_tracking
|
|
197
|
+
DeadBro::SqlSubscriber.stop_request_tracking if defined?(DeadBro::SqlSubscriber)
|
|
198
|
+
DeadBro::LightweightMemoryTracker.stop_request_tracking if defined?(DeadBro::LightweightMemoryTracker)
|
|
199
|
+
if DeadBro.configuration.allocation_tracking_enabled && defined?(DeadBro::MemoryTrackingSubscriber)
|
|
200
|
+
DeadBro::MemoryTrackingSubscriber.stop_request_tracking
|
|
201
|
+
end
|
|
202
|
+
rescue
|
|
203
|
+
# Best effort
|
|
204
|
+
end
|
|
205
|
+
|
|
173
206
|
private
|
|
174
207
|
|
|
175
208
|
def self.safe_arguments(arguments)
|
|
@@ -215,17 +248,7 @@ module DeadBro
|
|
|
215
248
|
end
|
|
216
249
|
|
|
217
250
|
def self.memory_usage_mb
|
|
218
|
-
|
|
219
|
-
# Get memory usage in MB
|
|
220
|
-
memory_kb = begin
|
|
221
|
-
`ps -o rss= -p #{Process.pid}`.to_i
|
|
222
|
-
rescue
|
|
223
|
-
0
|
|
224
|
-
end
|
|
225
|
-
(memory_kb / 1024.0).round(2)
|
|
226
|
-
else
|
|
227
|
-
0
|
|
228
|
-
end
|
|
251
|
+
DeadBro::MemoryHelpers.rss_mb
|
|
229
252
|
rescue
|
|
230
253
|
0
|
|
231
254
|
end
|
|
@@ -43,13 +43,11 @@ module DeadBro
|
|
|
43
43
|
end
|
|
44
44
|
|
|
45
45
|
def self.lightweight_memory_usage
|
|
46
|
-
#
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
# Rough estimation: 4KB per page
|
|
52
|
-
(heap_pages * 4) / 1024.0 # Convert to MB
|
|
46
|
+
# Real RSS, cached for ~1s across threads so this is cheap even on hot
|
|
47
|
+
# paths. Previous versions multiplied heap_pages by 4KB and labelled the
|
|
48
|
+
# result as MB — both the unit and the page size were wrong (MRI heap
|
|
49
|
+
# pages are ~16KB and heap != RSS), so the number was effectively fiction.
|
|
50
|
+
DeadBro::MemoryHelpers.rss_mb
|
|
53
51
|
rescue
|
|
54
52
|
0
|
|
55
53
|
end
|
data/lib/dead_bro/logger.rb
CHANGED
|
@@ -18,8 +18,14 @@ module DeadBro
|
|
|
18
18
|
COLOR_ERROR = "\033[31m" # Red
|
|
19
19
|
COLOR_FATAL = "\033[35m" # Magenta
|
|
20
20
|
|
|
21
|
+
# Hard cap per-thread buffer size. Prevents unbounded growth when a
|
|
22
|
+
# request/job logs a lot, or when tracking never gets a chance to flush
|
|
23
|
+
# (e.g. code running outside a request lifecycle).
|
|
24
|
+
MAX_LOG_ENTRIES = 500
|
|
25
|
+
|
|
21
26
|
def initialize
|
|
22
27
|
@thread_logs_key = :dead_bro_logs
|
|
28
|
+
@thread_logs_dropped_key = :dead_bro_logs_dropped
|
|
23
29
|
end
|
|
24
30
|
|
|
25
31
|
def debug(message)
|
|
@@ -42,29 +48,42 @@ module DeadBro
|
|
|
42
48
|
log(:fatal, message)
|
|
43
49
|
end
|
|
44
50
|
|
|
45
|
-
# Get all logs for the current thread
|
|
51
|
+
# Get all logs for the current thread. If the buffer was capped, append a
|
|
52
|
+
# synthetic marker entry so downstream consumers know entries were dropped.
|
|
46
53
|
def logs
|
|
47
|
-
Thread.current[@thread_logs_key] || []
|
|
54
|
+
entries = Thread.current[@thread_logs_key] || []
|
|
55
|
+
dropped = Thread.current[@thread_logs_dropped_key] || 0
|
|
56
|
+
return entries if dropped.zero?
|
|
57
|
+
|
|
58
|
+
entries + [{
|
|
59
|
+
sev: "warn",
|
|
60
|
+
msg: "[DeadBro::Logger] #{dropped} log entries dropped (buffer cap #{MAX_LOG_ENTRIES})",
|
|
61
|
+
time: Time.now.utc.iso8601(3)
|
|
62
|
+
}]
|
|
48
63
|
end
|
|
49
64
|
|
|
50
65
|
# Clear logs for the current thread
|
|
51
66
|
def clear
|
|
52
67
|
Thread.current[@thread_logs_key] = []
|
|
68
|
+
Thread.current[@thread_logs_dropped_key] = 0
|
|
53
69
|
end
|
|
54
70
|
|
|
55
71
|
private
|
|
56
72
|
|
|
57
73
|
def log(severity, message)
|
|
58
74
|
timestamp = Time.now.utc
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
75
|
+
|
|
76
|
+
buffer = (Thread.current[@thread_logs_key] ||= [])
|
|
77
|
+
if buffer.length >= MAX_LOG_ENTRIES
|
|
78
|
+
Thread.current[@thread_logs_dropped_key] =
|
|
79
|
+
(Thread.current[@thread_logs_dropped_key] || 0) + 1
|
|
80
|
+
else
|
|
81
|
+
buffer << {
|
|
82
|
+
sev: severity.to_s,
|
|
83
|
+
msg: message.to_s,
|
|
84
|
+
time: timestamp.iso8601(3) # Include milliseconds for better precision
|
|
85
|
+
}
|
|
86
|
+
end
|
|
68
87
|
|
|
69
88
|
# Print the message immediately
|
|
70
89
|
print_log(severity, message, timestamp)
|