dead_bro 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +42 -43
- data/lib/dead_bro/circuit_breaker.rb +58 -38
- data/lib/dead_bro/client.rb +131 -130
- data/lib/dead_bro/configuration.rb +155 -81
- data/lib/dead_bro/dispatcher.rb +130 -0
- data/lib/dead_bro/error_middleware.rb +1 -1
- data/lib/dead_bro/job_subscriber.rb +36 -13
- data/lib/dead_bro/lightweight_memory_tracker.rb +5 -7
- data/lib/dead_bro/logger.rb +30 -11
- data/lib/dead_bro/memory_details.rb +71 -0
- data/lib/dead_bro/memory_helpers.rb +62 -0
- data/lib/dead_bro/memory_leak_detector.rb +178 -158
- data/lib/dead_bro/memory_tracking_subscriber.rb +12 -36
- data/lib/dead_bro/monitor.rb +18 -5
- data/lib/dead_bro/railtie.rb +6 -6
- data/lib/dead_bro/redis_subscriber.rb +2 -2
- data/lib/dead_bro/sql_subscriber.rb +104 -71
- data/lib/dead_bro/subscriber.rb +41 -17
- data/lib/dead_bro/version.rb +1 -1
- data/lib/dead_bro.rb +87 -96
- metadata +4 -2
|
@@ -25,8 +25,8 @@ module DeadBro
|
|
|
25
25
|
def self.install_redis_client!
|
|
26
26
|
# Only instrument if Redis::Client actually has the call method
|
|
27
27
|
# Check both public and private methods
|
|
28
|
-
has_call = ::Redis::Client.
|
|
29
|
-
::Redis::Client.
|
|
28
|
+
has_call = ::Redis::Client.method_defined?(:call, false) ||
|
|
29
|
+
::Redis::Client.private_method_defined?(:call, false)
|
|
30
30
|
return unless has_call
|
|
31
31
|
|
|
32
32
|
mod = Module.new do
|
|
@@ -16,6 +16,17 @@ module DeadBro
|
|
|
16
16
|
THREAD_LOCAL_EXPLAIN_PENDING_KEY = :dead_bro_explain_pending
|
|
17
17
|
MAX_TRACKED_QUERIES = 1000
|
|
18
18
|
|
|
19
|
+
# Precompiled regexes used by sanitize_sql. Dynamic /.../i literals inside
|
|
20
|
+
# a hot-path method allocate a fresh Regexp on every call — pinning them
|
|
21
|
+
# here removes that allocation entirely.
|
|
22
|
+
SENSITIVE_KV_QUOTED_RE = /\b(password|token|secret|key|ssn|credit_card)\s*=\s*['"][^'"]*['"]/i
|
|
23
|
+
SENSITIVE_KV_BARE_RE = /\b(password|token|secret|key|ssn|credit_card)\s*=\s*[^'",\s)]+/i
|
|
24
|
+
WHERE_EQ_QUOTED_RE = /WHERE\s+[^=]+=\s*['"][^'"]*['"]/i
|
|
25
|
+
WHERE_EQ_QUOTED_INNER_RE = /=\s*['"][^'"]*['"]/
|
|
26
|
+
SANITIZE_MAX_LENGTH = 1000
|
|
27
|
+
SANITIZE_SKIP_SENSITIVE_WHEN_NO_KEYWORDS = /password|token|secret|key|ssn|credit_card/i
|
|
28
|
+
SANITIZE_SKIP_WHERE_WHEN_NO_KEYWORD = /WHERE/i
|
|
29
|
+
|
|
19
30
|
# True when there is at least one active tracking context (e.g. for nested jobs).
|
|
20
31
|
def self.tracking_active?
|
|
21
32
|
stack = Thread.current[THREAD_LOCAL_KEY]
|
|
@@ -62,27 +73,33 @@ module DeadBro
|
|
|
62
73
|
next unless current
|
|
63
74
|
unique_id = _unique_id
|
|
64
75
|
allocations = nil
|
|
65
|
-
captured_backtrace = nil
|
|
66
76
|
begin
|
|
67
77
|
alloc_results = Thread.current[THREAD_LOCAL_ALLOC_RESULTS_KEY]
|
|
68
78
|
allocations = alloc_results && alloc_results.delete(unique_id)
|
|
69
|
-
|
|
70
|
-
# Get the captured backtrace from when the query started
|
|
71
|
-
backtrace_map = Thread.current[THREAD_LOCAL_BACKTRACE_KEY]
|
|
72
|
-
captured_backtrace = backtrace_map && backtrace_map.delete(unique_id)
|
|
73
79
|
rescue
|
|
74
80
|
end
|
|
75
81
|
|
|
76
82
|
duration_ms = ((finished - started) * 1000.0).round(2)
|
|
77
83
|
original_sql = data[:sql]
|
|
78
84
|
|
|
85
|
+
# Only capture a backtrace for queries we actually care about tracing
|
|
86
|
+
# (slow). This skips the ~O(stack-depth) allocation on the 99% of queries
|
|
87
|
+
# that are fast. An N+1 of 100 x 1ms queries no longer eats a thousand
|
|
88
|
+
# frame allocations for traces nobody will read.
|
|
89
|
+
threshold = begin
|
|
90
|
+
DeadBro.configuration.slow_query_threshold_ms
|
|
91
|
+
rescue
|
|
92
|
+
500
|
|
93
|
+
end
|
|
94
|
+
captured_trace = (duration_ms >= threshold.to_f) ? capture_app_backtrace : []
|
|
95
|
+
|
|
79
96
|
query_info = {
|
|
80
97
|
sql: sanitize_sql(original_sql),
|
|
81
98
|
name: data[:name],
|
|
82
99
|
duration_ms: duration_ms,
|
|
83
100
|
cached: data[:cached] || false,
|
|
84
101
|
connection_id: data[:connection_id],
|
|
85
|
-
trace:
|
|
102
|
+
trace: captured_trace,
|
|
86
103
|
allocations: allocations
|
|
87
104
|
}
|
|
88
105
|
|
|
@@ -115,10 +132,10 @@ module DeadBro
|
|
|
115
132
|
# Wait for any pending EXPLAIN ANALYZE queries to complete (with timeout)
|
|
116
133
|
# This must happen BEFORE we get the queries array reference to ensure
|
|
117
134
|
# all explain_plan fields are populated
|
|
118
|
-
wait_for_pending_explains(
|
|
135
|
+
wait_for_pending_explains(EXPLAIN_WAIT_TIMEOUT_SECONDS)
|
|
119
136
|
|
|
120
137
|
stack = Thread.current[THREAD_LOCAL_KEY]
|
|
121
|
-
queries = stack.is_a?(Array) && stack.any? ? stack.pop : []
|
|
138
|
+
queries = (stack.is_a?(Array) && stack.any?) ? stack.pop : []
|
|
122
139
|
# Clear thread locals when stack is empty so "tracking not started" behaves correctly
|
|
123
140
|
if stack.nil? || stack.empty?
|
|
124
141
|
Thread.current[THREAD_LOCAL_KEY] = nil
|
|
@@ -130,13 +147,21 @@ module DeadBro
|
|
|
130
147
|
queries
|
|
131
148
|
end
|
|
132
149
|
|
|
150
|
+
# Upper bound on pending EXPLAIN threads per request — stops a slow-query
|
|
151
|
+
# storm from spawning unbounded background threads.
|
|
152
|
+
MAX_PENDING_EXPLAINS = 20
|
|
153
|
+
# Overall wall-clock we're willing to block the request thread for pending
|
|
154
|
+
# EXPLAINs. Dropped from 5s → 1s: if the plan isn't ready by then, skip it
|
|
155
|
+
# rather than stall the request.
|
|
156
|
+
EXPLAIN_WAIT_TIMEOUT_SECONDS = 1.0
|
|
157
|
+
|
|
133
158
|
def self.wait_for_pending_explains(timeout_seconds)
|
|
134
159
|
pending = Thread.current[THREAD_LOCAL_EXPLAIN_PENDING_KEY]
|
|
135
160
|
return unless pending && !pending.empty?
|
|
136
161
|
|
|
137
|
-
start_time =
|
|
162
|
+
start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
138
163
|
pending.each do |thread|
|
|
139
|
-
remaining_time = timeout_seconds - (
|
|
164
|
+
remaining_time = timeout_seconds - (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time)
|
|
140
165
|
break if remaining_time <= 0
|
|
141
166
|
|
|
142
167
|
begin
|
|
@@ -150,17 +175,26 @@ module DeadBro
|
|
|
150
175
|
def self.sanitize_sql(sql)
|
|
151
176
|
return sql unless sql.is_a?(String)
|
|
152
177
|
|
|
153
|
-
#
|
|
154
|
-
|
|
155
|
-
|
|
178
|
+
# Cap length first — most "expensive" queries from the app's perspective
|
|
179
|
+
# are big UPDATE/INSERT with long literal blobs; don't burn regex time on
|
|
180
|
+
# those when we're going to truncate anyway.
|
|
181
|
+
sql = sql[0..SANITIZE_MAX_LENGTH] + "..." if sql.length > SANITIZE_MAX_LENGTH
|
|
182
|
+
|
|
183
|
+
# Only scan for sensitive KV pairs if one of the keywords is actually
|
|
184
|
+
# present — saves two regex passes on the vast majority of queries.
|
|
185
|
+
if sql.match?(SANITIZE_SKIP_SENSITIVE_WHEN_NO_KEYWORDS)
|
|
186
|
+
sql = sql.gsub(SENSITIVE_KV_QUOTED_RE, '\1 = ?')
|
|
187
|
+
sql = sql.gsub(SENSITIVE_KV_BARE_RE, '\1 = ?')
|
|
188
|
+
end
|
|
156
189
|
|
|
157
|
-
#
|
|
158
|
-
|
|
159
|
-
|
|
190
|
+
# Same short-circuit for WHERE rewrite.
|
|
191
|
+
if sql.match?(SANITIZE_SKIP_WHERE_WHEN_NO_KEYWORD)
|
|
192
|
+
sql = sql.gsub(WHERE_EQ_QUOTED_RE) do |match|
|
|
193
|
+
match.gsub(WHERE_EQ_QUOTED_INNER_RE, "= ?")
|
|
194
|
+
end
|
|
160
195
|
end
|
|
161
196
|
|
|
162
|
-
|
|
163
|
-
(sql.length > 1000) ? sql[0..1000] + "..." : sql
|
|
197
|
+
sql
|
|
164
198
|
end
|
|
165
199
|
|
|
166
200
|
def self.should_explain_query?(duration_ms, sql)
|
|
@@ -185,64 +219,47 @@ module DeadBro
|
|
|
185
219
|
return unless defined?(ActiveRecord)
|
|
186
220
|
return unless ActiveRecord::Base.respond_to?(:connection)
|
|
187
221
|
|
|
222
|
+
# Cap pending EXPLAINs per request. A slow-query storm that would have
|
|
223
|
+
# spawned 200 threads and starved the AR pool now drops excess plans
|
|
224
|
+
# instead of cascading into a timeout.
|
|
225
|
+
pending = Thread.current[THREAD_LOCAL_EXPLAIN_PENDING_KEY] ||= []
|
|
226
|
+
if pending.length >= MAX_PENDING_EXPLAINS
|
|
227
|
+
query_info[:explain_plan] = nil
|
|
228
|
+
return
|
|
229
|
+
end
|
|
230
|
+
|
|
188
231
|
# Capture the main thread reference to append logs to the correct thread
|
|
189
232
|
main_thread = Thread.current
|
|
190
233
|
|
|
191
|
-
# Run EXPLAIN in a background thread to avoid blocking the main request
|
|
234
|
+
# Run EXPLAIN in a background thread to avoid blocking the main request.
|
|
235
|
+
# We use `with_connection` so the connection returns to the pool even if
|
|
236
|
+
# the thread is killed or the block raises — the previous manual
|
|
237
|
+
# checkout/checkin could leak a connection under pathological paths.
|
|
192
238
|
explain_thread = Thread.new do
|
|
193
|
-
connection = nil
|
|
194
239
|
begin
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
else
|
|
199
|
-
ActiveRecord::Base.connection
|
|
200
|
-
end
|
|
201
|
-
|
|
202
|
-
# Interpolate binds if present to ensure EXPLAIN works with placeholders
|
|
203
|
-
final_sql = interpolate_sql_with_binds(sql, binds, connection)
|
|
204
|
-
|
|
205
|
-
# Build EXPLAIN query based on database adapter
|
|
206
|
-
explain_sql = build_explain_query(final_sql, connection)
|
|
240
|
+
ActiveRecord::Base.connection_pool.with_connection do |connection|
|
|
241
|
+
final_sql = interpolate_sql_with_binds(sql, binds, connection)
|
|
242
|
+
explain_sql = build_explain_query(final_sql, connection)
|
|
207
243
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
connection.select_all(explain_sql)
|
|
215
|
-
else
|
|
216
|
-
# Other databases: use execute
|
|
217
|
-
connection.execute(explain_sql)
|
|
218
|
-
end
|
|
219
|
-
|
|
220
|
-
# Format the result based on database adapter
|
|
221
|
-
explain_plan = format_explain_result(result, connection)
|
|
244
|
+
adapter_name = connection.adapter_name.downcase
|
|
245
|
+
result = if adapter_name == "postgresql" || adapter_name == "postgis"
|
|
246
|
+
connection.select_all(explain_sql)
|
|
247
|
+
else
|
|
248
|
+
connection.execute(explain_sql)
|
|
249
|
+
end
|
|
222
250
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
251
|
+
explain_plan = format_explain_result(result, connection)
|
|
252
|
+
query_info[:explain_plan] = if explain_plan && !explain_plan.to_s.strip.empty?
|
|
253
|
+
explain_plan
|
|
254
|
+
end
|
|
227
255
|
end
|
|
228
256
|
rescue => e
|
|
229
|
-
# Silently fail
|
|
257
|
+
# Silently fail — don't let EXPLAIN break the application.
|
|
230
258
|
append_log_to_thread(main_thread, :debug, "Failed to capture EXPLAIN ANALYZE: #{e.message}")
|
|
231
259
|
query_info[:explain_plan] = nil
|
|
232
|
-
ensure
|
|
233
|
-
# Return connection to pool if we checked it out
|
|
234
|
-
if connection && ActiveRecord::Base.connection_pool.respond_to?(:checkin)
|
|
235
|
-
begin
|
|
236
|
-
ActiveRecord::Base.connection_pool.checkin(connection)
|
|
237
|
-
rescue
|
|
238
|
-
nil
|
|
239
|
-
end
|
|
240
|
-
end
|
|
241
260
|
end
|
|
242
261
|
end
|
|
243
262
|
|
|
244
|
-
# Track the thread so we can wait for it when stopping request tracking
|
|
245
|
-
pending = Thread.current[THREAD_LOCAL_EXPLAIN_PENDING_KEY] ||= []
|
|
246
263
|
pending << explain_thread
|
|
247
264
|
rescue => e
|
|
248
265
|
# Use DeadBro.logger here since we're still in the main thread
|
|
@@ -419,6 +436,27 @@ module DeadBro
|
|
|
419
436
|
result.to_s
|
|
420
437
|
end
|
|
421
438
|
|
|
439
|
+
APP_BACKTRACE_MAX_FRAMES = 25
|
|
440
|
+
APP_BACKTRACE_SENSITIVE_RE = /\/[^\/]*(password|secret|key|token)[^\/]*\//i
|
|
441
|
+
|
|
442
|
+
# Cheap app-only backtrace for the current query. Uses caller_locations
|
|
443
|
+
# (lazy frame objects, no string allocations until we render) and keeps
|
|
444
|
+
# only frames under app/ (filtering vendor/). Returns at most N frames.
|
|
445
|
+
def self.capture_app_backtrace
|
|
446
|
+
locations = caller_locations(1, 100) || []
|
|
447
|
+
frames = []
|
|
448
|
+
locations.each do |loc|
|
|
449
|
+
path = loc.path.to_s
|
|
450
|
+
next unless path.include?("app/")
|
|
451
|
+
next if path.include?("/vendor/")
|
|
452
|
+
frames << "#{path}:#{loc.lineno}:in `#{loc.label}'".gsub(APP_BACKTRACE_SENSITIVE_RE, "/[FILTERED]/")
|
|
453
|
+
break if frames.length >= APP_BACKTRACE_MAX_FRAMES
|
|
454
|
+
end
|
|
455
|
+
frames
|
|
456
|
+
rescue
|
|
457
|
+
[]
|
|
458
|
+
end
|
|
459
|
+
|
|
422
460
|
def self.safe_query_trace(data, captured_backtrace = nil)
|
|
423
461
|
return [] unless data.is_a?(Hash)
|
|
424
462
|
|
|
@@ -520,15 +558,10 @@ module DeadBro
|
|
|
520
558
|
def start(name, id, payload)
|
|
521
559
|
map = (Thread.current[DeadBro::SqlSubscriber::THREAD_LOCAL_ALLOC_START_KEY] ||= {})
|
|
522
560
|
map[id] = GC.stat[:total_allocated_objects] if defined?(GC) && GC.respond_to?(:stat)
|
|
523
|
-
|
|
524
|
-
#
|
|
525
|
-
#
|
|
526
|
-
|
|
527
|
-
captured_backtrace = Thread.current.backtrace
|
|
528
|
-
if captured_backtrace && captured_backtrace.is_a?(Array)
|
|
529
|
-
# Skip the first few frames (our listener code) to get to the actual query execution
|
|
530
|
-
backtrace_map[id] = captured_backtrace[5..-1] || captured_backtrace
|
|
531
|
-
end
|
|
561
|
+
# Backtraces used to be captured here for every SQL event, which was
|
|
562
|
+
# dominating CPU on N+1-heavy requests (100s of full Thread#backtrace
|
|
563
|
+
# allocations). The main subscriber now captures a trimmed backtrace
|
|
564
|
+
# lazily — and only when a query exceeds slow_query_threshold_ms.
|
|
532
565
|
rescue
|
|
533
566
|
end
|
|
534
567
|
|
data/lib/dead_bro/subscriber.rb
CHANGED
|
@@ -8,21 +8,38 @@ module DeadBro
|
|
|
8
8
|
|
|
9
9
|
def self.subscribe!(client: Client.new)
|
|
10
10
|
ActiveSupport::Notifications.subscribe(EVENT_NAME) do |name, started, finished, _unique_id, data|
|
|
11
|
+
# When disabled remotely, fire a heartbeat at most once per minute so the gem
|
|
12
|
+
# can detect when tracking has been re-enabled, then skip all tracking.
|
|
13
|
+
unless DeadBro.configuration.enabled
|
|
14
|
+
client.post_heartbeat if DeadBro.configuration.heartbeat_due?
|
|
15
|
+
drain_request_tracking
|
|
16
|
+
next
|
|
17
|
+
end
|
|
18
|
+
|
|
11
19
|
# Skip excluded controllers or controller#action pairs
|
|
12
20
|
# Also check exclusive_controller_actions - if defined, only track those
|
|
21
|
+
notification = data.is_a?(Hash) ? data : {}
|
|
22
|
+
controller_name = notification[:controller].to_s
|
|
23
|
+
action_name = notification[:action].to_s
|
|
13
24
|
begin
|
|
14
|
-
controller_name = data[:controller].to_s
|
|
15
|
-
action_name = data[:action].to_s
|
|
16
25
|
if DeadBro.configuration.excluded_controller?(controller_name, action_name)
|
|
17
|
-
|
|
26
|
+
drain_request_tracking
|
|
18
27
|
next
|
|
19
28
|
end
|
|
20
|
-
# If exclusive_controller_actions is defined and not empty, only track matching actions
|
|
21
29
|
unless DeadBro.configuration.exclusive_controller?(controller_name, action_name)
|
|
22
|
-
|
|
30
|
+
drain_request_tracking
|
|
23
31
|
next
|
|
24
32
|
end
|
|
25
33
|
rescue
|
|
34
|
+
drain_request_tracking
|
|
35
|
+
next
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
has_error = data[:exception] || data[:exception_object]
|
|
39
|
+
# Errors always ship regardless of sampling (this is what the docs promise).
|
|
40
|
+
unless has_error || DeadBro.configuration.should_sample?
|
|
41
|
+
drain_request_tracking
|
|
42
|
+
next
|
|
26
43
|
end
|
|
27
44
|
|
|
28
45
|
duration_ms = ((finished - started) * 1000.0).round(2)
|
|
@@ -106,7 +123,7 @@ module DeadBro
|
|
|
106
123
|
}
|
|
107
124
|
|
|
108
125
|
event_name = (exception_class || exception_obj&.class&.name || "exception").to_s
|
|
109
|
-
client.post_metric(event_name: event_name, payload: error_payload)
|
|
126
|
+
client.post_metric(event_name: event_name, payload: error_payload, force: true)
|
|
110
127
|
rescue
|
|
111
128
|
ensure
|
|
112
129
|
next
|
|
@@ -147,6 +164,23 @@ module DeadBro
|
|
|
147
164
|
end
|
|
148
165
|
end
|
|
149
166
|
|
|
167
|
+
# Release per-subscriber thread-local state when we've decided not to build
|
|
168
|
+
# a payload (disabled / excluded / sampled out). Without this, a subsequent
|
|
169
|
+
# request reusing the same Puma thread would see stale queries/events.
|
|
170
|
+
def self.drain_request_tracking
|
|
171
|
+
DeadBro::SqlSubscriber.stop_request_tracking if defined?(DeadBro::SqlSubscriber)
|
|
172
|
+
DeadBro::CacheSubscriber.stop_request_tracking if defined?(DeadBro::CacheSubscriber)
|
|
173
|
+
DeadBro::RedisSubscriber.stop_request_tracking if defined?(DeadBro::RedisSubscriber)
|
|
174
|
+
DeadBro::ViewRenderingSubscriber.stop_request_tracking if defined?(DeadBro::ViewRenderingSubscriber)
|
|
175
|
+
DeadBro::LightweightMemoryTracker.stop_request_tracking if defined?(DeadBro::LightweightMemoryTracker)
|
|
176
|
+
if DeadBro.configuration.allocation_tracking_enabled && defined?(DeadBro::MemoryTrackingSubscriber)
|
|
177
|
+
DeadBro::MemoryTrackingSubscriber.stop_request_tracking
|
|
178
|
+
end
|
|
179
|
+
Thread.current[:dead_bro_http_events] = nil
|
|
180
|
+
rescue
|
|
181
|
+
# Best effort — draining must never raise from the notifications callback.
|
|
182
|
+
end
|
|
183
|
+
|
|
150
184
|
def self.safe_path(data)
|
|
151
185
|
path = data[:path] || (data[:request] && data[:request].path)
|
|
152
186
|
path.to_s
|
|
@@ -259,17 +293,7 @@ module DeadBro
|
|
|
259
293
|
end
|
|
260
294
|
|
|
261
295
|
def self.memory_usage_mb
|
|
262
|
-
|
|
263
|
-
# Get memory usage in MB
|
|
264
|
-
memory_kb = begin
|
|
265
|
-
`ps -o rss= -p #{Process.pid}`.to_i
|
|
266
|
-
rescue
|
|
267
|
-
0
|
|
268
|
-
end
|
|
269
|
-
(memory_kb / 1024.0).round(2)
|
|
270
|
-
else
|
|
271
|
-
0
|
|
272
|
-
end
|
|
296
|
+
DeadBro::MemoryHelpers.rss_mb
|
|
273
297
|
rescue
|
|
274
298
|
0
|
|
275
299
|
end
|
data/lib/dead_bro/version.rb
CHANGED
data/lib/dead_bro.rb
CHANGED
|
@@ -5,6 +5,7 @@ require_relative "dead_bro/version"
|
|
|
5
5
|
module DeadBro
|
|
6
6
|
autoload :Configuration, "dead_bro/configuration"
|
|
7
7
|
autoload :Client, "dead_bro/client"
|
|
8
|
+
autoload :Dispatcher, "dead_bro/dispatcher"
|
|
8
9
|
autoload :CircuitBreaker, "dead_bro/circuit_breaker"
|
|
9
10
|
autoload :Collectors, "dead_bro/collectors"
|
|
10
11
|
autoload :Subscriber, "dead_bro/subscriber"
|
|
@@ -20,6 +21,7 @@ module DeadBro
|
|
|
20
21
|
autoload :JobSubscriber, "dead_bro/job_subscriber"
|
|
21
22
|
autoload :JobSqlTrackingMiddleware, "dead_bro/job_sql_tracking_middleware"
|
|
22
23
|
autoload :Monitor, "dead_bro/monitor"
|
|
24
|
+
autoload :MemoryDetails, "dead_bro/memory_details"
|
|
23
25
|
autoload :Logger, "dead_bro/logger"
|
|
24
26
|
begin
|
|
25
27
|
require "dead_bro/railtie"
|
|
@@ -110,33 +112,39 @@ module DeadBro
|
|
|
110
112
|
# - :memory_after_mb
|
|
111
113
|
# - :memory_delta_mb
|
|
112
114
|
# - :memory_details (detailed GC/allocation stats when available)
|
|
113
|
-
def self.analyze(label = nil)
|
|
115
|
+
def self.analyze(label = nil, verbose: false)
|
|
114
116
|
raise ArgumentError, "DeadBro.analyze requires a block" unless block_given?
|
|
115
117
|
|
|
116
118
|
label ||= "block"
|
|
117
119
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
120
|
+
# Lower Rails log level to DEBUG and enable ActiveRecord verbose_query_logs
|
|
121
|
+
# so Rails' own SQL logging (including ↳ caller frames) is visible.
|
|
122
|
+
original_log_level = nil
|
|
123
|
+
original_verbose_query_logs = nil
|
|
124
|
+
if verbose
|
|
125
|
+
begin
|
|
126
|
+
if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger.respond_to?(:level)
|
|
127
|
+
original_log_level = Rails.logger.level
|
|
128
|
+
Rails.logger.level = 0 # Logger::DEBUG
|
|
129
|
+
end
|
|
130
|
+
rescue
|
|
126
131
|
end
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
else
|
|
134
|
-
memory_before_mb = 0.0
|
|
132
|
+
begin
|
|
133
|
+
if defined?(ActiveRecord) && ActiveRecord.respond_to?(:verbose_query_logs)
|
|
134
|
+
original_verbose_query_logs = ActiveRecord.verbose_query_logs
|
|
135
|
+
ActiveRecord.verbose_query_logs = true
|
|
136
|
+
end
|
|
137
|
+
rescue
|
|
135
138
|
end
|
|
136
|
-
rescue
|
|
137
|
-
memory_before_mb = 0.0
|
|
138
139
|
end
|
|
139
140
|
|
|
141
|
+
# Capture baseline memory stats — config-independent, analyze is debug-only.
|
|
142
|
+
gc_before = begin; GC.stat; rescue; {}; end
|
|
143
|
+
memory_before_mb = begin; DeadBro::MemoryHelpers.rss_mb; rescue; 0.0; end
|
|
144
|
+
object_counts_before = begin
|
|
145
|
+
defined?(ObjectSpace) && ObjectSpace.respond_to?(:count_objects) ? ObjectSpace.count_objects.dup : {}
|
|
146
|
+
rescue; {}; end
|
|
147
|
+
|
|
140
148
|
# Local SQL tracking just for this block.
|
|
141
149
|
# We subscribe directly to ActiveSupport::Notifications instead of relying
|
|
142
150
|
# on DeadBro's global SqlSubscriber tracking so we don't interfere with or
|
|
@@ -148,7 +156,6 @@ module DeadBro
|
|
|
148
156
|
begin
|
|
149
157
|
if defined?(ActiveSupport) && defined?(ActiveSupport::Notifications)
|
|
150
158
|
# Ensure SqlSubscriber is loaded so SQL_EVENT_NAME is defined
|
|
151
|
-
DeadBro::SqlSubscriber
|
|
152
159
|
event_name = DeadBro::SqlSubscriber::SQL_EVENT_NAME
|
|
153
160
|
|
|
154
161
|
sql_notification_subscription =
|
|
@@ -183,11 +190,7 @@ module DeadBro
|
|
|
183
190
|
"SQL"
|
|
184
191
|
end
|
|
185
192
|
|
|
186
|
-
local_sql_queries << {
|
|
187
|
-
duration_ms: duration_ms,
|
|
188
|
-
sql: normalized_sql,
|
|
189
|
-
query_type: query_type
|
|
190
|
-
}
|
|
193
|
+
local_sql_queries << {duration_ms: duration_ms, sql: normalized_sql, query_type: query_type}
|
|
191
194
|
end
|
|
192
195
|
end
|
|
193
196
|
rescue
|
|
@@ -197,14 +200,26 @@ module DeadBro
|
|
|
197
200
|
block_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
198
201
|
|
|
199
202
|
error = nil
|
|
200
|
-
result = nil
|
|
201
|
-
analysis_result = nil
|
|
202
203
|
|
|
203
204
|
begin
|
|
204
|
-
|
|
205
|
+
yield
|
|
205
206
|
rescue => e
|
|
206
207
|
error = e
|
|
207
208
|
ensure
|
|
209
|
+
# Restore Rails log level before any output
|
|
210
|
+
begin
|
|
211
|
+
if verbose && original_log_level
|
|
212
|
+
Rails.logger.level = original_log_level
|
|
213
|
+
end
|
|
214
|
+
rescue
|
|
215
|
+
end
|
|
216
|
+
begin
|
|
217
|
+
if verbose && !original_verbose_query_logs.nil?
|
|
218
|
+
ActiveRecord.verbose_query_logs = original_verbose_query_logs
|
|
219
|
+
end
|
|
220
|
+
rescue
|
|
221
|
+
end
|
|
222
|
+
|
|
208
223
|
# Always unsubscribe our local SQL subscriber
|
|
209
224
|
begin
|
|
210
225
|
if sql_notification_subscription && defined?(ActiveSupport) && defined?(ActiveSupport::Notifications)
|
|
@@ -225,7 +240,7 @@ module DeadBro
|
|
|
225
240
|
sql_time_ms = local_sql_queries.sum { |q| (q[:duration_ms] || 0.0).to_f }.round(2)
|
|
226
241
|
|
|
227
242
|
# Group SQL queries by normalized pattern to show frequency and cost
|
|
228
|
-
query_signatures = Hash.new { |h, k| h[k] = {
|
|
243
|
+
query_signatures = Hash.new { |h, k| h[k] = {count: 0, total_time_ms: 0.0, type: nil} }
|
|
229
244
|
local_sql_queries.each do |q|
|
|
230
245
|
sig = (q[:sql] || "UNKNOWN").to_s
|
|
231
246
|
entry = query_signatures[sig]
|
|
@@ -236,55 +251,41 @@ module DeadBro
|
|
|
236
251
|
|
|
237
252
|
top_query_signatures = query_signatures.sort_by { |_, data| -data[:count] }.first(3)
|
|
238
253
|
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
254
|
+
# Capture post-block memory state — always, regardless of config.
|
|
255
|
+
gc_after = begin; GC.stat; rescue; {}; end
|
|
256
|
+
memory_after_mb = begin; DeadBro::MemoryHelpers.rss_mb; rescue; memory_before_mb; end
|
|
257
|
+
object_counts_after = begin
|
|
258
|
+
defined?(ObjectSpace) && ObjectSpace.respond_to?(:count_objects) ? ObjectSpace.count_objects.dup : {}
|
|
259
|
+
rescue; {}; end
|
|
242
260
|
|
|
243
|
-
|
|
244
|
-
if memory_tracking_started
|
|
245
|
-
begin
|
|
246
|
-
raw_events = DeadBro::MemoryTrackingSubscriber.stop_request_tracking || {}
|
|
247
|
-
rescue
|
|
248
|
-
raw_events = {}
|
|
249
|
-
end
|
|
250
|
-
end
|
|
251
|
-
|
|
252
|
-
begin
|
|
253
|
-
# Prefer values from detailed tracking when available
|
|
254
|
-
if raw_events[:memory_before]
|
|
255
|
-
memory_before_mb = raw_events[:memory_before]
|
|
256
|
-
end
|
|
261
|
+
memory_delta_mb = (memory_after_mb - memory_before_mb).round(2)
|
|
257
262
|
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
263
|
+
# Large object scan — full ObjectSpace walk. analyze is debug-only, not hot path.
|
|
264
|
+
large_objects = begin
|
|
265
|
+
if defined?(ObjectSpace) && ObjectSpace.respond_to?(:each_object) && ObjectSpace.respond_to?(:memsize_of)
|
|
266
|
+
found = []
|
|
267
|
+
ObjectSpace.each_object do |obj|
|
|
268
|
+
size = begin; ObjectSpace.memsize_of(obj); rescue; 0; end
|
|
269
|
+
next unless size > 1_000_000
|
|
270
|
+
klass = begin; obj.class.name || "Unknown"; rescue; "Unknown"; end
|
|
271
|
+
found << {class_name: klass, size_mb: (size / 1_000_000.0).round(2)}
|
|
272
|
+
break if found.length >= 50
|
|
265
273
|
end
|
|
274
|
+
found.sort_by { |h| -h[:size_mb] }
|
|
275
|
+
else
|
|
276
|
+
[]
|
|
266
277
|
end
|
|
267
|
-
rescue
|
|
268
|
-
memory_after_mb = memory_before_mb
|
|
269
|
-
end
|
|
270
|
-
|
|
271
|
-
memory_delta_mb = (memory_after_mb - memory_before_mb).round(2)
|
|
278
|
+
rescue; []; end
|
|
272
279
|
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
top_allocating_classes: (perf[:top_allocating_classes] || []).first(3)
|
|
283
|
-
}
|
|
284
|
-
rescue
|
|
285
|
-
detailed_memory_summary = nil
|
|
286
|
-
end
|
|
287
|
-
end
|
|
280
|
+
detailed_memory_summary = DeadBro::MemoryDetails.build(
|
|
281
|
+
gc_before: gc_before,
|
|
282
|
+
gc_after: gc_after,
|
|
283
|
+
memory_before_mb: memory_before_mb,
|
|
284
|
+
memory_after_mb: memory_after_mb,
|
|
285
|
+
object_counts_before: object_counts_before,
|
|
286
|
+
object_counts_after: object_counts_after,
|
|
287
|
+
large_objects: large_objects
|
|
288
|
+
)
|
|
288
289
|
|
|
289
290
|
sql_queries_segment = ""
|
|
290
291
|
unless top_query_signatures.empty?
|
|
@@ -297,28 +298,17 @@ module DeadBro
|
|
|
297
298
|
sql_queries_segment = ", sql_top_queries=[#{formatted_queries.join(" | ")}]"
|
|
298
299
|
end
|
|
299
300
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
}.join(", ")
|
|
312
|
-
|
|
313
|
-
"#{base_summary}, " \
|
|
314
|
-
"memory_growth=#{detailed_memory_summary[:memory_growth_mb].round(2)}MB, " \
|
|
315
|
-
"gc_runs=+#{detailed_memory_summary[:gc_count_increase]}, " \
|
|
316
|
-
"heap_pages=+#{detailed_memory_summary[:heap_pages_increase]}, " \
|
|
317
|
-
"allocated=#{detailed_memory_summary[:total_allocated_size_mb].round(2)}MB, " \
|
|
318
|
-
"top_allocators=[#{top_classes}]"
|
|
319
|
-
else
|
|
320
|
-
base_summary
|
|
321
|
-
end
|
|
301
|
+
warnings = detailed_memory_summary[:warnings]
|
|
302
|
+
warnings_segment = warnings.any? ? ", warnings=[#{warnings.join(", ")}]" : ""
|
|
303
|
+
summary = "Analysis for #{label} - total_time=#{total_time_ms}ms, " \
|
|
304
|
+
"sql_queries=#{sql_count}, sql_time=#{sql_time_ms}ms, " \
|
|
305
|
+
"memory_before=#{memory_before_mb.round(2)}MB, " \
|
|
306
|
+
"memory_after=#{memory_after_mb.round(2)}MB, " \
|
|
307
|
+
"memory_delta=#{memory_delta_mb}MB, " \
|
|
308
|
+
"gc_collections=+#{detailed_memory_summary[:gc_collections]}, " \
|
|
309
|
+
"heap_pages_added=+#{detailed_memory_summary[:heap_pages_added]}, " \
|
|
310
|
+
"new_objects=+#{detailed_memory_summary[:new_objects]}" \
|
|
311
|
+
"#{sql_queries_segment}#{warnings_segment}"
|
|
322
312
|
|
|
323
313
|
begin
|
|
324
314
|
DeadBro.logger.info(summary)
|
|
@@ -348,7 +338,8 @@ module DeadBro
|
|
|
348
338
|
memory_before_mb: memory_before_mb,
|
|
349
339
|
memory_after_mb: memory_after_mb,
|
|
350
340
|
memory_delta_mb: memory_delta_mb,
|
|
351
|
-
memory_details: detailed_memory_summary
|
|
341
|
+
memory_details: detailed_memory_summary,
|
|
342
|
+
verbose: verbose
|
|
352
343
|
}
|
|
353
344
|
end
|
|
354
345
|
|