dead_bro 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dead_bro/client.rb +25 -0
- data/lib/dead_bro/configuration.rb +3 -1
- data/lib/dead_bro/job_subscriber.rb +3 -3
- data/lib/dead_bro/lightweight_memory_tracker.rb +12 -6
- data/lib/dead_bro/memory_tracking_subscriber.rb +68 -12
- data/lib/dead_bro/sql_subscriber.rb +36 -20
- data/lib/dead_bro/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9a78b3a99d00159acb1bf2fd97a0eed54ba0e03a24d7bbd2ec840e4bf779107f
|
|
4
|
+
data.tar.gz: 53aba3dcf00e53f210020529413561812de25b3399db8ce040b9976ab798e8d1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c150a7a452b46c4a600afbccb4a520a2f52b0c0bb3a85fec8f0a41f0ece56ea5161575a30a15002c18a8fa2b82e5692ccf13e49920ea3fffc031f7d23188027b
|
|
7
|
+
data.tar.gz: 22b3ea56482f38005ccbd98eae13be751fade782a69f355573ccd49fb2524e3f9bd2c9e40128df5bbb0c2e521924d7f8cba2c49c41a88ad67efafb6041c2f50d
|
data/lib/dead_bro/client.rb
CHANGED
|
@@ -31,6 +31,9 @@ module DeadBro
|
|
|
31
31
|
end
|
|
32
32
|
end
|
|
33
33
|
|
|
34
|
+
# Truncate large arrays to avoid 413 Request Entity Too Large
|
|
35
|
+
payload = truncate_payload_for_request(payload)
|
|
36
|
+
|
|
34
37
|
# Make the HTTP request (async)
|
|
35
38
|
make_http_request(event_name, payload, @configuration.api_key)
|
|
36
39
|
|
|
@@ -62,6 +65,28 @@ module DeadBro
|
|
|
62
65
|
|
|
63
66
|
private
|
|
64
67
|
|
|
68
|
+
# Limit payload size to avoid 413 from nginx/reverse proxies. Returns a new hash.
|
|
69
|
+
def truncate_payload_for_request(payload)
|
|
70
|
+
return payload unless payload.is_a?(Hash)
|
|
71
|
+
|
|
72
|
+
max_sql = @configuration.respond_to?(:max_sql_queries_to_send) ? @configuration.max_sql_queries_to_send : 500
|
|
73
|
+
max_logs = @configuration.respond_to?(:max_logs_to_send) ? @configuration.max_logs_to_send : 100
|
|
74
|
+
|
|
75
|
+
out = payload.dup
|
|
76
|
+
|
|
77
|
+
if out.key?(:sql_queries) && out[:sql_queries].is_a?(Array) && out[:sql_queries].size > max_sql
|
|
78
|
+
out[:sql_queries_total_count] = out[:sql_queries].size
|
|
79
|
+
out[:sql_queries] = out[:sql_queries].first(max_sql)
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
if out.key?(:logs) && out[:logs].is_a?(Array) && out[:logs].size > max_logs
|
|
83
|
+
out[:logs_total_count] = out[:logs].size
|
|
84
|
+
out[:logs] = out[:logs].first(max_logs)
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
out
|
|
88
|
+
end
|
|
89
|
+
|
|
65
90
|
def create_circuit_breaker
|
|
66
91
|
return nil unless @configuration.circuit_breaker_enabled
|
|
67
92
|
|
|
@@ -7,7 +7,7 @@ module DeadBro
|
|
|
7
7
|
:circuit_breaker_retry_timeout, :sample_rate, :excluded_controllers, :excluded_jobs,
|
|
8
8
|
:exclusive_controllers, :exclusive_jobs, :deploy_id, :slow_query_threshold_ms, :explain_analyze_enabled,
|
|
9
9
|
:job_queue_monitoring_enabled, :enable_db_stats, :enable_process_stats, :enable_system_stats,
|
|
10
|
-
:disk_paths, :interfaces_ignore
|
|
10
|
+
:disk_paths, :interfaces_ignore, :max_sql_queries_to_send, :max_logs_to_send
|
|
11
11
|
|
|
12
12
|
def initialize
|
|
13
13
|
@api_key = nil
|
|
@@ -36,6 +36,8 @@ module DeadBro
|
|
|
36
36
|
@enable_system_stats = false
|
|
37
37
|
@disk_paths = ["/"]
|
|
38
38
|
@interfaces_ignore = %w[lo lo0 docker0]
|
|
39
|
+
@max_sql_queries_to_send = 500 # Cap to avoid 413 Request Entity Too Large
|
|
40
|
+
@max_logs_to_send = 100
|
|
39
41
|
end
|
|
40
42
|
|
|
41
43
|
def resolve_deploy_id
|
|
@@ -30,7 +30,7 @@ module DeadBro
|
|
|
30
30
|
|
|
31
31
|
# Ensure tracking was started (fallback if perform_start.active_job didn't fire)
|
|
32
32
|
# This handles job backends that don't emit perform_start events
|
|
33
|
-
unless
|
|
33
|
+
unless DeadBro::SqlSubscriber.tracking_active?
|
|
34
34
|
DeadBro.logger.clear
|
|
35
35
|
Thread.current[DeadBro::TRACKING_START_TIME_KEY] = Time.now
|
|
36
36
|
DeadBro::SqlSubscriber.start_request_tracking
|
|
@@ -103,10 +103,10 @@ module DeadBro
|
|
|
103
103
|
|
|
104
104
|
duration_ms = ((finished - started) * 1000.0).round(2)
|
|
105
105
|
exception = data[:exception_object]
|
|
106
|
+
job_class = data[:job].class.name
|
|
106
107
|
|
|
107
108
|
# Ensure tracking was started (fallback if perform_start.active_job didn't fire)
|
|
108
|
-
|
|
109
|
-
unless Thread.current[DeadBro::SqlSubscriber::THREAD_LOCAL_KEY]
|
|
109
|
+
unless DeadBro::SqlSubscriber.tracking_active?
|
|
110
110
|
DeadBro.logger.clear
|
|
111
111
|
Thread.current[DeadBro::TRACKING_START_TIME_KEY] = Time.now
|
|
112
112
|
DeadBro::SqlSubscriber.start_request_tracking
|
|
@@ -8,19 +8,25 @@ module DeadBro
|
|
|
8
8
|
def self.start_request_tracking
|
|
9
9
|
return unless DeadBro.configuration.memory_tracking_enabled
|
|
10
10
|
|
|
11
|
-
#
|
|
12
|
-
|
|
11
|
+
# Stack allows nested job tracking (e.g. one job performing others in the same thread)
|
|
12
|
+
mem_before = lightweight_memory_usage
|
|
13
|
+
frame = {
|
|
13
14
|
gc_before: lightweight_gc_stats,
|
|
14
|
-
memory_before:
|
|
15
|
+
memory_before: mem_before,
|
|
15
16
|
start_time: Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
16
17
|
}
|
|
18
|
+
(Thread.current[THREAD_LOCAL_KEY] ||= []) << frame
|
|
17
19
|
end
|
|
18
20
|
|
|
19
21
|
def self.stop_request_tracking
|
|
20
|
-
|
|
21
|
-
|
|
22
|
+
stack = Thread.current[THREAD_LOCAL_KEY]
|
|
23
|
+
unless stack.is_a?(Array) && stack.any?
|
|
24
|
+
Thread.current[THREAD_LOCAL_KEY] = nil
|
|
25
|
+
return {}
|
|
26
|
+
end
|
|
22
27
|
|
|
23
|
-
|
|
28
|
+
events = stack.pop
|
|
29
|
+
Thread.current[THREAD_LOCAL_KEY] = nil if stack.empty?
|
|
24
30
|
|
|
25
31
|
# Calculate only essential metrics
|
|
26
32
|
gc_after = lightweight_gc_stats
|
|
@@ -6,6 +6,7 @@ module DeadBro
|
|
|
6
6
|
class MemoryTrackingSubscriber
|
|
7
7
|
# Object allocation events
|
|
8
8
|
ALLOCATION_EVENT = "object_allocations.active_support"
|
|
9
|
+
PROCESS_ACTION_EVENT = "process_action.action_controller"
|
|
9
10
|
|
|
10
11
|
THREAD_LOCAL_KEY = :dead_bro_memory_events
|
|
11
12
|
# Consider objects larger than this many bytes as "large"
|
|
@@ -28,6 +29,23 @@ module DeadBro
|
|
|
28
29
|
next unless rand < ALLOCATION_SAMPLING_RATE
|
|
29
30
|
track_allocation(data, started, finished)
|
|
30
31
|
end
|
|
32
|
+
|
|
33
|
+
# Subscribe to process_action to capture request-level allocation counters
|
|
34
|
+
ActiveSupport::Notifications.subscribe(PROCESS_ACTION_EVENT) do |*args|
|
|
35
|
+
event = if args.length == 1 && args.first.is_a?(ActiveSupport::Notifications::Event)
|
|
36
|
+
args.first
|
|
37
|
+
else
|
|
38
|
+
ActiveSupport::Notifications::Event.new(*args)
|
|
39
|
+
end
|
|
40
|
+
allocations = event.respond_to?(:allocations) ? event.allocations : event.payload[:allocations]
|
|
41
|
+
allocated_bytes = event.respond_to?(:allocated_bytes) ? event.allocated_bytes : event.payload[:allocated_bytes]
|
|
42
|
+
next unless allocations || allocated_bytes
|
|
43
|
+
|
|
44
|
+
record_request_allocations(
|
|
45
|
+
allocations: allocations,
|
|
46
|
+
allocated_bytes: allocated_bytes
|
|
47
|
+
)
|
|
48
|
+
end
|
|
31
49
|
rescue
|
|
32
50
|
# Allocation tracking might not be available in all Ruby versions
|
|
33
51
|
end
|
|
@@ -36,29 +54,39 @@ module DeadBro
|
|
|
36
54
|
# Never raise from instrumentation install
|
|
37
55
|
end
|
|
38
56
|
|
|
57
|
+
# Current frame (top of stack) for nested job tracking; nil if none.
|
|
58
|
+
def self.current_events
|
|
59
|
+
stack = Thread.current[THREAD_LOCAL_KEY]
|
|
60
|
+
return nil unless stack.is_a?(Array) && stack.any?
|
|
61
|
+
stack.last
|
|
62
|
+
end
|
|
63
|
+
|
|
39
64
|
def self.start_request_tracking
|
|
40
65
|
# Only track if memory tracking is enabled
|
|
41
66
|
return unless DeadBro.configuration.memory_tracking_enabled
|
|
42
67
|
|
|
43
|
-
|
|
68
|
+
frame = {
|
|
44
69
|
allocations: [],
|
|
45
70
|
memory_snapshots: [],
|
|
46
71
|
large_objects: [],
|
|
72
|
+
request_allocations: nil,
|
|
47
73
|
gc_before: gc_stats,
|
|
48
74
|
memory_before: memory_usage_mb,
|
|
49
|
-
start_time: Time.now.
|
|
75
|
+
start_time: Time.now.to_f,
|
|
50
76
|
object_counts_before: count_objects_snapshot
|
|
51
77
|
}
|
|
78
|
+
(Thread.current[THREAD_LOCAL_KEY] ||= []) << frame
|
|
52
79
|
end
|
|
53
80
|
|
|
54
81
|
def self.stop_request_tracking
|
|
55
|
-
|
|
56
|
-
|
|
82
|
+
stack = Thread.current[THREAD_LOCAL_KEY]
|
|
83
|
+
events = stack.is_a?(Array) && stack.any? ? stack.pop : nil
|
|
84
|
+
Thread.current[THREAD_LOCAL_KEY] = nil if stack.nil? || stack.empty?
|
|
57
85
|
|
|
58
86
|
if events
|
|
59
87
|
events[:gc_after] = gc_stats
|
|
60
88
|
events[:memory_after] = memory_usage_mb
|
|
61
|
-
events[:end_time] = Time.now.
|
|
89
|
+
events[:end_time] = Time.now.to_f
|
|
62
90
|
events[:duration_seconds] = events[:end_time] - events[:start_time]
|
|
63
91
|
events[:object_counts_after] = count_objects_snapshot
|
|
64
92
|
|
|
@@ -71,14 +99,26 @@ module DeadBro
|
|
|
71
99
|
events || {}
|
|
72
100
|
end
|
|
73
101
|
|
|
102
|
+
# Record request-level allocation counters from Rails instrumentation.
|
|
103
|
+
def self.record_request_allocations(allocations:, allocated_bytes:)
|
|
104
|
+
events = current_events
|
|
105
|
+
return unless events
|
|
106
|
+
|
|
107
|
+
events[:request_allocations] = {
|
|
108
|
+
allocations: allocations,
|
|
109
|
+
allocated_bytes: allocated_bytes
|
|
110
|
+
}
|
|
111
|
+
end
|
|
112
|
+
|
|
74
113
|
def self.track_allocation(data, started, finished)
|
|
75
|
-
|
|
114
|
+
events = current_events
|
|
115
|
+
return unless events
|
|
76
116
|
|
|
77
117
|
# Only track if we have meaningful allocation data
|
|
78
118
|
return unless data.is_a?(Hash) && data[:count] && data[:size]
|
|
79
119
|
|
|
80
120
|
# Limit allocations per request to prevent memory bloat
|
|
81
|
-
allocations =
|
|
121
|
+
allocations = events[:allocations]
|
|
82
122
|
return if allocations.length >= MAX_ALLOCATIONS_PER_REQUEST
|
|
83
123
|
|
|
84
124
|
# Simplified allocation tracking (avoid expensive operations)
|
|
@@ -95,14 +135,15 @@ module DeadBro
|
|
|
95
135
|
large_object: true,
|
|
96
136
|
size_mb: (data[:size] / 1_000_000.0).round(2)
|
|
97
137
|
)
|
|
98
|
-
|
|
138
|
+
events[:large_objects] << large_object
|
|
99
139
|
end
|
|
100
140
|
|
|
101
|
-
|
|
141
|
+
events[:allocations] << allocation
|
|
102
142
|
end
|
|
103
143
|
|
|
104
144
|
def self.take_memory_snapshot(label = nil)
|
|
105
|
-
|
|
145
|
+
events = current_events
|
|
146
|
+
return unless events
|
|
106
147
|
|
|
107
148
|
snapshot = {
|
|
108
149
|
label: label || "snapshot_#{Time.now.to_i}",
|
|
@@ -113,7 +154,7 @@ module DeadBro
|
|
|
113
154
|
heap_pages: heap_pages
|
|
114
155
|
}
|
|
115
156
|
|
|
116
|
-
|
|
157
|
+
events[:memory_snapshots] << snapshot
|
|
117
158
|
end
|
|
118
159
|
|
|
119
160
|
def self.analyze_memory_performance(memory_events)
|
|
@@ -122,6 +163,7 @@ module DeadBro
|
|
|
122
163
|
allocations = memory_events[:allocations] || []
|
|
123
164
|
large_objects = memory_events[:large_objects] || []
|
|
124
165
|
snapshots = memory_events[:memory_snapshots] || []
|
|
166
|
+
request_allocations = memory_events[:request_allocations]
|
|
125
167
|
|
|
126
168
|
# Calculate memory growth
|
|
127
169
|
memory_growth = 0
|
|
@@ -132,6 +174,19 @@ module DeadBro
|
|
|
132
174
|
# Calculate allocation totals
|
|
133
175
|
total_allocations = allocations.sum { |a| a[:count] }
|
|
134
176
|
total_allocated_size = allocations.sum { |a| a[:size] }
|
|
177
|
+
if request_allocations
|
|
178
|
+
total_allocated_size = request_allocations[:allocated_bytes].to_i if total_allocated_size.zero?
|
|
179
|
+
end
|
|
180
|
+
gc_allocations = nil
|
|
181
|
+
if memory_events[:gc_before] && memory_events[:gc_after]
|
|
182
|
+
gc_allocations = (memory_events[:gc_after][:total_allocated_objects] || 0) -
|
|
183
|
+
(memory_events[:gc_before][:total_allocated_objects] || 0)
|
|
184
|
+
end
|
|
185
|
+
if gc_allocations.to_i > 0
|
|
186
|
+
total_allocations = gc_allocations
|
|
187
|
+
elsif total_allocations.zero? && request_allocations
|
|
188
|
+
total_allocations = request_allocations[:allocations].to_i
|
|
189
|
+
end
|
|
135
190
|
|
|
136
191
|
# Group allocations by class
|
|
137
192
|
allocations_by_class = allocations.group_by { |a| a[:class_name] }
|
|
@@ -174,7 +229,8 @@ module DeadBro
|
|
|
174
229
|
(total_allocations.to_f / memory_events[:duration_seconds]).round(2) : 0,
|
|
175
230
|
top_allocating_classes: top_allocating_classes.map { |class_name, data|
|
|
176
231
|
{
|
|
177
|
-
|
|
232
|
+
class: class_name,
|
|
233
|
+
name: class_name,
|
|
178
234
|
count: data[:count],
|
|
179
235
|
size: data[:size],
|
|
180
236
|
size_mb: (data[:size] / 1_000_000.0).round(2)
|
|
@@ -16,13 +16,25 @@ module DeadBro
|
|
|
16
16
|
THREAD_LOCAL_EXPLAIN_PENDING_KEY = :dead_bro_explain_pending
|
|
17
17
|
MAX_TRACKED_QUERIES = 1000
|
|
18
18
|
|
|
19
|
+
# True when there is at least one active tracking context (e.g. for nested jobs).
|
|
20
|
+
def self.tracking_active?
|
|
21
|
+
stack = Thread.current[THREAD_LOCAL_KEY]
|
|
22
|
+
stack.is_a?(Array) && stack.any?
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Current queries array (top of stack); nil if no active tracking.
|
|
26
|
+
def self.current_queries_array
|
|
27
|
+
stack = Thread.current[THREAD_LOCAL_KEY]
|
|
28
|
+
return nil unless stack.is_a?(Array) && stack.any?
|
|
29
|
+
stack.last
|
|
30
|
+
end
|
|
31
|
+
|
|
19
32
|
# Check if we should continue tracking based on count and time limits
|
|
20
|
-
def self.should_continue_tracking?(
|
|
21
|
-
|
|
22
|
-
return false unless events
|
|
33
|
+
def self.should_continue_tracking?(current_queries_array, max_count)
|
|
34
|
+
return false unless current_queries_array.is_a?(Array)
|
|
23
35
|
|
|
24
36
|
# Check count limit
|
|
25
|
-
return false if
|
|
37
|
+
return false if current_queries_array.length >= max_count
|
|
26
38
|
|
|
27
39
|
# Check time limit
|
|
28
40
|
start_time = Thread.current[DeadBro::TRACKING_START_TIME_KEY]
|
|
@@ -44,9 +56,10 @@ module DeadBro
|
|
|
44
56
|
end
|
|
45
57
|
|
|
46
58
|
ActiveSupport::Notifications.subscribe(SQL_EVENT_NAME) do |name, started, finished, _unique_id, data|
|
|
47
|
-
next if data[:name] == "SCHEMA"
|
|
48
|
-
# Only track queries that are part of the current request
|
|
49
|
-
|
|
59
|
+
next if data[:name] == "SCHEMA" || data[:name] == "CACHE" || data[:name] == "BEGIN" || data[:name] == "COMMIT" || data[:name] == "ROLLBACK" || data[:name] == "SAVEPOINT" || data[:name] == "RELEASE"
|
|
60
|
+
# Only track queries that are part of the current request (top of stack for nested jobs)
|
|
61
|
+
current = current_queries_array
|
|
62
|
+
next unless current
|
|
50
63
|
unique_id = _unique_id
|
|
51
64
|
allocations = nil
|
|
52
65
|
captured_backtrace = nil
|
|
@@ -82,15 +95,16 @@ module DeadBro
|
|
|
82
95
|
start_explain_analyze_background(original_sql, data[:connection_id], query_info, binds)
|
|
83
96
|
end
|
|
84
97
|
|
|
85
|
-
# Add to
|
|
86
|
-
if should_continue_tracking?(
|
|
87
|
-
|
|
98
|
+
# Add to current context (top of stack), but only if we haven't exceeded the limits
|
|
99
|
+
if should_continue_tracking?(current, MAX_TRACKED_QUERIES)
|
|
100
|
+
current << query_info
|
|
88
101
|
end
|
|
89
102
|
end
|
|
90
103
|
end
|
|
91
104
|
|
|
92
105
|
def self.start_request_tracking
|
|
93
|
-
|
|
106
|
+
# Stack allows nested job tracking (e.g. one job performing others in the same thread)
|
|
107
|
+
(Thread.current[THREAD_LOCAL_KEY] ||= []) << []
|
|
94
108
|
Thread.current[THREAD_LOCAL_ALLOC_START_KEY] = {}
|
|
95
109
|
Thread.current[THREAD_LOCAL_ALLOC_RESULTS_KEY] = {}
|
|
96
110
|
Thread.current[THREAD_LOCAL_BACKTRACE_KEY] = {}
|
|
@@ -103,15 +117,17 @@ module DeadBro
|
|
|
103
117
|
# all explain_plan fields are populated
|
|
104
118
|
wait_for_pending_explains(5.0) # 5 second timeout
|
|
105
119
|
|
|
106
|
-
|
|
107
|
-
queries =
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
120
|
+
stack = Thread.current[THREAD_LOCAL_KEY]
|
|
121
|
+
queries = stack.is_a?(Array) && stack.any? ? stack.pop : []
|
|
122
|
+
# Clear thread locals when stack is empty so "tracking not started" behaves correctly
|
|
123
|
+
if stack.nil? || stack.empty?
|
|
124
|
+
Thread.current[THREAD_LOCAL_KEY] = nil
|
|
125
|
+
Thread.current[THREAD_LOCAL_ALLOC_START_KEY] = nil
|
|
126
|
+
Thread.current[THREAD_LOCAL_ALLOC_RESULTS_KEY] = nil
|
|
127
|
+
Thread.current[THREAD_LOCAL_BACKTRACE_KEY] = nil
|
|
128
|
+
Thread.current[THREAD_LOCAL_EXPLAIN_PENDING_KEY] = nil
|
|
129
|
+
end
|
|
130
|
+
queries
|
|
115
131
|
end
|
|
116
132
|
|
|
117
133
|
def self.wait_for_pending_explains(timeout_seconds)
|
data/lib/dead_bro/version.rb
CHANGED