dead_bro 0.2.8 → 0.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +105 -43
- data/lib/dead_bro/circuit_breaker.rb +58 -38
- data/lib/dead_bro/client.rb +112 -143
- data/lib/dead_bro/configuration.rb +76 -40
- data/lib/dead_bro/dispatcher.rb +130 -0
- data/lib/dead_bro/elasticsearch_subscriber.rb +141 -0
- data/lib/dead_bro/error_middleware.rb +1 -1
- data/lib/dead_bro/http_instrumentation.rb +108 -15
- data/lib/dead_bro/job_subscriber.rb +35 -12
- data/lib/dead_bro/lightweight_memory_tracker.rb +5 -7
- data/lib/dead_bro/logger.rb +30 -11
- data/lib/dead_bro/memory_details.rb +71 -0
- data/lib/dead_bro/memory_helpers.rb +62 -0
- data/lib/dead_bro/memory_leak_detector.rb +178 -158
- data/lib/dead_bro/memory_tracking_subscriber.rb +7 -31
- data/lib/dead_bro/monitor.rb +18 -5
- data/lib/dead_bro/railtie.rb +10 -6
- data/lib/dead_bro/sql_subscriber.rb +102 -70
- data/lib/dead_bro/sql_tracking_middleware.rb +7 -1
- data/lib/dead_bro/subscriber.rb +40 -15
- data/lib/dead_bro/version.rb +1 -1
- data/lib/dead_bro.rb +129 -113
- metadata +4 -1
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DeadBro
  # Collects Elasticsearch request events for the current thread.
  #
  # Events are appended to a thread-local array that exists only between
  # .start_request_tracking and .stop_request_tracking. They arrive either
  # through ActiveSupport::Notifications (see .subscribe!) or through direct
  # calls to .record.
  class ElasticsearchSubscriber
    THREAD_LOCAL_KEY = :dead_bro_elasticsearch_events
    MAX_TRACKED_EVENTS = 500

    # Operation names for single-document endpoints (".../_doc/<id>"), by HTTP verb.
    DOCUMENT_OPERATIONS = {
      "GET" => "get",
      "DELETE" => "delete",
      "POST" => "index",
      "PUT" => "index"
    }.freeze

    # Operation names for index-level endpoints ("/<index>"), by HTTP verb.
    INDEX_OPERATIONS = {
      "PUT" => "create_index",
      "DELETE" => "delete_index",
      "HEAD" => "index_exists",
      "GET" => "get_index"
    }.freeze

    # Install gem-based notification subscriber (request.elasticsearch / request.elastic_transport).
    # The Net::HTTP path is handled by HttpInstrumentation, which calls .record directly.
    def self.subscribe!
      install_notifications_subscription!
    rescue
      # Instrumentation setup must never raise into the host application.
    end

    # Called by HttpInstrumentation when it detects a Net::HTTP request to an ES host.
    #
    # Appends one event to the current thread's buffer. Does nothing when
    # tracking has not been started on this thread or a tracking limit was hit.
    def self.record(method:, path:, status:, duration_ms:)
      events = Thread.current[THREAD_LOCAL_KEY]
      return unless events
      return unless should_continue_tracking?

      events << build_event(method, path, status, duration_ms)
    rescue
      # Best effort: a failure to record must not affect the traced request.
    end

    # Starts a fresh event buffer for the current thread.
    def self.start_request_tracking
      Thread.current[THREAD_LOCAL_KEY] = []
    end

    # Clears the current thread's buffer and returns the collected events
    # (an empty array when tracking was never started).
    def self.stop_request_tracking
      events = Thread.current[THREAD_LOCAL_KEY]
      Thread.current[THREAD_LOCAL_KEY] = nil
      events || []
    end

    # True while the buffer exists, holds fewer than MAX_TRACKED_EVENTS events
    # and, when a tracking start time is set on the thread, less than
    # DeadBro::MAX_TRACKING_DURATION_SECONDS have elapsed since then.
    def self.should_continue_tracking?
      events = Thread.current[THREAD_LOCAL_KEY]
      return false unless events
      return false if events.length >= MAX_TRACKED_EVENTS

      start_time = Thread.current[DeadBro::TRACKING_START_TIME_KEY]
      if start_time
        elapsed_seconds = Time.now - start_time
        return false if elapsed_seconds >= DeadBro::MAX_TRACKING_DURATION_SECONDS
      end

      true
    end

    # Derives a short operation name ("search", "bulk", "get", ...) from the
    # HTTP method and request path. The query string is ignored. Returns
    # "unknown" for a nil path or on any error, and the downcased HTTP method
    # when no pattern matches.
    def self.extract_operation(method, path)
      return "unknown" if path.nil?

      clean = path.to_s.split("?").first.to_s
      verb = method.to_s.upcase

      # Order matters: the more specific patterns must be tested first.
      case clean
      when /_search\z/i then "search"
      when /_msearch\z/i then "msearch"
      when /_bulk\z/i then "bulk"
      when /_doc\/[^\/]+\/_update\z/i, /_update\/[^\/]+\z/i then "update"
      when /_delete_by_query\z/i then "delete_by_query"
      when /_count\z/i then "count"
      when /_mapping\z/i then (verb == "GET" ? "get_mapping" : "put_mapping")
      when /_doc\/[^\/]+\z/i then DOCUMENT_OPERATIONS.fetch(verb, "doc")
      when /_doc\z/i then "index"
      when /\A\/_cluster\//i then "cluster"
      when /\A\/_cat\//i then "cat"
      when /\A\/[^\/]+\z/ then INDEX_OPERATIONS.fetch(verb, "index_op")
      else verb.downcase
      end
    rescue
      "unknown"
    end

    # Replaces UUID and purely numeric path segments with "/{id}".
    #
    # A numeric segment counts as an id when it is followed by "/", by the
    # start of a query string ("?") or by the end of the string, so paths that
    # still carry their query string are masked as well.
    # Returns "" for nil.
    def self.sanitize_path(path)
      return "" if path.nil?

      path.to_s
        .gsub(/\/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/i, "/{id}")
        .gsub(/\/\d+(?=[\/?]|\z)/, "/{id}")
    rescue
      path.to_s
    end

    class << self
      private

      # Subscribes to both notification names and appends one event per
      # notification to the current thread's buffer. No-op when
      # ActiveSupport::Notifications is not loaded.
      def install_notifications_subscription!
        return unless defined?(::ActiveSupport::Notifications)

        %w[request.elasticsearch request.elastic_transport].each do |event_name|
          ::ActiveSupport::Notifications.subscribe(event_name) do |_name, started, finished, _id, payload|
            events = Thread.current[THREAD_LOCAL_KEY]
            next unless events
            next unless should_continue_tracking?

            duration_ms = ((finished - started) * 1000.0).round(2)
            method = payload[:method].to_s.upcase
            path = payload[:path].to_s
            events << build_event(method, path, payload[:status], duration_ms)
          rescue
            # Never let a subscriber failure reach the code that emitted the event.
          end
        end
      rescue
        # Subscribing is best effort.
      end

      # Builds the event hash stored in the buffer. The operation is derived
      # from the raw path; the stored path is the sanitized one.
      def build_event(method, path, status, duration_ms)
        {
          method: method.to_s.upcase,
          path: sanitize_path(path),
          operation: extract_operation(method, path),
          status: status,
          duration_ms: duration_ms
        }
      end
    end
  end
end
|
|
@@ -19,7 +19,7 @@ module DeadBro
|
|
|
19
19
|
# Use the error class name as the event name
|
|
20
20
|
event_name = exception.class.name.to_s
|
|
21
21
|
event_name = EVENT_NAME if event_name.empty?
|
|
22
|
-
@client.post_metric(event_name: event_name, payload: payload)
|
|
22
|
+
@client.post_metric(event_name: event_name, payload: payload, force: true)
|
|
23
23
|
rescue
|
|
24
24
|
# Never let APM reporting interfere with the host app
|
|
25
25
|
end
|
|
@@ -12,6 +12,7 @@ module DeadBro
|
|
|
12
12
|
def self.install!(client: Client.new)
|
|
13
13
|
install_net_http!(client)
|
|
14
14
|
install_typhoeus!(client) if defined?(::Typhoeus)
|
|
15
|
+
install_faraday!(client) if defined?(::Faraday)
|
|
15
16
|
rescue
|
|
16
17
|
# Never raise from instrumentation install
|
|
17
18
|
end
|
|
@@ -38,23 +39,37 @@ module DeadBro
|
|
|
38
39
|
nil
|
|
39
40
|
end
|
|
40
41
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
42
|
+
host = (uri && uri.host) || @address
|
|
43
|
+
port = (uri && uri.port) || @port
|
|
44
|
+
is_es_host = DeadBro::HttpInstrumentation.elasticsearch_host?(host, port)
|
|
45
|
+
# Skip localhost/internal only for non-ES hosts. ES on localhost (e.g. port 9200)
|
|
46
|
+
# must still be tracked; only skip the deadbro backend itself.
|
|
47
|
+
skip_instrumentation = !is_es_host && uri && (uri.to_s.include?("localhost") || uri.to_s.include?("aberatii.com"))
|
|
44
48
|
|
|
45
|
-
|
|
49
|
+
if is_es_host
|
|
50
|
+
# Route to elasticsearch subscriber instead of http_outgoing
|
|
51
|
+
if Thread.current[DeadBro::ElasticsearchSubscriber::THREAD_LOCAL_KEY]
|
|
52
|
+
path = (uri && uri.path) || req.path
|
|
53
|
+
DeadBro::ElasticsearchSubscriber.record(
|
|
54
|
+
method: req.method,
|
|
55
|
+
path: path,
|
|
56
|
+
status: response && response.code.to_i,
|
|
57
|
+
duration_ms: duration_ms
|
|
58
|
+
)
|
|
59
|
+
end
|
|
60
|
+
elsif !skip_instrumentation
|
|
61
|
+
lib = DeadBro::HttpInstrumentation.typesense_host?(host, port) ? "typesense" : "net_http"
|
|
46
62
|
payload = {
|
|
47
|
-
library:
|
|
63
|
+
library: lib,
|
|
48
64
|
method: req.method,
|
|
49
65
|
url: uri && uri.to_s,
|
|
50
|
-
host:
|
|
66
|
+
host: host,
|
|
51
67
|
path: (uri && uri.path) || req.path,
|
|
52
68
|
status: response && response.code.to_i,
|
|
53
69
|
duration_ms: duration_ms,
|
|
54
70
|
exception: error && error.class.name
|
|
55
71
|
}
|
|
56
|
-
|
|
57
|
-
if Thread.current[THREAD_LOCAL_KEY] && should_continue_tracking?
|
|
72
|
+
if Thread.current[THREAD_LOCAL_KEY] && DeadBro::HttpInstrumentation.should_continue_tracking?
|
|
58
73
|
Thread.current[THREAD_LOCAL_KEY] << payload
|
|
59
74
|
end
|
|
60
75
|
end
|
|
@@ -85,8 +100,6 @@ module DeadBro
|
|
|
85
100
|
(respond_to?(:base_url) ? base_url : nil)
|
|
86
101
|
end
|
|
87
102
|
|
|
88
|
-
# Skip instrumentation for our own APM endpoint to prevent infinite loops,
|
|
89
|
-
# but do NOT alter the original method's return value/control flow.
|
|
90
103
|
skip_instrumentation = req_url && (req_url.include?("localhost:3100/apm/v1/metrics") || req_url.include?("deadbro.aberatii.com/apm/v1/metrics"))
|
|
91
104
|
|
|
92
105
|
unless skip_instrumentation
|
|
@@ -97,8 +110,7 @@ module DeadBro
|
|
|
97
110
|
status: response && response.code,
|
|
98
111
|
duration_ms: duration_ms
|
|
99
112
|
}
|
|
100
|
-
|
|
101
|
-
if Thread.current[THREAD_LOCAL_KEY] && should_continue_tracking?
|
|
113
|
+
if Thread.current[THREAD_LOCAL_KEY] && DeadBro::HttpInstrumentation.should_continue_tracking?
|
|
102
114
|
Thread.current[THREAD_LOCAL_KEY] << payload
|
|
103
115
|
end
|
|
104
116
|
end
|
|
@@ -112,15 +124,96 @@ module DeadBro
|
|
|
112
124
|
rescue
|
|
113
125
|
end
|
|
114
126
|
|
|
115
|
-
|
|
127
|
+
# Instruments Faraday by defining DeadBro::FaradayMiddleware (once) and
# prepending DeadBro::FaradayInstrumentation to Faraday::Connection so every
# newly built connection carries the middleware.
#
# The middleware times each request. Requests to an Elasticsearch host are
# forwarded to ElasticsearchSubscriber.record; other requests are appended to
# this module's thread-local event list, except URLs containing "localhost"
# or "aberatii.com", which are skipped.
#
# client - accepted for parity with the other install_* methods; not used here.
#
# Never raises: any failure during installation is swallowed.
def self.install_faraday!(client)
  return unless defined?(::Faraday)

  unless defined?(::DeadBro::FaradayMiddleware)
    middleware_klass = Class.new(::Faraday::Middleware) do
      def call(env)
        start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        response = nil
        begin
          response = @app.call(env)
        ensure
          # Runs for failed requests too; response is nil in that case.
          finish_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
          duration_ms = ((finish_time - start_time) * 1000.0).round(2)
          begin
            url = env.url
            host = url.host.to_s
            port = url.port
            url_str = url.to_s

            is_es_host = DeadBro::HttpInstrumentation.elasticsearch_host?(host, port)
            skip = !is_es_host && (url_str.include?("localhost") || url_str.include?("aberatii.com"))

            if is_es_host
              if Thread.current[DeadBro::ElasticsearchSubscriber::THREAD_LOCAL_KEY]
                DeadBro::ElasticsearchSubscriber.record(
                  method: env.method.to_s.upcase,
                  path: url.path,
                  status: response&.status,
                  duration_ms: duration_ms
                )
              end
            elsif !skip
              lib = DeadBro::HttpInstrumentation.typesense_host?(host, port) ? "typesense" : "faraday"
              payload = {
                library: lib,
                method: env.method.to_s.upcase,
                url: url_str,
                host: host,
                path: url.path,
                status: response&.status,
                duration_ms: duration_ms
              }
              key = DeadBro::HttpInstrumentation::THREAD_LOCAL_KEY
              if Thread.current[key] && DeadBro::HttpInstrumentation.should_continue_tracking?
                Thread.current[key] << payload
              end
            end
          rescue
            # Recording is best effort and must not alter the request outcome.
          end
        end
      end
    end
    ::DeadBro.const_set(:FaradayMiddleware, middleware_klass)
  end

  unless defined?(::DeadBro::FaradayInstrumentation)
    instrumentation_mod = Module.new do
      define_method(:initialize) do |url = nil, options = {}, &block|
        # Errors from Faraday's own initializer must reach the caller, so
        # super stays outside the rescue below.
        super(url, options, &block)
        begin
          unless builder.handlers.map(&:klass).include?(::DeadBro::FaradayMiddleware)
            builder.use(::DeadBro::FaradayMiddleware)
          end
        rescue
          # Failing to register the middleware only disables tracing for this connection.
        end
      end
    end
    ::DeadBro.const_set(:FaradayInstrumentation, instrumentation_mod)
  end

  ::Faraday::Connection.prepend(::DeadBro::FaradayInstrumentation) unless ::Faraday::Connection.ancestors.include?(::DeadBro::FaradayInstrumentation)
rescue
  # Never raise from instrumentation install.
end
|
|
198
|
+
|
|
199
|
+
# Heuristic: does this host/port look like an Elasticsearch endpoint?
#
# True when the port is 9200, or when the host name ends with ".elastic.co"
# or ".es.amazonaws.com", or contains "elasticsearch". Host names are
# compared case-insensitively. Always false for a nil host.
def self.elasticsearch_host?(host, port)
  return false if host.nil?
  return true if port == 9200

  name = host.to_s.downcase
  name.end_with?(".elastic.co", ".es.amazonaws.com") || name.include?("elasticsearch")
end
|
|
205
|
+
|
|
206
|
+
# Heuristic: does this host/port look like a Typesense endpoint?
#
# True when the port is 8108, or when the host name ends with
# ".typesense.io" or ".typesense.net" (compared case-insensitively).
# Always false for a nil host.
def self.typesense_host?(host, port)
  return false if host.nil?
  return true if port == 8108

  host.to_s.downcase.end_with?(".typesense.io", ".typesense.net")
end
|
|
210
|
+
|
|
116
211
|
def self.should_continue_tracking?
|
|
117
212
|
events = Thread.current[THREAD_LOCAL_KEY]
|
|
118
213
|
return false unless events
|
|
119
214
|
|
|
120
|
-
# Check count limit
|
|
121
215
|
return false if events.length >= MAX_TRACKED_EVENTS
|
|
122
216
|
|
|
123
|
-
# Check time limit
|
|
124
217
|
start_time = Thread.current[DeadBro::TRACKING_START_TIME_KEY]
|
|
125
218
|
if start_time
|
|
126
219
|
elapsed_seconds = Time.now - start_time
|
|
@@ -17,15 +17,26 @@ module DeadBro
|
|
|
17
17
|
begin
|
|
18
18
|
job_class_name = data[:job].class.name
|
|
19
19
|
if DeadBro.configuration.excluded_job?(job_class_name)
|
|
20
|
+
drain_job_tracking
|
|
20
21
|
next
|
|
21
22
|
end
|
|
22
23
|
# If exclusive_jobs is defined and not empty, only track matching jobs
|
|
23
24
|
unless DeadBro.configuration.exclusive_job?(job_class_name)
|
|
25
|
+
drain_job_tracking
|
|
24
26
|
next
|
|
25
27
|
end
|
|
26
28
|
rescue
|
|
27
29
|
end
|
|
28
30
|
|
|
31
|
+
# Skip out via sampling before we build any payload — jobs can be chatty
|
|
32
|
+
# enough that even the "cheap" stop/analyze work matters under load.
|
|
33
|
+
# Completions have no exception attached; the exception subscriber below
|
|
34
|
+
# always sends errors with force: true.
|
|
35
|
+
unless DeadBro.configuration.should_sample?
|
|
36
|
+
drain_job_tracking
|
|
37
|
+
next
|
|
38
|
+
end
|
|
39
|
+
|
|
29
40
|
duration_ms = ((finished - started) * 1000.0).round(2)
|
|
30
41
|
|
|
31
42
|
# Ensure tracking was started (fallback if perform_start.active_job didn't fire)
|
|
@@ -34,6 +45,11 @@ module DeadBro
|
|
|
34
45
|
DeadBro.logger.clear
|
|
35
46
|
Thread.current[DeadBro::TRACKING_START_TIME_KEY] = Time.now
|
|
36
47
|
DeadBro::SqlSubscriber.start_request_tracking
|
|
48
|
+
if DeadBro.configuration.allocation_tracking_enabled && defined?(DeadBro::MemoryTrackingSubscriber)
|
|
49
|
+
DeadBro::MemoryTrackingSubscriber.start_request_tracking
|
|
50
|
+
else
|
|
51
|
+
DeadBro::LightweightMemoryTracker.start_request_tracking if defined?(DeadBro::LightweightMemoryTracker)
|
|
52
|
+
end
|
|
37
53
|
end
|
|
38
54
|
|
|
39
55
|
# Get SQL queries executed during this job
|
|
@@ -110,6 +126,11 @@ module DeadBro
|
|
|
110
126
|
DeadBro.logger.clear
|
|
111
127
|
Thread.current[DeadBro::TRACKING_START_TIME_KEY] = Time.now
|
|
112
128
|
DeadBro::SqlSubscriber.start_request_tracking
|
|
129
|
+
if DeadBro.configuration.allocation_tracking_enabled && defined?(DeadBro::MemoryTrackingSubscriber)
|
|
130
|
+
DeadBro::MemoryTrackingSubscriber.start_request_tracking
|
|
131
|
+
else
|
|
132
|
+
DeadBro::LightweightMemoryTracker.start_request_tracking if defined?(DeadBro::LightweightMemoryTracker)
|
|
133
|
+
end
|
|
113
134
|
end
|
|
114
135
|
|
|
115
136
|
# Get SQL queries executed during this job
|
|
@@ -164,12 +185,24 @@ module DeadBro
|
|
|
164
185
|
}
|
|
165
186
|
|
|
166
187
|
event_name = exception&.class&.name || "ActiveJob::Exception"
|
|
167
|
-
client.post_metric(event_name: event_name, payload: payload,
|
|
188
|
+
client.post_metric(event_name: event_name, payload: payload, force: true)
|
|
168
189
|
end
|
|
169
190
|
rescue
|
|
170
191
|
# Never raise from instrumentation install
|
|
171
192
|
end
|
|
172
193
|
|
|
194
|
+
# Release job-side thread-local tracking state when we've decided not to
# build a payload (excluded job / sampled out). Matches Subscriber.drain_request_tracking.
#
# Each tracker is stopped independently: a failure in one stop call must not
# leave the remaining trackers' state behind on the thread. Never raises and
# always returns nil.
def self.drain_job_tracking
  steps = [
    -> { DeadBro::SqlSubscriber.stop_request_tracking if defined?(DeadBro::SqlSubscriber) },
    -> { DeadBro::LightweightMemoryTracker.stop_request_tracking if defined?(DeadBro::LightweightMemoryTracker) },
    lambda do
      if DeadBro.configuration.allocation_tracking_enabled && defined?(DeadBro::MemoryTrackingSubscriber)
        DeadBro::MemoryTrackingSubscriber.stop_request_tracking
      end
    end
  ]

  steps.each do |step|
    step.call
  rescue
    # Best effort
  end

  nil
end
|
|
205
|
+
|
|
173
206
|
private
|
|
174
207
|
|
|
175
208
|
def self.safe_arguments(arguments)
|
|
@@ -215,17 +248,7 @@ module DeadBro
|
|
|
215
248
|
end
|
|
216
249
|
|
|
217
250
|
def self.memory_usage_mb
|
|
218
|
-
|
|
219
|
-
# Get memory usage in MB
|
|
220
|
-
memory_kb = begin
|
|
221
|
-
`ps -o rss= -p #{Process.pid}`.to_i
|
|
222
|
-
rescue
|
|
223
|
-
0
|
|
224
|
-
end
|
|
225
|
-
(memory_kb / 1024.0).round(2)
|
|
226
|
-
else
|
|
227
|
-
0
|
|
228
|
-
end
|
|
251
|
+
DeadBro::MemoryHelpers.rss_mb
|
|
229
252
|
rescue
|
|
230
253
|
0
|
|
231
254
|
end
|
|
@@ -43,13 +43,11 @@ module DeadBro
|
|
|
43
43
|
end
|
|
44
44
|
|
|
45
45
|
def self.lightweight_memory_usage
|
|
46
|
-
#
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
# Rough estimation: 4KB per page
|
|
52
|
-
(heap_pages * 4) / 1024.0 # Convert to MB
|
|
46
|
+
# Real RSS, cached for ~1s across threads so this is cheap even on hot
|
|
47
|
+
# paths. Previous versions multiplied heap_pages by 4KB and labelled the
|
|
48
|
+
# result as MB — both the unit and the page size were wrong (MRI heap
|
|
49
|
+
# pages are ~16KB and heap != RSS), so the number was effectively fiction.
|
|
50
|
+
DeadBro::MemoryHelpers.rss_mb
|
|
53
51
|
rescue
|
|
54
52
|
0
|
|
55
53
|
end
|
data/lib/dead_bro/logger.rb
CHANGED
|
@@ -18,8 +18,14 @@ module DeadBro
|
|
|
18
18
|
COLOR_ERROR = "\033[31m" # Red
|
|
19
19
|
COLOR_FATAL = "\033[35m" # Magenta
|
|
20
20
|
|
|
21
|
+
# Hard cap per-thread buffer size. Prevents unbounded growth when a
|
|
22
|
+
# request/job logs a lot, or when tracking never gets a chance to flush
|
|
23
|
+
# (e.g. code running outside a request lifecycle).
|
|
24
|
+
MAX_LOG_ENTRIES = 500
|
|
25
|
+
|
|
21
26
|
def initialize
|
|
22
27
|
@thread_logs_key = :dead_bro_logs
|
|
28
|
+
@thread_logs_dropped_key = :dead_bro_logs_dropped
|
|
23
29
|
end
|
|
24
30
|
|
|
25
31
|
def debug(message)
|
|
@@ -42,29 +48,42 @@ module DeadBro
|
|
|
42
48
|
log(:fatal, message)
|
|
43
49
|
end
|
|
44
50
|
|
|
45
|
-
# Get all logs for the current thread
|
|
51
|
+
# Get all logs for the current thread. If the buffer was capped, append a
|
|
52
|
+
# synthetic marker entry so downstream consumers know entries were dropped.
|
|
46
53
|
def logs
|
|
47
|
-
Thread.current[@thread_logs_key] || []
|
|
54
|
+
entries = Thread.current[@thread_logs_key] || []
|
|
55
|
+
dropped = Thread.current[@thread_logs_dropped_key] || 0
|
|
56
|
+
return entries if dropped.zero?
|
|
57
|
+
|
|
58
|
+
entries + [{
|
|
59
|
+
sev: "warn",
|
|
60
|
+
msg: "[DeadBro::Logger] #{dropped} log entries dropped (buffer cap #{MAX_LOG_ENTRIES})",
|
|
61
|
+
time: Time.now.utc.iso8601(3)
|
|
62
|
+
}]
|
|
48
63
|
end
|
|
49
64
|
|
|
50
65
|
# Clear logs for the current thread
|
|
51
66
|
def clear
|
|
52
67
|
Thread.current[@thread_logs_key] = []
|
|
68
|
+
Thread.current[@thread_logs_dropped_key] = 0
|
|
53
69
|
end
|
|
54
70
|
|
|
55
71
|
private
|
|
56
72
|
|
|
57
73
|
def log(severity, message)
|
|
58
74
|
timestamp = Time.now.utc
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
75
|
+
|
|
76
|
+
buffer = (Thread.current[@thread_logs_key] ||= [])
|
|
77
|
+
if buffer.length >= MAX_LOG_ENTRIES
|
|
78
|
+
Thread.current[@thread_logs_dropped_key] =
|
|
79
|
+
(Thread.current[@thread_logs_dropped_key] || 0) + 1
|
|
80
|
+
else
|
|
81
|
+
buffer << {
|
|
82
|
+
sev: severity.to_s,
|
|
83
|
+
msg: message.to_s,
|
|
84
|
+
time: timestamp.iso8601(3) # Include milliseconds for better precision
|
|
85
|
+
}
|
|
86
|
+
end
|
|
68
87
|
|
|
69
88
|
# Print the message immediately
|
|
70
89
|
print_log(severity, message, timestamp)
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DeadBro
  # Turns before/after GC statistics and object counts into a summary hash
  # with human-readable type names and warnings.
  module MemoryDetails
    # Maps Ruby internal ObjectSpace type codes to human-readable names.
    # Types missing here are shown with their "T_" prefix stripped.
    OBJECT_TYPE_NAMES = {
      T_STRING: "String",
      T_ARRAY: "Array",
      T_HASH: "Hash",
      T_OBJECT: "Object",
      T_DATA: "C Extension",
      T_CLASS: "Class",
      T_MODULE: "Module",
      T_STRUCT: "Struct",
      T_MATCH: "MatchData",
      T_REGEXP: "Regexp",
      T_SYMBOL: "Symbol",
      T_FLOAT: "Float",
      T_FILE: "File",
      T_BIGNUM: "Integer (big)"
    }.freeze

    # Noise types never shown to users.
    SKIP_TYPES = %i[FREE T_IMEMO TOTAL T_NODE T_ICLASS T_ZOMBIE T_MOVED].freeze

    # Maximum number of entries reported in :object_breakdown.
    MAX_BREAKDOWN_ENTRIES = 20

    # Converts a {type_code => delta} hash into {readable_name => delta},
    # dropping SKIP_TYPES and non-positive deltas, sorted by delta descending.
    def self.format_object_breakdown(deltas)
      visible = deltas.each_with_object({}) do |(type, count), named|
        next if SKIP_TYPES.include?(type)
        next unless count.positive?

        named[OBJECT_TYPE_NAMES[type] || type.to_s.sub(/\AT_/, "")] = count
      end
      visible.sort_by { |_, count| -count }.to_h
    end

    # Builds the summary for one tracked unit of work.
    #
    # gc_before / gc_after - hashes read with :count, :heap_allocated_pages
    #                        and :total_allocated_objects (missing keys count as 0)
    # memory_before_mb / memory_after_mb - numeric memory readings in MB
    # object_counts_before / object_counts_after - {type_code => count} hashes;
    #                        the breakdown is empty unless both are non-empty
    # large_objects        - collection passed through unchanged; a warning is
    #                        added when it is non-empty
    #
    # Returns a hash with :gc_collections, :heap_pages_added, :new_objects,
    # :object_breakdown, :large_objects and :warnings.
    def self.build(gc_before:, gc_after:, memory_before_mb:, memory_after_mb:,
      object_counts_before:, object_counts_after:, large_objects:)
      memory_delta_mb = (memory_after_mb - memory_before_mb).round(2)
      gc_collections = (gc_after[:count] || 0) - (gc_before[:count] || 0)
      heap_pages_added = (gc_after[:heap_allocated_pages] || 0) - (gc_before[:heap_allocated_pages] || 0)
      new_objects = (gc_after[:total_allocated_objects] || 0) - (gc_before[:total_allocated_objects] || 0)

      raw_deltas = {}
      if object_counts_before.any? && object_counts_after.any?
        (object_counts_before.keys | object_counts_after.keys).each do |type|
          diff = (object_counts_after[type] || 0) - (object_counts_before[type] || 0)
          raw_deltas[type] = diff unless diff.zero?
        end
      end

      # Truncate only after noise types and shrinking types have been filtered
      # out; otherwise entries that are never displayed would use up the limit.
      object_breakdown = format_object_breakdown(raw_deltas).first(MAX_BREAKDOWN_ENTRIES).to_h

      warnings = []
      warnings << "Memory grew #{memory_delta_mb}MB — possible leak or large allocation" if memory_delta_mb > 20
      warnings << "GC ran #{gc_collections} times — many short-lived objects being created" if gc_collections > 5
      warnings << "Heap grew by #{heap_pages_added} pages — Ruby needed more memory from the OS" if heap_pages_added > 10
      warnings << "#{large_objects.length} object(s) over 1MB found in memory" if large_objects.any?

      {
        gc_collections: gc_collections,
        heap_pages_added: heap_pages_added,
        new_objects: new_objects,
        object_breakdown: object_breakdown,
        large_objects: large_objects,
        warnings: warnings
      }
    end
  end
end
|
|
@@ -4,6 +4,68 @@ module DeadBro
|
|
|
4
4
|
module MemoryHelpers
|
|
5
5
|
# Helper methods for memory tracking and leak detection
|
|
6
6
|
|
|
7
|
+
RSS_CACHE_TTL_SECONDS = 1.0
|
|
8
|
+
@rss_cache_mutex = Mutex.new
|
|
9
|
+
@rss_cache = nil # [value_bytes, captured_at_monotonic]
|
|
10
|
+
|
|
11
|
+
# Resident set size of the current process, in bytes.
#
# A reading younger than RSS_CACHE_TTL_SECONDS is served from @rss_cache
# without touching the OS. Otherwise a new reading is taken via
# read_rss_bytes and stored under @rss_cache_mutex, unless another reading
# that is still fresh has been stored in the meantime.
# Returns 0 if anything raises.
def self.rss_bytes
  sampled_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  fresh = ->(entry) { entry && (sampled_at - entry[1]) < RSS_CACHE_TTL_SECONDS }

  snapshot = @rss_cache
  return snapshot[0] if fresh.call(snapshot)

  reading = read_rss_bytes
  @rss_cache_mutex.synchronize do
    # Look again under the lock so a reading stored meanwhile is kept.
    @rss_cache = [reading, sampled_at] unless fresh.call(@rss_cache)
  end
  reading
rescue
  0
end
|
|
33
|
+
|
|
34
|
+
# Resident set size in megabytes (bytes / 1024 / 1024), rounded to two
# decimals. Returns 0.0 if anything raises.
def self.rss_mb
  bytes_per_mb = 1024 * 1024
  rss_bytes.to_f.fdiv(bytes_per_mb).round(2)
rescue
  0.0
end
|
|
39
|
+
|
|
40
|
+
# Takes an uncached RSS reading in bytes.
#
# Tries /proc/self/status first when that file is readable. If it is not
# readable, or it yields no positive value, falls back to `ps`.
# Returns 0 when neither source works or anything raises.
def self.read_rss_bytes
  bytes = File.readable?("/proc/self/status") ? read_rss_from_proc_status : 0
  bytes = read_rss_from_ps unless bytes.positive?
  bytes
rescue
  0
end
|
|
49
|
+
|
|
50
|
+
# Reads RSS from /proc/self/status: the number on the first line that starts
# with "VmRSS:" and carries a positive value is taken as kilobytes and
# returned as bytes. Returns 0 when no such line exists or the file cannot
# be read.
def self.read_rss_from_proc_status
  File.foreach("/proc/self/status") do |entry|
    next unless entry.start_with?("VmRSS:")

    kilobytes = entry.split[1].to_i
    return kilobytes * 1024 if kilobytes.positive?
  end
  0
rescue
  0
end
|
|
60
|
+
|
|
61
|
+
# Reads RSS by running `ps` for the current pid; the output is taken as
# kilobytes and returned as bytes. Returns 0 when the output is not a
# positive number or the command cannot be run.
def self.read_rss_from_ps
  resident_kb = `ps -o rss= -p #{Process.pid}`.to_i
  resident_kb.positive? ? resident_kb * 1024 : 0
rescue
  0
end
|
|
68
|
+
|
|
7
69
|
# Take a memory snapshot with a custom label
|
|
8
70
|
def self.snapshot(label)
|
|
9
71
|
DeadBro::MemoryTrackingSubscriber.take_memory_snapshot(label)
|