dead_bro 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +5 -0
- data/FEATURES.md +338 -0
- data/README.md +274 -0
- data/lib/dead_bro/cache_subscriber.rb +106 -0
- data/lib/dead_bro/circuit_breaker.rb +117 -0
- data/lib/dead_bro/client.rb +110 -0
- data/lib/dead_bro/configuration.rb +146 -0
- data/lib/dead_bro/error_middleware.rb +112 -0
- data/lib/dead_bro/http_instrumentation.rb +113 -0
- data/lib/dead_bro/job_sql_tracking_middleware.rb +26 -0
- data/lib/dead_bro/job_subscriber.rb +243 -0
- data/lib/dead_bro/lightweight_memory_tracker.rb +63 -0
- data/lib/dead_bro/logger.rb +127 -0
- data/lib/dead_bro/memory_helpers.rb +87 -0
- data/lib/dead_bro/memory_leak_detector.rb +196 -0
- data/lib/dead_bro/memory_tracking_subscriber.rb +361 -0
- data/lib/dead_bro/railtie.rb +90 -0
- data/lib/dead_bro/redis_subscriber.rb +282 -0
- data/lib/dead_bro/sql_subscriber.rb +467 -0
- data/lib/dead_bro/sql_tracking_middleware.rb +78 -0
- data/lib/dead_bro/subscriber.rb +357 -0
- data/lib/dead_bro/version.rb +5 -0
- data/lib/dead_bro/view_rendering_subscriber.rb +151 -0
- data/lib/dead_bro.rb +69 -0
- metadata +66 -0
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "active_support/notifications"
|
|
4
|
+
|
|
5
|
+
module DeadBro
  # Buffers ActiveSupport cache notifications for the unit of work running on
  # the current thread.
  #
  # Usage: call .subscribe! once, then wrap each request/job with
  # .start_request_tracking / .stop_request_tracking. Events are only
  # collected while a buffer exists in Thread.current[THREAD_LOCAL_KEY].
  class CacheSubscriber
    THREAD_LOCAL_KEY = :dead_bro_cache_events

    EVENTS = [
      "cache_read.active_support",
      "cache_write.active_support",
      "cache_delete.active_support",
      "cache_exist?.active_support",
      "cache_fetch_hit.active_support",
      "cache_generate.active_support",
      "cache_read_multi.active_support",
      "cache_write_multi.active_support"
    ].freeze

    # Subscribes to every notification listed in EVENTS.
    # All errors are swallowed so that installing or running the
    # instrumentation can never break the host application.
    def self.subscribe!
      EVENTS.each do |event_name|
        ActiveSupport::Notifications.subscribe(event_name) do |name, started, finished, _unique_id, data|
          buffer = Thread.current[THREAD_LOCAL_KEY]
          next unless buffer # tracking is not active on this thread

          duration_ms = ((finished - started) * 1000.0).round(2)
          event = build_event(name, data, duration_ms)
          buffer << event if event
        rescue
          # The callback runs inside the host app's cache call; never let a
          # reporting failure propagate into it.
        end
      rescue
        # Ignore an event that cannot be subscribed to and continue with the rest.
      end
    rescue
      # Never raise from instrumentation install
    end

    # Starts a fresh, empty event buffer for the current thread.
    def self.start_request_tracking
      Thread.current[THREAD_LOCAL_KEY] = []
    end

    # Stops tracking on the current thread.
    # Returns the collected events (an empty array when tracking was not active).
    def self.stop_request_tracking
      events = Thread.current[THREAD_LOCAL_KEY]
      Thread.current[THREAD_LOCAL_KEY] = nil
      events || []
    end

    # Converts a notification payload into the Hash stored in the buffer.
    #
    # name        - notification name, e.g. "cache_read.active_support"
    # data        - notification payload; anything other than a Hash yields nil
    # duration_ms - elapsed time in milliseconds
    #
    # Returns the event Hash, or nil when the payload is unusable or an error occurs.
    def self.build_event(name, data, duration_ms)
      return nil unless data.is_a?(Hash)

      # Prefer an explicit :keys entry; otherwise count a collection passed as :key
      # (multi-key operations may report their keys that way).
      keys = data[:keys]
      keys = data[:key] if keys.nil? && (data[:key].is_a?(Array) || data[:key].is_a?(Hash))

      {
        event: name,
        duration_ms: duration_ms,
        key: safe_key(data[:key]),
        keys_count: safe_keys_count(keys),
        hit: infer_hit(name, data),
        store: safe_store_name(data[:store]),
        namespace: safe_namespace(data[:namespace]),
        at: Time.now.utc.to_i
      }
    rescue
      nil
    end

    # Returns the key as a String limited to 200 characters (plus an ellipsis
    # when truncated), or nil for a nil key.
    def self.safe_key(key)
      return nil if key.nil?

      # Hash#to_s would embed the values next to the keys; report the keys only.
      key = key.keys if key.is_a?(Hash)
      s = key.to_s
      (s.length > 200) ? s[0, 200] + "…" : s
    rescue
      nil
    end

    # Returns keys.size when the object has a size, otherwise nil.
    def self.safe_keys_count(keys)
      keys.size if keys.respond_to?(:size)
    rescue
      nil
    end

    # Returns a printable name for the cache store.
    #
    # A String or Symbol is treated as the name itself. Without this check a
    # String falls through to `store.class.name` and is reported as "String".
    def self.safe_store_name(store)
      return nil unless store
      return store.to_s if store.is_a?(String) || store.is_a?(Symbol)

      store.respond_to?(:name) ? store.name : store.class.name
    rescue
      nil
    end

    # Returns the namespace as a String of at most 100 characters.
    # A nil namespace yields "" (nil.to_s).
    def self.safe_namespace(ns)
      ns.to_s[0, 100]
    rescue
      nil
    end

    # Returns true for fetch hits, the boolean :hit flag for plain reads and
    # nil for every other event.
    def self.infer_hit(name, data)
      case name
      when "cache_fetch_hit.active_support"
        true
      when "cache_read.active_support"
        !!data[:hit]
      end
    rescue
      nil
    end
  end
end
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DeadBro
  # Three-state circuit breaker (closed -> open -> half-open -> closed).
  #
  # * CLOSED:    calls run; failures are counted and the circuit opens once
  #              failure_threshold is reached.
  # * OPEN:      calls are rejected with :circuit_open until recovery_timeout
  #              seconds have passed since the last failure.
  # * HALF_OPEN: calls run; a success closes the circuit, a failure re-opens it.
  #
  # State changes are guarded by a Mutex because outcomes may be recorded from
  # several threads at once. The wrapped block itself runs outside the lock.
  class CircuitBreaker
    # Circuit breaker states
    CLOSED = :closed
    OPEN = :open
    HALF_OPEN = :half_open

    # Default configuration
    DEFAULT_FAILURE_THRESHOLD = 3
    DEFAULT_RECOVERY_TIMEOUT = 60 # seconds
    DEFAULT_RETRY_TIMEOUT = 300 # seconds for retry attempts

    attr_reader :state, :failure_count, :last_failure_time, :last_success_time

    # failure_threshold - consecutive failures that open a closed circuit
    # recovery_timeout  - seconds to wait after the last failure before a trial call
    # retry_timeout     - stored only; nothing in this class reads it
    def initialize(
      failure_threshold: DEFAULT_FAILURE_THRESHOLD,
      recovery_timeout: DEFAULT_RECOVERY_TIMEOUT,
      retry_timeout: DEFAULT_RETRY_TIMEOUT
    )
      @failure_threshold = failure_threshold
      @recovery_timeout = recovery_timeout
      @retry_timeout = retry_timeout

      @mutex = Mutex.new
      @state = CLOSED
      @failure_count = 0
      @last_failure_time = nil
      @last_success_time = nil
    end

    # Runs the block unless the circuit is open.
    #
    # Returns the block's result, or :circuit_open when the call was rejected.
    # Exceptions raised by the block are counted as failures and re-raised.
    def call(&block)
      return :circuit_open unless allow_request?

      execute_with_monitoring(&block)
    end

    # Forces the circuit closed and clears the failure bookkeeping.
    def reset!
      @mutex.synchronize do
        @state = CLOSED
        @failure_count = 0
        @last_failure_time = nil
      end
    end

    # Forces the circuit open, starting the recovery timer now.
    def open!
      @mutex.synchronize do
        @state = OPEN
        @last_failure_time = Time.now
      end
    end

    # Forces the circuit into the half-open (trial) state.
    def transition_to_half_open!
      @mutex.synchronize { @state = HALF_OPEN }
    end

    # True when a failure has been recorded and at least recovery_timeout
    # seconds have elapsed since then.
    def should_attempt_reset?
      @mutex.synchronize { reset_due? }
    end

    private

    # Decides whether a call may proceed, moving OPEN -> HALF_OPEN when the
    # recovery timeout has elapsed. Check and transition happen under one lock.
    def allow_request?
      @mutex.synchronize do
        if @state == OPEN
          next false unless reset_due?

          @state = HALF_OPEN
        end
        true
      end
    end

    # Lock-free body of should_attempt_reset?; callers must hold @mutex.
    def reset_due?
      return false unless @last_failure_time

      (Time.now - @last_failure_time) >= @recovery_timeout
    end

    def execute_with_monitoring(&block)
      result = block.call
      success?(result) ? on_success : on_failure
      result
    rescue
      on_failure
      raise
    end

    # Only Net::HTTPSuccess (2xx) responses count as success. When net/http has
    # not been loaded no such response can exist, so the result is a failure
    # rather than a NameError.
    def success?(result)
      defined?(::Net::HTTPSuccess) ? result.is_a?(::Net::HTTPSuccess) : false
    end

    def on_success
      @mutex.synchronize do
        @failure_count = 0
        @last_success_time = Time.now
        @state = CLOSED
      end
    end

    def on_failure
      @mutex.synchronize do
        @failure_count += 1
        @last_failure_time = Time.now

        # A failed trial call re-opens the circuit immediately; otherwise open
        # only once the threshold is reached.
        @state = OPEN if @state == HALF_OPEN || @failure_count >= @failure_threshold
      end
    end
  end
end
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "uri"
|
|
5
|
+
require "json"
|
|
6
|
+
require "timeout"
|
|
7
|
+
|
|
8
|
+
module DeadBro
  # Sends metric events to the APM endpoint over HTTP.
  #
  # Requests are posted from a short-lived background thread so the caller is
  # never blocked on network I/O. When enabled in the configuration, a circuit
  # breaker suppresses requests after repeated failures.
  class Client
    PRODUCTION_ENDPOINT = "https://www.deadbro.com/apm/v1/metrics"
    STAGING_ENDPOINT = "https://deadbro.aberatii.com/apm/v1/metrics"
    DEVELOPMENT_ENDPOINT = "http://localhost:3100/apm/v1/metrics"

    # ISO 8601 UTC timestamp layout. strftime is used instead of Time#iso8601
    # so the client does not depend on `require "time"` having been loaded.
    SENT_AT_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

    def initialize(configuration = DeadBro.configuration)
      @configuration = configuration
      @circuit_breaker = create_circuit_breaker
    end

    # Queues a metric for delivery.
    #
    # event_name - name of the event
    # payload    - JSON-serializable event data
    #
    # Nothing is sent when no API key is configured, the client is disabled,
    # the event is not sampled, or the circuit breaker is open.
    # Always returns nil.
    def post_metric(event_name:, payload:)
      return if @configuration.api_key.nil?
      return unless @configuration.enabled

      # Check sampling rate - skip if not selected for sampling
      return unless @configuration.should_sample?

      # Check circuit breaker before making request
      return unless circuit_allows_request?

      # Make the HTTP request (async)
      make_http_request(event_name, payload, @configuration.api_key)

      nil
    end

    private

    def create_circuit_breaker
      return nil unless @configuration.circuit_breaker_enabled

      CircuitBreaker.new(
        failure_threshold: @configuration.circuit_breaker_failure_threshold,
        recovery_timeout: @configuration.circuit_breaker_recovery_timeout,
        retry_timeout: @configuration.circuit_breaker_retry_timeout
      )
    end

    # Truthy when a breaker exists and is still enabled in the configuration.
    def circuit_breaker_active?
      @circuit_breaker && @configuration.circuit_breaker_enabled
    end

    # Returns false only while the breaker is open and not yet due for a trial
    # request. An open breaker that is due is moved to half-open first.
    def circuit_allows_request?
      return true unless circuit_breaker_active?
      return true unless @circuit_breaker.state == :open
      return false unless @circuit_breaker.should_attempt_reset?

      @circuit_breaker.transition_to_half_open!
      true
    end

    # Reports the outcome of one delivery attempt to the circuit breaker.
    # on_success / on_failure are private on the breaker, hence #send.
    def record_outcome(success)
      return unless circuit_breaker_active?

      @circuit_breaker.send(success ? :on_success : :on_failure)
    end

    # Picks the endpoint: local development server when ruby_dev is set,
    # staging when USE_STAGING_ENDPOINT is a non-empty string, production otherwise.
    def endpoint_url
      return DEVELOPMENT_ENDPOINT if @configuration.ruby_dev

      use_staging = ENV["USE_STAGING_ENDPOINT"] && !ENV["USE_STAGING_ENDPOINT"].empty?
      use_staging ? STAGING_ENDPOINT : PRODUCTION_ENDPOINT
    end

    # Builds the JSON POST request carrying the event.
    def build_request(uri, event_name, payload, api_key)
      request = Net::HTTP::Post.new(uri.request_uri)
      request["Content-Type"] = "application/json"
      request["Authorization"] = "Bearer #{api_key}"
      body = {
        event: event_name,
        payload: payload,
        sent_at: Time.now.utc.strftime(SENT_AT_FORMAT),
        revision: @configuration.resolve_deploy_id
      }
      request.body = JSON.dump(body)
      request
    end

    def make_http_request(event_name, payload, api_key)
      uri = URI.parse(endpoint_url)
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = (uri.scheme == "https")
      http.open_timeout = @configuration.open_timeout
      http.read_timeout = @configuration.read_timeout

      request = build_request(uri, event_name, payload, api_key)

      # Fire-and-forget using a short-lived thread to avoid blocking the request cycle.
      Thread.new do
        response = http.request(request)
        # A nil or non-2xx response counts as a failure.
        record_outcome(response.is_a?(Net::HTTPSuccess))
        response
      rescue
        # Timeouts (Timeout::Error is a StandardError) and every other
        # delivery error count as a failure.
        record_outcome(false)
      end

      nil
    end

    def log_debug(message)
      if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
        Rails.logger.debug(message)
      else
        $stdout.puts(message)
      end
    end
  end
end
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DeadBro
  # Holds all user-tunable settings plus the matching helpers that decide
  # whether a controller action or job should be tracked.
  #
  # Patterns in the excluded_*/exclusive_* lists are either exact names or
  # globs using "*". In a pattern containing "#" (controller#action) the "*"
  # matches any characters; otherwise it matches any characters except ":",
  # i.e. it does not cross a "::" namespace boundary.
  class Configuration
    attr_accessor :api_key, :open_timeout, :read_timeout, :enabled, :ruby_dev, :memory_tracking_enabled,
      :allocation_tracking_enabled, :circuit_breaker_enabled, :circuit_breaker_failure_threshold, :circuit_breaker_recovery_timeout,
      :circuit_breaker_retry_timeout, :excluded_controllers, :excluded_jobs,
      :exclusive_controllers, :exclusive_jobs, :deploy_id, :slow_query_threshold_ms, :explain_analyze_enabled

    # The writer is defined explicitly below (it validates its argument).
    attr_reader :sample_rate

    def initialize
      @api_key = nil
      @endpoint_url = nil
      @open_timeout = 1.0
      @read_timeout = 1.0
      @enabled = true
      @ruby_dev = false
      @memory_tracking_enabled = true
      @allocation_tracking_enabled = false # Disabled by default for performance
      @circuit_breaker_enabled = true
      @circuit_breaker_failure_threshold = 3
      @circuit_breaker_recovery_timeout = 60 # seconds
      @circuit_breaker_retry_timeout = 300 # seconds
      @sample_rate = 100
      @excluded_controllers = []
      @excluded_jobs = []
      @exclusive_controllers = []
      @exclusive_jobs = []
      @deploy_id = resolve_deploy_id
      @slow_query_threshold_ms = 500 # Default: 500ms
      @explain_analyze_enabled = false # EXPLAIN ANALYZE for slow queries is disabled by default
    end

    # Returns the deploy identifier: the first set environment variable among
    # dead_bro_DEPLOY_ID, GIT_REV and HEROKU_SLUG_COMMIT, falling back to
    # DeadBro.process_deploy_id.
    def resolve_deploy_id
      ENV["dead_bro_DEPLOY_ID"] || ENV["GIT_REV"] || ENV["HEROKU_SLUG_COMMIT"] || DeadBro.process_deploy_id
    end

    # True when the controller (or, if action_name is given, the specific
    # controller#action) matches the exclusion list.
    #
    # Patterns containing "#" are compared against "Controller#action" and are
    # only consulted when action_name is given. Patterns without "#" are
    # compared against the controller name and exclude all of its actions.
    def excluded_controller?(controller_name, action_name = nil)
      return false if @excluded_controllers.empty?

      action_patterns, controller_patterns = @excluded_controllers.partition { |pat| pat.to_s.include?("#") }

      if action_name
        target = "#{controller_name}##{action_name}"
        return true if action_patterns.any? { |pat| match_name_or_pattern?(target, pat) }
      end

      controller_patterns.any? { |pat| match_name_or_pattern?(controller_name, pat) }
    end

    # True when the job class name matches the exclusion list.
    def excluded_job?(job_class_name)
      return false if @excluded_jobs.empty?

      @excluded_jobs.any? { |pat| match_name_or_pattern?(job_class_name, pat) }
    end

    # True when the job is allowed by the exclusive list. An empty list allows everything.
    def exclusive_job?(job_class_name)
      return true if @exclusive_jobs.empty? # If not defined, allow all (default behavior)

      @exclusive_jobs.any? { |pat| match_name_or_pattern?(job_class_name, pat) }
    end

    # True when the controller action is allowed by the exclusive list.
    # An empty list allows everything.
    #
    # Every pattern is compared against "Controller#action". Patterns without
    # "#" are additionally compared against the bare controller name, so that
    # an entry such as "UsersController" allows all of its actions (mirroring
    # how excluded_controller? treats controller-only patterns).
    def exclusive_controller?(controller_name, action_name)
      return true if @exclusive_controllers.empty? # If not defined, allow all (default behavior)

      target = "#{controller_name}##{action_name}"
      @exclusive_controllers.any? do |pat|
        match_name_or_pattern?(target, pat) ||
          (!pat.to_s.include?("#") && match_name_or_pattern?(controller_name, pat))
      end
    end

    # Randomly decides whether the current event should be sent, based on the
    # resolved sample rate (a percentage from 0 to 100).
    def should_sample?
      sample_rate = resolve_sample_rate
      return true if sample_rate >= 100
      return false if sample_rate <= 0

      # Generate random number 1-100 and check if it's within sample rate
      rand(1..100) <= sample_rate
    end

    # Returns the effective sample rate.
    #
    # The configured value wins when it is not nil. Otherwise the
    # dead_bro_SAMPLE_RATE environment variable is used when it is a whole
    # number from 0 to 100; anything else falls back to 100.
    def resolve_sample_rate
      return @sample_rate unless @sample_rate.nil?

      env_value = ENV["dead_bro_SAMPLE_RATE"].to_s.strip
      # \A and \z anchor the whole string; ^ and $ would accept a value such as
      # "50\nabc" because they match a single line.
      return 100 unless env_value.match?(/\A\d+\z/)

      parsed = env_value.to_i
      (parsed <= 100) ? parsed : 100
    end

    # Returns the configured API key, or the dead_bro_API_KEY environment
    # variable when none is configured.
    def resolve_api_key
      return @api_key unless @api_key.nil?

      ENV["dead_bro_API_KEY"]
    end

    # Sets the sample rate.
    #
    # value - nil (resolve from the environment) or an Integer from 0 to 100
    #
    # Raises ArgumentError for any other value.
    def sample_rate=(value)
      # Allow nil to use default/resolved value
      return @sample_rate = nil if value.nil?

      # Allow 0 to disable sampling, or 1-100 for percentage
      unless value.is_a?(Integer) && value >= 0 && value <= 100
        raise ArgumentError, "Sample rate must be an integer between 0 and 100, got: #{value.inspect}"
      end
      @sample_rate = value
    end

    private

    # Compares name with an exact name or a "*" glob (see the class comment for
    # the glob rules). Returns false for nil arguments and on any error.
    def match_name_or_pattern?(name, pattern)
      return false if name.nil? || pattern.nil?

      pat = pattern.to_s
      candidate = name.to_s
      return candidate == pat unless pat.include?("*")

      # Controller action patterns (containing '#') let * match anything,
      # including colons; other patterns keep * within one namespace segment.
      wildcard = pat.include?("#") ? ".*" : "[^:]*"
      Regexp.new("\\A" + Regexp.escape(pat).gsub("\\*", wildcard) + "\\z").match?(candidate)
    rescue
      false
    end
  end
end
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rack"
|
|
4
|
+
|
|
5
|
+
module DeadBro
  # Rack middleware that reports exceptions escaping the downstream app and
  # then re-raises them unchanged.
  class ErrorMiddleware
    EVENT_NAME = "exception.uncaught"

    # Parameter names whose values are never reported. Compared
    # case-insensitively against the key at every nesting level.
    SENSITIVE_PARAM_KEYS = %w[password password_confirmation token secret key authorization api_key].freeze

    # app    - the downstream Rack application
    # client - object responding to post_metric(event_name:, payload:);
    #          defaults to DeadBro.client
    def initialize(app, client = nil)
      @app = app
      @client = client || DeadBro.client
    end

    # Calls the downstream app. On any exception a metric named after the
    # exception class (EVENT_NAME when the class has no name) is posted and the
    # original exception is re-raised. Failures while reporting are swallowed.
    def call(env)
      @app.call(env)
    rescue Exception => exception # rubocop:disable Lint/RescueException
      begin
        payload = build_payload(exception, env)
        # Use the error class name as the event name
        event_name = exception.class.name.to_s
        event_name = EVENT_NAME if event_name.empty?
        @client.post_metric(event_name: event_name, payload: payload)
      rescue
        # Never let APM reporting interfere with the host app
      end
      raise
    end

    private

    # Assembles the reported Hash: exception details, request details taken
    # from the Rack env, process information and the buffered DeadBro logs.
    def build_payload(exception, env)
      req = rack_request(env)

      {
        exception_class: exception.class.name,
        message: truncate(exception.message.to_s, 1000),
        backtrace: safe_backtrace(exception),
        occurred_at: Time.now.utc.to_i,
        rack:
          {
            method: req&.request_method,
            path: req&.path,
            fullpath: req&.fullpath,
            ip: req&.ip,
            user_agent: truncate(req&.user_agent.to_s, 200),
            params: safe_params(req),
            request_id: env["action_dispatch.request_id"] || env["HTTP_X_REQUEST_ID"],
            referer: truncate(env["HTTP_REFERER"].to_s, 500),
            host: env["HTTP_HOST"]
          },
        rails_env: safe_rails_env,
        app: safe_app_name,
        pid: Process.pid,
        logs: DeadBro.logger.logs
      }
    end

    # Wraps env in a Rack::Request; nil when that is not possible.
    def rack_request(env)
      ::Rack::Request.new(env)
    rescue
      nil
    end

    # First 50 backtrace lines, or [] when unavailable.
    def safe_backtrace(exception)
      Array(exception.backtrace).first(50)
    rescue
      []
    end

    # Returns the request parameters with sensitive entries removed at every
    # nesting level, round-tripped through JSON so the result is JSON-safe.
    # Returns {} without a request or on any error.
    def safe_params(req)
      return {} unless req

      filtered = filter_sensitive(req.params || {})
      JSON.parse(JSON.dump(filtered)) # ensure JSON-safe
    rescue
      {}
    end

    # Recursively copies value, dropping Hash entries whose key is listed in
    # SENSITIVE_PARAM_KEYS. Nested parameters such as user[password] arrive as
    # nested Hashes/Arrays, so a top-level-only filter would let them through.
    def filter_sensitive(value)
      case value
      when Hash
        value.each_with_object({}) do |(name, inner), kept|
          next if SENSITIVE_PARAM_KEYS.include?(name.to_s.downcase)

          kept[name] = filter_sensitive(inner)
        end
      when Array
        value.map { |inner| filter_sensitive(inner) }
      else
        value
      end
    end

    # Returns str cut to at most max characters (nil stays nil).
    def truncate(str, max)
      return str if str.nil? || str.length <= max

      str[0..(max - 1)]
    end

    # Rails.env when Rails is loaded, else RAILS_ENV / RACK_ENV / "development".
    def safe_rails_env
      if defined?(Rails) && Rails.respond_to?(:env)
        Rails.env
      else
        ENV["RAILS_ENV"] || ENV["RACK_ENV"] || "development"
      end
    rescue
      "development"
    end

    # Parent module name of the Rails application class, or "" when unavailable.
    def safe_app_name
      if defined?(Rails) && Rails.respond_to?(:application)
        begin
          Rails.application.class.module_parent_name
        rescue
          ""
        end
      else
        ""
      end
    end
  end
end
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "uri"
|
|
4
|
+
require "net/http"
|
|
5
|
+
|
|
6
|
+
module DeadBro
  # Records outgoing HTTP calls made through Net::HTTP (and Typhoeus when it is
  # loaded) by prepending a wrapper module onto the client class.
  #
  # Events are appended to Thread.current[:dead_bro_http_events] and are only
  # collected while that buffer exists. The wrappers never change the wrapped
  # method's return value or the exception it raises.
  module HttpInstrumentation
    EVENT_NAME = "outgoing.http"

    # Matches the APM ingestion endpoints (development, staging, production),
    # with or without scheme and port, so our own traffic is not reported.
    APM_ENDPOINT_PATTERN =
      %r{\A(?:https?://)?(?:localhost|www\.deadbro\.com|deadbro\.aberatii\.com)(?::\d+)?/apm/v1/metrics}i

    # Installs all available wrappers. Safe to call more than once.
    # The client argument is accepted for interface compatibility; the
    # wrappers do not use it.
    def self.install!(client: Client.new)
      install_net_http!(client)
      install_typhoeus!(client) if defined?(::Typhoeus)
    rescue
      # Never raise from instrumentation install
    end

    # True when url points at one of the APM ingestion endpoints.
    def self.apm_endpoint?(url)
      APM_ENDPOINT_PATTERN.match?(url.to_s)
    rescue
      false
    end

    # Appends one Net::HTTP event to the current thread's buffer, if any.
    # Requests to the APM endpoint itself are skipped. Never raises.
    #
    # request/response - the Net::HTTP request and response (response may be nil)
    # error            - the exception raised by the request, or nil
    # duration_ms      - elapsed time in milliseconds
    # address/port/ssl - connection details used to rebuild the URL when the
    #                    request carries no URI of its own
    def self.record_net_http_event(request:, response:, error:, duration_ms:, address:, port:, ssl:)
      events = Thread.current[:dead_bro_http_events]
      return unless events

      uri = begin
        scheme = ssl ? "https" : "http"
        URI.parse(request.uri ? request.uri.to_s : "#{scheme}://#{address}:#{port}#{request.path}")
      rescue
        nil
      end
      return if uri && apm_endpoint?(uri)

      # Accumulate per-request; only send with controller metric
      events << {
        library: "net_http",
        method: request.method,
        url: uri && uri.to_s,
        host: (uri && uri.host) || address,
        path: (uri && uri.path) || request.path,
        status: response && response.code.to_i,
        duration_ms: duration_ms,
        exception: error && error.class.name
      }
    rescue
      nil
    end

    def self.install_net_http!(client)
      # The wrapper module is built once and remembered; building a new
      # anonymous module per call would be prepended again on every install.
      @net_http_patch ||= Module.new do
        define_method(:request) do |req, body = nil, &block|
          if @dead_bro_request_in_flight
            # Net::HTTP#request calls itself (through #start) when the
            # connection is not open yet; the outer invocation already
            # measures this request, so pass straight through.
            super(req, body, &block)
          else
            @dead_bro_request_in_flight = true
            start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
            response = nil
            error = nil
            begin
              response = super(req, body, &block)
            rescue Exception => e # rubocop:disable Lint/RescueException
              # Captured only to label the event; always re-raised untouched.
              error = e
              raise
            ensure
              @dead_bro_request_in_flight = false
              finish_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
              duration_ms = ((finish_time - start_time) * 1000.0).round(2)
              ::DeadBro::HttpInstrumentation.record_net_http_event(
                request: req,
                response: response,
                error: error,
                duration_ms: duration_ms,
                address: @address,
                port: @port,
                ssl: use_ssl?
              )
            end
          end
        end
      end

      ::Net::HTTP.prepend(@net_http_patch) unless ::Net::HTTP.ancestors.include?(@net_http_patch)
    end

    def self.install_typhoeus!(client)
      @typhoeus_patch ||= Module.new do
        define_method(:run) do |*args|
          start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
          response = nil
          begin
            response = super(*args)
          ensure
            finish_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
            duration_ms = ((finish_time - start_time) * 1000.0).round(2)
            begin
              req_url = if respond_to?(:url)
                url
              elsif respond_to?(:base_url)
                base_url
              end

              # Skip our own APM endpoint; everything else is recorded.
              unless req_url && ::DeadBro::HttpInstrumentation.apm_endpoint?(req_url)
                payload = {
                  library: "typhoeus",
                  method: (respond_to?(:options) && options[:method]) ? options[:method].to_s.upcase : nil,
                  url: req_url,
                  status: response && response.code,
                  duration_ms: duration_ms
                }
                # Accumulate per-request; only send with controller metric
                events = Thread.current[:dead_bro_http_events]
                events << payload if events
              end
            rescue
              # Reporting must never affect the wrapped call.
            end
          end
        end
      end

      ::Typhoeus::Request.prepend(@typhoeus_patch) unless ::Typhoeus::Request.ancestors.include?(@typhoeus_patch)
    rescue
    end
  end
end
|