debug-agent 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +62 -5
- data/lib/debug_agent/inspectors/config_inspector.rb +137 -0
- data/lib/debug_agent/inspectors/endpoint_test.rb +284 -0
- data/lib/debug_agent/inspectors/feature_flags.rb +215 -0
- data/lib/debug_agent/inspectors/locks.rb +343 -0
- data/lib/debug_agent/inspectors/migration.rb +150 -0
- data/lib/debug_agent/inspectors/pool_inspector.rb +320 -0
- data/lib/debug_agent/version.rb +1 -1
- data/lib/debug_agent.rb +6 -0
- metadata +7 -1
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
require 'time'
|
|
2
|
+
|
|
3
|
+
module DebugAgent
|
|
4
|
+
# Register feature flags for inspection.
|
|
5
|
+
#
|
|
6
|
+
# DebugAgent.register_feature_flag(:new_ui, enabled: true, variant: 'v2')
|
|
7
|
+
@feature_flags = {}
|
|
8
|
+
|
|
9
|
+
class << self
|
|
10
|
+
attr_reader :feature_flags
|
|
11
|
+
|
|
12
|
+
# Stores (or overwrites) a manually registered feature flag.
#
# @param name [#to_s] flag identifier; the entry is keyed by its string form
# @param enabled [Object] enabled state, stored exactly as given
# @param variant [Object, nil] optional variant label
# @return [Hash] the entry that was stored, including an ISO 8601
#   :registered_at timestamp taken at call time
def register_feature_flag(name, enabled:, variant: nil)
  entry = { enabled: enabled, variant: variant, registered_at: Time.now.iso8601 }
  @feature_flags[name.to_s] = entry
end
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
class << self
|
|
22
|
+
private
|
|
23
|
+
|
|
24
|
+
# Collects feature flags from Flipper when the constant is loaded.
#
# Uses Flipper.instance when Flipper responds to it, otherwise the Flipper
# module itself (only if Flipper::DSL is defined).
#
# @return [Array<Hash>, nil] one hash per feature (:name, :enabled, :variant,
#   :source), or nil when Flipper is absent, cannot be resolved, or any
#   StandardError is raised while reading it
def flipper_flags
  return nil unless defined?(::Flipper)

  client =
    if ::Flipper.respond_to?(:instance)
      ::Flipper.instance
    elsif defined?(::Flipper::DSL)
      ::Flipper
    end
  return nil unless client

  collected = []
  client.features.each do |feature|
    current_state = feature.state
    label = feature.enabled? ? 'enabled' : 'disabled'
    collected << {
      name: feature.key,
      # Only a state of :on counts as enabled here.
      enabled: current_state == :on,
      variant: label,
      source: 'flipper'
    }
  end
  collected
rescue
  # Best effort: detection problems are reported as "no data".
  nil
end
|
|
49
|
+
|
|
50
|
+
# Collects feature flags from Rollout when the constant is loaded.
#
# The client is the $rollout global when it holds a value, otherwise
# Rollout.instance when Rollout responds to it. (Testing the global with
# `defined?` is not enough: a global assigned nil is still "defined", which
# used to suppress the Rollout.instance fallback.)
#
# @return [Array<Hash>, nil] one hash per feature (:name, :enabled, :variant,
#   :source); an empty array when the client does not respond to #features;
#   nil when Rollout is absent, no client is found, or a StandardError is
#   raised while reading it
def rollout_flags
  return nil unless defined?(::Rollout)

  client = $rollout
  client ||= ::Rollout.instance if ::Rollout.respond_to?(:instance)
  return nil unless client
  return [] unless client.respond_to?(:features)

  client.features.map do |feature_name|
    {
      name: feature_name.to_s,
      enabled: client.active?(feature_name),
      variant: nil,
      source: 'rollout'
    }
  end
rescue StandardError
  # Best effort: detection problems are reported as "no data".
  nil
end
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
# Tool: merges manually registered flags with those auto-detected from
# Flipper and Rollout. Sources are merged in that order and a flag is skipped
# when an earlier source already supplied the same :name. Any StandardError is
# reported as { error: message } instead of being raised.
register_tool('get_feature_flags',
              'List all feature flags (registered + auto-detected Flipper/Rollout). ' \
              'Shows name, enabled state, and variant') do
  flags = feature_flags.map do |name, data|
    {
      name: name,
      enabled: data[:enabled],
      variant: data[:variant],
      source: 'registered',
      registered_at: data[:registered_at]
    }
  end

  [flipper_flags, rollout_flags].each do |provider_flags|
    next unless provider_flags

    # Names are snapshotted once per provider, so a provider's entries are only
    # de-duplicated against sources merged before it.
    known_names = flags.map { |flag| flag[:name] }
    provider_flags.each do |flag|
      flags << flag unless known_names.include?(flag[:name])
    end
  end

  {
    total_flags: flags.size,
    enabled_count: flags.count { |flag| flag[:enabled] },
    disabled_count: flags.count { |flag| !flag[:enabled] },
    flags: flags,
    detected_providers: {
      # `defined?` yields a descriptive String ("constant") or nil; report a
      # real boolean instead of leaking that String into the tool output.
      flipper: defined?(::Flipper) ? true : false,
      rollout: defined?(::Rollout) ? true : false
    }
  }
rescue => e
  { error: e.message }
end
|
|
126
|
+
|
|
127
|
+
# Tool: evaluates a single flag, optionally for a specific user/context.
#
# Lookup order: registered flags, then Flipper, then Rollout. A registered
# flag is returned immediately, annotated with per-user results from
# Flipper/Rollout when user_context is given and those libraries are loaded.
# Provider errors are swallowed (best effort) and the next source is tried.
# When nothing matches, a 'not_found' result with an :error message is
# returned. Any other StandardError is reported as { error: message }.
register_tool('evaluate_flag',
              'Evaluate a feature flag for a given context/user. Returns enabled state ' \
              'and variant for the specified flag',
              flag_name: { type: 'string', description: 'Name of the feature flag to evaluate', required: true },
              user_context: { type: 'string', description: 'User/context identifier for targeted flag evaluation (optional)', required: false }) do |flag_name:, user_context: nil|
  flag_key = flag_name.to_s
  flag_sym = flag_key.to_sym

  # Flipper only accepts actors that respond to #flipper_id; passing the raw
  # String raised inside Flipper and the error was swallowed, so per-user
  # evaluation never produced a result. Wrap the identifier in a minimal actor.
  flipper_actor = user_context && Struct.new(:flipper_id).new(user_context.to_s)

  # Prefer Flipper.instance, falling back to the Flipper module itself.
  resolve_flipper = lambda do
    begin
      ::Flipper.instance
    rescue StandardError
      ::Flipper
    end
  end

  # Prefer the $rollout global, falling back to Rollout.instance when available.
  resolve_rollout = lambda do
    $rollout || begin
      ::Rollout.instance
    rescue StandardError
      nil
    end
  end

  # Check registered flags first
  registered = feature_flags[flag_key]
  if registered
    result = {
      flag_name: flag_name,
      enabled: registered[:enabled],
      variant: registered[:variant],
      source: 'registered'
    }

    # Context-aware evaluation for Flipper/Rollout
    if user_context && defined?(::Flipper)
      begin
        feature = resolve_flipper.call[flag_sym]
        result[:flipper_enabled_for_user] = feature.enabled?(flipper_actor)
        result[:flipper_source] = 'flipper'
      rescue StandardError
        nil
      end
    end

    if user_context && defined?(::Rollout)
      begin
        rollout = resolve_rollout.call
        if rollout
          result[:rollout_active_for_user] = rollout.active?(flag_sym, user_context)
          result[:rollout_source] = 'rollout'
        end
      rescue StandardError
        nil
      end
    end

    next result
  end

  # Try Flipper
  if defined?(::Flipper)
    begin
      feature = resolve_flipper.call[flag_sym]
      # Flipper hands back a feature object for any name, so without this
      # check the Rollout and 'not_found' paths below could never be reached
      # while Flipper is loaded. Older Flipper versions without #exist? keep
      # the previous "always answer" behaviour.
      if !feature.respond_to?(:exist?) || feature.exist?
        flipper_result = {
          flag_name: flag_name,
          enabled: feature.enabled?,
          variant: nil,
          source: 'flipper',
          user_context: user_context,
          enabled_for_user: user_context ? feature.enabled?(flipper_actor) : nil
        }
        next flipper_result
      end
    rescue StandardError
      nil
    end
  end

  # Try Rollout
  if defined?(::Rollout)
    begin
      rollout = resolve_rollout.call
      if rollout
        rollout_result = {
          flag_name: flag_name,
          enabled: rollout.active?(flag_sym),
          variant: nil,
          source: 'rollout',
          user_context: user_context,
          enabled_for_user: user_context ? rollout.active?(flag_sym, user_context) : nil
        }
        next rollout_result
      end
    rescue StandardError
      nil
    end
  end

  {
    flag_name: flag_name,
    enabled: false,
    source: 'not_found',
    error: "Flag '#{flag_name}' not found in registered flags, Flipper, or Rollout"
  }
rescue => e
  { error: e.message }
end
|
|
215
|
+
end
|
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
require 'thread'
|
|
2
|
+
require 'time'
|
|
3
|
+
|
|
4
|
+
module DebugAgent
|
|
5
|
+
# Track registered Mutexes for lock contention and deadlock analysis.
|
|
6
|
+
#
|
|
7
|
+
# DebugAgent.register_mutex(:order_lock, Mutex.new)
|
|
8
|
+
@registered_mutexes = {}
|
|
9
|
+
@lock_stats = {}
|
|
10
|
+
@lock_meta_lock = Mutex.new
|
|
11
|
+
|
|
12
|
+
SENSITIVE_LOCK_ERRORS = []
|
|
13
|
+
|
|
14
|
+
class << self
|
|
15
|
+
attr_reader :registered_mutexes, :lock_stats
|
|
16
|
+
|
|
17
|
+
# Registers a mutex under a name so its usage can be inspected.
#
# The mutex is stored under the string form of +name+. A zeroed stats record
# is created for that name unless one already exists. Objects that respond to
# #synchronize are additionally wrapped for timing via wrap_mutex_for_tracking.
#
# @param name [#to_s] registry key
# @param mutex [Object] the lock object to track
# @return [Object] the same +mutex+ that was passed in
def register_mutex(name, mutex)
  key = name.to_s
  @registered_mutexes[key] = mutex

  @lock_meta_lock.synchronize do
    # Re-registering a name keeps the statistics gathered so far.
    @lock_stats[key] ||= {
      acquire_count: 0, contention_count: 0,
      total_wait_ns: 0, total_hold_ns: 0,
      last_acquired_at: nil, last_released_at: nil,
      max_wait_ns: 0, max_hold_ns: 0
    }
  end

  return mutex unless mutex.respond_to?(:synchronize)

  # Only this specific instance is instrumented.
  wrap_mutex_for_tracking(mutex, key)
  mutex
end
|
|
40
|
+
|
|
41
|
+
# Updates acquisition statistics for a registered mutex.
#
# Does nothing when no stats record exists for +mutex_name+. The update runs
# while holding @lock_meta_lock.
#
# @param mutex_name [String] key into the stats table
# @param wait_ns [Integer] nanoseconds spent waiting for the lock
# @return [String, nil] the ISO 8601 timestamp stored as :last_acquired_at,
#   or nil when the mutex is unknown
def record_lock_acquire(mutex_name, wait_ns)
  @lock_meta_lock.synchronize do
    entry = @lock_stats[mutex_name]
    if entry
      entry[:acquire_count] += 1
      # Waits longer than 1ms are counted as contention.
      entry[:contention_count] += 1 if wait_ns > 1_000_000
      entry[:total_wait_ns] += wait_ns
      entry[:max_wait_ns] = wait_ns if entry[:max_wait_ns] < wait_ns
      entry[:last_acquired_at] = Time.now.iso8601
    end
  end
end
|
|
52
|
+
|
|
53
|
+
# Updates hold-time statistics for a registered mutex.
#
# Does nothing when no stats record exists for +mutex_name+. The update runs
# while holding @lock_meta_lock.
#
# @param mutex_name [String] key into the stats table
# @param hold_ns [Integer] nanoseconds the lock was held
# @return [String, nil] the ISO 8601 timestamp stored as :last_released_at,
#   or nil when the mutex is unknown
def record_lock_release(mutex_name, hold_ns)
  @lock_meta_lock.synchronize do
    entry = @lock_stats[mutex_name]
    if entry
      entry[:total_hold_ns] += hold_ns
      entry[:max_hold_ns] = hold_ns if entry[:max_hold_ns] < hold_ns
      entry[:last_released_at] = Time.now.iso8601
    end
  end
end
|
|
62
|
+
|
|
63
|
+
private
|
|
64
|
+
|
|
65
|
+
# Instruments a single mutex instance so every #synchronize call reports its
# wait and hold durations to DebugAgent.record_lock_acquire / _release.
#
# The wrapper is installed as singleton methods, so other mutex instances are
# unaffected. A marker method (__debug_agent_tracked?) prevents wrapping the
# same instance twice.
#
# Exactly one release is recorded per acquisition, from the +ensure+ around
# the caller's block. An extra zero-length release used to be recorded from an
# outer rescue whenever the block raised.
#
# @param mutex [#synchronize] the lock to instrument
# @param name [String] stats key passed to the record_* callbacks
# @return [Symbol, nil] nil when the mutex was already instrumented
def wrap_mutex_for_tracking(mutex, name)
  return if mutex.singleton_class.method_defined?(:__debug_agent_tracked?)

  # Capture the real implementation before it is shadowed below.
  original_sync = mutex.method(:synchronize)

  mutex.define_singleton_method(:__debug_agent_tracked?) { true }

  mutex.define_singleton_method(:synchronize) do |&block|
    # Without a block, defer to the original so the caller gets the mutex's
    # own error instead of a NoMethodError on nil from block.call.
    next original_sync.call unless block

    # Integer nanoseconds straight from the monotonic clock.
    wait_start = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
    original_sync.call do
      acquired_at = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
      DebugAgent.record_lock_acquire(name, acquired_at - wait_start)
      begin
        block.call
      ensure
        released_at = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
        DebugAgent.record_lock_release(name, released_at - acquired_at)
      end
    end
  end
end
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
# Tool: reports timing statistics for every registered mutex plus a list of
# sleeping threads whose backtrace contains a lock-related frame. At most 50
# blocked threads are returned. Any StandardError is reported as
# { error: message }.
register_tool('get_lock_contention',
              'Analyze Mutex contention: list registered mutexes with lock/hold ' \
              'timing, contention count, and threads currently blocked on mutex ' \
              'operations') do
  # The keywords must not be embedded in a longer run of letters. A plain
  # /lock/i also matched "block", i.e. every "block in ..." backtrace frame,
  # which flagged almost any sleeping thread as waiting on a lock.
  lock_frame = /(?<![a-z])(?:mutex|synchronize|lock|monitor|conditionvariable)(?![a-z])/i

  # Scan all threads for backtraces indicating mutex blocking
  blocked_threads = Thread.list.map do |thread|
    next nil unless thread.alive? && thread.status == 'sleep'

    frames = begin
      thread.backtrace || []
    rescue StandardError
      []
    end
    matching_frame = frames.find { |line| line.match?(lock_frame) }
    next nil unless matching_frame

    thread_name = begin
      thread.name
    rescue StandardError
      nil
    end

    {
      thread_object_id: thread.object_id,
      thread_name: thread_name,
      status: thread.status,
      blocking_location: matching_frame,
      backtrace_top: frames.first(5)
    }
  end.compact

  if registered_mutexes.empty?
    empty_report = {
      registered_mutex_count: 0,
      mutexes: [],
      blocked_threads_count: blocked_threads.size,
      blocked_threads: blocked_threads.first(50),
      message: 'No mutexes registered. Call DebugAgent.register_mutex(:name, mutex) to track lock contention.'
    }
    next empty_report
  end

  # Nanoseconds (possibly nil) -> milliseconds rounded to two decimals.
  to_ms = ->(nanos) { ((nanos || 0) / 1_000_000.0).round(2) }

  mutexes = registered_mutexes.map do |name, mutex|
    stats = lock_stats[name] || {}
    acquisitions = stats[:acquire_count].to_i
    # Averages are per acquisition; 0.0 when the lock was never taken.
    average_ms = lambda do |total_ns|
      acquisitions.positive? ? ((total_ns || 0).fdiv(acquisitions) / 1_000_000.0).round(2) : 0.0
    end

    {
      name: name,
      object_id: mutex.object_id,
      class: mutex.class.name,
      tracked: mutex.singleton_class.method_defined?(:__debug_agent_tracked?),
      locked: (mutex.locked? if mutex.respond_to?(:locked?)),
      acquire_count: stats[:acquire_count] || 0,
      contention_count: stats[:contention_count] || 0,
      total_wait_ms: to_ms.call(stats[:total_wait_ns]),
      avg_wait_ms: average_ms.call(stats[:total_wait_ns]),
      max_wait_ms: to_ms.call(stats[:max_wait_ns]),
      total_hold_ms: to_ms.call(stats[:total_hold_ns]),
      avg_hold_ms: average_ms.call(stats[:total_hold_ns]),
      max_hold_ms: to_ms.call(stats[:max_hold_ns]),
      last_acquired_at: stats[:last_acquired_at],
      last_released_at: stats[:last_released_at]
    }
  end

  analysis =
    if blocked_threads.any?
      'Threads detected blocking on mutexes — possible contention'
    else
      'No threads currently blocking on mutexes'
    end

  {
    registered_mutex_count: registered_mutexes.size,
    mutexes: mutexes,
    blocked_threads_count: blocked_threads.size,
    blocked_threads: blocked_threads.first(50),
    analysis: analysis
  }
rescue => e
  { error: e.message }
end
|
|
171
|
+
|
|
172
|
+
# Tool: gathers VM-level statistics: thread counts, selected RubyVM.stat
# counters, GC profiler/GC.stat figures, process/thread CPU clocks, threads
# with pending interrupts, and backtrace depth totals. Optional sections are
# skipped silently when they raise. Any other StandardError is reported as
# { error: message }.
register_tool('get_gvl_stats',
              'Global VM Lock (GVL) stats for Ruby 3.2+: GVL wait time, thread switch ' \
              'count, GC profiler data, and VM statistics') do
  # One snapshot so every thread-derived figure describes the same thread set.
  threads = Thread.list

  stats = {
    ruby_version: RUBY_VERSION,
    platform: RUBY_PLATFORM,
    thread_count: threads.size,
    alive_threads: threads.count(&:alive?),
    sleeping_threads: threads.count { |t| t.status == 'sleep' }
  }

  # RubyVM.stat (available in MRI Ruby, provides VM-level counters)
  begin
    if RubyVM.respond_to?(:stat)
      vm_stat = RubyVM.stat
      wanted_keys = %i[instruction_sequence_count constant_cache_count constant_cache_invalidations_count
                       global_method_state global_constant_count]
      stats[:vm_stats] = vm_stat.select { |key, _| wanted_keys.include?(key) }
      stats[:vm_stat_total_keys] = vm_stat.size
    end
  rescue StandardError
    nil
  end

  # GC::Profiler data (must be explicitly enabled)
  if defined?(GC::Profiler)
    stats[:gc_profiler_enabled] = GC::Profiler.enabled?
    if GC::Profiler.enabled?
      begin
        stats[:gc_total_time_ms] = (GC::Profiler.total_time * 1000).round(2)
      rescue StandardError
        nil
      end
    end
  end

  # GC.stat — always available in MRI, useful GVL-adjacent metrics
  if GC.respond_to?(:stat)
    gc = GC.stat
    stats[:gc_stat] = {
      count: gc[:count],
      major_gc_count: gc[:major_gc_count],
      minor_gc_count: gc[:minor_gc_count],
      total_allocated_objects: gc[:total_allocated_objects],
      heap_live_slots: gc[:heap_live_slots],
      heap_free_slots: gc[:heap_free_slots],
      heap_allocated_pages: gc[:heap_allocated_pages],
      heap_eden_pages: gc[:heap_eden_pages],
      heap_tomb_pages: gc[:heap_tomb_pages]
    }
  end

  # Process CPU time as GVL contention proxy.
  # The keys promise nanoseconds, but clock_gettime defaults to float seconds;
  # request the :nanosecond unit so the values match their names.
  begin
    stats[:process_cpu_time_ns] = Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID, :nanosecond)
    stats[:thread_cpu_time_ns] = Process.clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID, :nanosecond)
    stats[:monotonic_ns] = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
  rescue StandardError
    nil
  end

  # Check for pending interrupts (Thread#pending_interrupt?)
  pending = threads.select do |t|
    begin
      t.pending_interrupt?
    rescue StandardError
      false
    end
  end
  stats[:threads_with_pending_interrupts] = pending.size
  stats[:pending_interrupt_threads] = pending.map do |t|
    thread_name = begin
      t.name
    rescue StandardError
      nil
    end
    { object_id: t.object_id, name: thread_name, status: t.status }
  end

  # Thread backtrace depth as a rough GVL pressure indicator
  total_bt_depth = threads.sum do |t|
    begin
      (t.backtrace || []).size
    rescue StandardError
      0
    end
  end
  stats[:total_backtrace_depth] = total_bt_depth
  # Integer division: the average is reported as a whole number of frames.
  stats[:avg_backtrace_depth] = threads.empty? ? 0 : (total_bt_depth / threads.size)

  stats
rescue => e
  { error: e.message }
end
|
|
262
|
+
|
|
263
|
+
# Tool: heuristic deadlock check. Collects sleeping threads whose backtrace
# contains a lock-related frame, pairs up threads that appear to reference the
# same registered mutex, and lists registered mutexes that are currently
# locked. At most 50 blocked threads are returned. Any StandardError is
# reported as { error: message }.
register_tool('detect_deadlock',
              'Detect deadlock among registered mutexes: check if threads are blocked ' \
              'waiting on each other in a circular dependency. Scans thread backtraces ' \
              'for mutex/Monitor blocking patterns') do
  # The keywords must not be embedded in a longer run of letters. A plain
  # /lock/i also matched "block", i.e. every "block in ..." backtrace frame.
  lock_frame = /(?<![a-z])(?:mutex|synchronize|lock|monitor|conditionvariable)(?![a-z])/i

  # Gather all threads that appear to be blocked on lock operations.
  # A thread waiting on a lock is asleep; running threads (including the one
  # executing this tool) are skipped, matching get_lock_contention.
  thread_states = Thread.list.map do |thread|
    next nil unless thread.alive? && thread.status == 'sleep'

    frames = begin
      thread.backtrace || []
    rescue StandardError
      []
    end

    blocking_on = frames.find { |line| line.match?(lock_frame) }
    next nil unless blocking_on

    # Identify which registered mutexes might be involved by matching object_ids.
    # NOTE(review): this only matches when the frame text happens to contain a
    # mutex's object_id; ordinary backtrace lines hold file/line/method names,
    # so this may rarely (or never) match — confirm the intended data source.
    potential_mutexes = registered_mutexes.select do |_name, candidate|
      blocking_on.to_s.include?(candidate.object_id.to_s)
    end.keys

    thread_name = begin
      thread.name
    rescue StandardError
      nil
    end

    {
      thread_object_id: thread.object_id,
      thread_name: thread_name,
      status: thread.status,
      blocking_on: blocking_on,
      backtrace: frames.first(10),
      potential_mutexes: potential_mutexes
    }
  end.compact

  blocked_count = thread_states.size

  # Approximate wait-for analysis: any two blocked threads that reference at
  # least one common registered mutex are reported as a potential cycle.
  potential_cycles = []
  thread_states.combination(2) do |first, second|
    shared = first[:potential_mutexes] & second[:potential_mutexes]
    next if shared.empty?

    potential_cycles << {
      thread_a: { id: first[:thread_object_id], name: first[:thread_name], blocking_on: first[:blocking_on] },
      thread_b: { id: second[:thread_object_id], name: second[:thread_name], blocking_on: second[:blocking_on] },
      shared_mutexes: shared,
      risk_level: 'high'
    }
  end

  # Registered mutexes that are locked at the moment of inspection.
  stuck_mutexes = registered_mutexes.select do |_name, candidate|
    candidate.respond_to?(:locked?) && candidate.locked?
  end.keys

  deadlock_detected = potential_cycles.any?

  recommendation =
    if deadlock_detected
      'Deadlock risk detected. Review lock ordering — ensure consistent ' \
      'acquisition order across all mutexes.'
    elsif blocked_count > 0
      "#{blocked_count} thread(s) blocking on locks but no circular dependency detected."
    else
      'No deadlock patterns detected.'
    end

  {
    deadlock_detected: deadlock_detected,
    blocked_thread_count: blocked_count,
    blocked_threads: thread_states.first(50),
    potential_cycles: potential_cycles,
    stuck_mutexes: stuck_mutexes,
    registered_mutexes: registered_mutexes.keys,
    recommendation: recommendation
  }
rescue => e
  { error: e.message }
end
|
|
343
|
+
end
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
require 'time'
|
|
2
|
+
|
|
3
|
+
module DebugAgent
|
|
4
|
+
# Track database migration status.
|
|
5
|
+
#
|
|
6
|
+
# Auto-detects ActiveRecord::SchemaMigration if loaded.
|
|
7
|
+
# Custom providers can be registered:
|
|
8
|
+
#
|
|
9
|
+
# DebugAgent.register_migration_provider -> {
|
|
10
|
+
# {
|
|
11
|
+
# current_version: 3,
|
|
12
|
+
# pending: [{ version: 4, name: 'add_index' }],
|
|
13
|
+
# applied: [{ version: 1, name: 'create_users', applied_at: '...' }, ...]
|
|
14
|
+
# }
|
|
15
|
+
# }
|
|
16
|
+
@migration_provider = nil
|
|
17
|
+
|
|
18
|
+
class << self
|
|
19
|
+
attr_accessor :migration_provider
|
|
20
|
+
|
|
21
|
+
# Registers the callable that supplies migration data.
#
# The provider is invoked with no arguments by migration_data, which uses its
# result only when it is a Hash. It may be given either as a positional
# callable or as a block; the positional argument wins when both are present.
#
# @param fn [#call, nil] provider callable
# @yield optional block form of the provider
# @return [#call, nil] the provider that was stored
def register_migration_provider(fn = nil, &block)
  @migration_provider = fn || block
end
|
|
24
|
+
|
|
25
|
+
# Resolves migration data from the best available source.
#
# A registered provider is consulted first and its result is used when it is
# a Hash. Otherwise ActiveRecord is queried when ActiveRecord::SchemaMigration
# is defined.
#
# @return [Hash, nil] migration data, or nil when no source is available
def migration_data
  provider = @migration_provider
  if provider
    provided = provider.call
    # A non-Hash result is ignored and the ActiveRecord fallback is tried.
    return provided if provided.is_a?(Hash)
  end

  defined?(::ActiveRecord::SchemaMigration) ? ar_migration_data : nil
end
|
|
39
|
+
|
|
40
|
+
private
|
|
41
|
+
|
|
42
|
+
# Builds migration data from ActiveRecord.
#
# Applied versions come from ActiveRecord::SchemaMigration.all_versions. The
# current version comes from ActiveRecord::Migrator.current_version, falling
# back to the highest applied version (or 0) when that call raises. Pending
# migrations are the files under db/migrate (relative to the working
# directory) whose numeric prefix is not among the applied versions; they are
# reported in ascending version order with the name taken from the filename.
#
# @return [Hash] { current_version:, applied: [{version:}], pending:
#   [{version:, name:}] }, or { error: message } when any StandardError is raised
def ar_migration_data
  schema_migration = ::ActiveRecord::SchemaMigration
  applied = schema_migration.all_versions.map(&:to_i).sort

  current = begin
    ::ActiveRecord::Migrator.current_version
  rescue StandardError
    # Migrator.current_version is unavailable or failed; derive it instead.
    applied.last || 0
  end

  pending = []
  migrations_dir = 'db/migrate'
  if defined?(::ActiveRecord::Migration) && Dir.exist?(migrations_dir)
    # "<version>_<name>.rb" -> { version => name }; first file wins per version.
    names_by_version = Dir.glob(File.join(migrations_dir, '*.rb')).each_with_object({}) do |path, acc|
      version, name = File.basename(path, '.rb').split('_', 2)
      acc[version.to_i] ||= name
    end

    pending = (names_by_version.keys - applied).sort.map do |version|
      # Keep the 'pending' placeholder only when the filename carries no name.
      { version: version, name: names_by_version[version] || 'pending' }
    end
  end

  {
    current_version: current,
    applied: applied.map { |version| { version: version } },
    pending: pending
  }
rescue => e
  { error: "ActiveRecord migration query failed: #{e.message}" }
end
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
# Tool: summarises the schema migration state as the current version plus the
# number of applied and pending migrations, along with the data source.
# Returns an :error hash when no data is available, passes through an :error
# reported by the data source, and reports any StandardError as
# { error: message }.
register_tool('get_migration_status',
              'Current database schema migration status: current version, total applied, ' \
              'total pending. Auto-detects ActiveRecord or uses registered provider') do
  snapshot = migration_data

  if snapshot.nil?
    unavailable = {
      error: 'No migration data available. Either load ActiveRecord or register a provider ' \
             'with DebugAgent.register_migration_provider(-> { ... })'
    }
    next unavailable
  end

  # A source-level failure is handed back unchanged.
  next snapshot if snapshot[:error]

  applied_entries = snapshot[:applied] || []
  pending_entries = snapshot[:pending] || []

  # A registered provider is reported first, then ActiveRecord.
  origin =
    if migration_provider
      'custom_provider'
    elsif defined?(::ActiveRecord::SchemaMigration)
      'active_record'
    else
      'unknown'
    end

  {
    current_version: snapshot[:current_version],
    applied_count: applied_entries.size,
    pending_count: pending_entries.size,
    source: origin
  }
rescue => e
  { error: e.message }
end
|
|
104
|
+
|
|
105
|
+
# Tool: lists migrations that have not been applied yet, with a count and a
# short recommendation. Returns an :error hash when no data is available,
# passes through an :error reported by the data source, and reports any
# StandardError as { error: message }.
register_tool('get_pending_migrations',
              'List unapplied/pending database migrations that have not yet been run') do
  snapshot = migration_data

  if snapshot.nil?
    unavailable = {
      error: 'No migration data available. Either load ActiveRecord or register a provider.'
    }
    next unavailable
  end

  # A source-level failure is handed back unchanged.
  next snapshot if snapshot[:error]

  outstanding = snapshot[:pending] || []

  advice =
    if outstanding.any?
      'Run pending migrations before deploying.'
    else
      'All migrations are up to date.'
    end

  {
    pending_count: outstanding.size,
    migrations: outstanding,
    recommendation: advice
  }
rescue => e
  { error: e.message }
end
|
|
128
|
+
|
|
129
|
+
# Tool: returns the applied-migration log with a total count and the version
# of the last entry in the list (0 when the list has no entries). Returns an
# :error hash when no data is available, passes through an :error reported by
# the data source, and reports any StandardError as { error: message }.
register_tool('get_migration_history',
              'Applied migration history log: versions and timestamps of all applied migrations') do
  snapshot = migration_data

  if snapshot.nil?
    unavailable = {
      error: 'No migration data available. Either load ActiveRecord or register a provider.'
    }
    next unavailable
  end

  # A source-level failure is handed back unchanged.
  next snapshot if snapshot[:error]

  history = snapshot[:applied] || []

  # "Latest" is positional: the final element of the list as supplied.
  newest =
    if history.any?
      history.last[:version]
    else
      0
    end

  {
    total_applied: history.size,
    latest_version: newest,
    migrations: history
  }
rescue => e
  { error: e.message }
end
|
|
150
|
+
end
|