debug-agent 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +46 -5
- data/lib/debug_agent/inspectors/active_record_stats.rb +131 -0
- data/lib/debug_agent/inspectors/cache.rb +194 -0
- data/lib/debug_agent/inspectors/concurrent.rb +78 -0
- data/lib/debug_agent/inspectors/error_tracking.rb +120 -0
- data/lib/debug_agent/inspectors/faraday.rb +79 -0
- data/lib/debug_agent/inspectors/gc.rb +53 -0
- data/lib/debug_agent/inspectors/health.rb +110 -0
- data/lib/debug_agent/inspectors/http_client.rb +145 -0
- data/lib/debug_agent/inspectors/logging.rb +163 -0
- data/lib/debug_agent/inspectors/metrics.rb +71 -0
- data/lib/debug_agent/inspectors/scheduler.rb +154 -0
- data/lib/debug_agent/inspectors/security.rb +201 -0
- data/lib/debug_agent/inspectors/websocket.rb +188 -0
- data/lib/debug_agent/llm_client.rb +56 -56
- data/lib/debug_agent/version.rb +1 -1
- data/lib/debug_agent.rb +12 -0
- metadata +13 -1
|
@@ -62,6 +62,59 @@ module DebugAgent
|
|
|
62
62
|
{ error: e.message }
|
|
63
63
|
end
|
|
64
64
|
|
|
65
|
+
# Tool: detailed GC::Profiler report.
#
# Returns a hash with:
#   enabled                  - whether GC::Profiler is currently collecting
#   total_gc_time_seconds    - GC::Profiler.total_time, rounded to 6 decimals
#   gc_count                 - number of profiled GC runs
#   gc_time_stats_ms         - min / avg / max GC time in milliseconds
#   total_mark_time_seconds,
#   total_sweep_time_seconds - sums of the per-run mark / sweep times
#   entries                  - one record per profiled GC run
# Any unexpected failure is reported as { error: message }.
register_tool('get_gc_profiler_detail',
              'Get GC::Profiler raw data with computed stats: total time, count, ' \
              'min/avg/max GC time, total mark and sweep time, per-GC entries') do
  unless defined?(GC::Profiler)
    next { enabled: false, message: 'GC::Profiler is not available on this Ruby implementation' }
  end

  raw_data = GC::Profiler.raw_data

  if raw_data.nil? || raw_data.empty?
    next {
      # Report the real profiler state: "no data" also happens when the
      # profiler was never switched on, and claiming enabled: true in that
      # case contradicts the message below.
      enabled: GC::Profiler.enabled?,
      total_gc_time_seconds: 0,
      gc_count: 0,
      message: 'GC::Profiler has no data. Call GC::Profiler.enable to start collecting.'
    }
  end

  # Missing keys become 0.0 via nil.to_f, so absent mark/sweep timings
  # simply sum to zero instead of raising.
  gc_times = raw_data.map { |entry| entry[:GC_TIME].to_f }
  mark_times = raw_data.map { |entry| entry[:GC_MARK_TIME].to_f }
  sweep_times = raw_data.map { |entry| entry[:GC_SWEEP_TIME].to_f }
  avg = gc_times.sum / gc_times.size

  {
    enabled: GC::Profiler.enabled?,
    total_gc_time_seconds: GC::Profiler.total_time.round(6),
    gc_count: raw_data.size,
    gc_time_stats_ms: {
      min: (gc_times.min * 1000).round(3),
      avg: (avg * 1000).round(3),
      max: (gc_times.max * 1000).round(3)
    },
    total_mark_time_seconds: mark_times.sum.round(6),
    total_sweep_time_seconds: sweep_times.sum.round(6),
    entries: raw_data.map.with_index do |entry, i|
      {
        index: i,
        gc_time_ms: (entry[:GC_TIME].to_f * 1000).round(3),
        gc_invoke_time: entry[:GC_INVOKE_TIME]&.round(6),
        heap_use_pages: entry[:HEAP_USE_PAGES],
        heap_live_objects: entry[:HEAP_LIVE_OBJECTS],
        heap_free_objects: entry[:HEAP_FREE_OBJECTS],
        heap_total_objects: entry[:HEAP_TOTAL_OBJECTS],
        gc_mark_time_ms: (entry[:GC_MARK_TIME].to_f * 1000).round(3),
        gc_sweep_time_ms: (entry[:GC_SWEEP_TIME].to_f * 1000).round(3)
      }
    end
  }
rescue => e
  { error: e.message }
end
|
|
117
|
+
|
|
65
118
|
register_tool('force_gc',
|
|
66
119
|
'Trigger a full garbage collection (GC.start with full_mark) and show before/after comparison') do
|
|
67
120
|
before_stats = GC.stat
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
module DebugAgent
|
|
2
|
+
# Health check registry: maps a component name (String) to the block that
# probes it. A probe is expected to return a hash carrying at least a
# :status key ('UP', 'DOWN', or 'DEGRADED').
#
#   DebugAgent.register_health_check(:database) { { status: 'UP' } }
@health_checks = {}

class << self
  # Registered probes, keyed by stringified component name.
  attr_reader :health_checks

  # Store +block+ as the probe for +name+. The name is stringified, so
  # :database and 'database' address the same entry; registering again under
  # the same name replaces the earlier probe. Returns the stored block.
  def register_health_check(name, &block)
    health_checks.store(name.to_s, block)
  end
end
|
|
15
|
+
|
|
16
|
+
# Tool: run every registered health check once and summarise the outcome.
#
# Per component the result carries the normalised :status and the measured
# :latency_ms. A probe that returns a Hash has those keys merged into its own
# result; a probe that returns anything else is treated as 'UP' and its raw
# return value is reported under :detail. A probe that raises is reported as
# 'DOWN' with the exception message.
#
# overall_status is 'DOWN' if any component is down, otherwise 'DEGRADED' if
# any is degraded, otherwise 'UP'. With no registered checks it is 'UNKNOWN'.
register_tool('get_health_status',
              'Run all registered health checks and report status per component: UP, DOWN, or DEGRADED') do
  if health_checks.empty?
    next {
      message: 'No health checks registered. Call DebugAgent.register_health_check(:name) { ... }.',
      overall_status: 'UNKNOWN'
    }
  end

  results = {}
  tally = { 'UP' => 0, 'DOWN' => 0, 'DEGRADED' => 0 }

  health_checks.each do |check_name, block|
    started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    begin
      result = block.call
      latency = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started) * 1000.0).round(2)

      if result.is_a?(Hash)
        status = (result[:status] || result['status'] || 'UP').to_s.upcase
        results[check_name] = result.merge(status: status, latency_ms: latency)
      else
        # Non-hash results (true, a string, nil, ...) count as UP. They must
        # not go through Hash#merge, which would raise and flip the
        # component to DOWN.
        status = 'UP'
        results[check_name] = { status: status, latency_ms: latency, detail: result }
      end

      # Statuses outside UP/DOWN/DEGRADED are reported but not counted.
      tally[status] += 1 if tally.key?(status)
    rescue => e
      results[check_name] = { status: 'DOWN', error: e.message }
      tally['DOWN'] += 1
    end
  end

  overall = if tally['DOWN'] > 0
              'DOWN'
            elsif tally['DEGRADED'] > 0
              'DEGRADED'
            else
              'UP'
            end

  {
    overall_status: overall,
    up: tally['UP'],
    down: tally['DOWN'],
    degraded: tally['DEGRADED'],
    total: health_checks.size,
    components: results
  }
rescue => e
  { error: e.message }
end
|
|
66
|
+
|
|
67
|
+
# Tool: probe one registered health check up to three times in a row and
# report the last sample together with min / avg / max latency.
#
# component_name - name the check was registered under (stringified before
#                  lookup).
#
# Returns { error: ... } when nothing is registered or the name is unknown.
# A probe that raises is recorded as a 'DOWN' sample and stops the sampling.
register_tool('get_health_detail',
              'Deep dive into a specific health check component for detailed diagnostics',
              component_name: { type: 'string', description: 'Name of the health check component to inspect', required: true }) do |component_name:|
  next { error: 'No health checks registered.' } if health_checks.empty?

  key = component_name.to_s
  probe = health_checks[key]
  unless probe
    next { error: "No health check registered for '#{component_name}'. Available: #{health_checks.keys.join(', ')}" }
  end

  # Milliseconds elapsed since +since+ on the monotonic clock, 2 decimals.
  elapsed_ms = lambda do |since|
    ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - since) * 1000.0).round(2)
  end

  samples = []
  3.times do
    began = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    begin
      outcome = probe.call
      took = elapsed_ms.call(began)
      state = if outcome.is_a?(Hash)
                (outcome[:status] || outcome['status'] || 'UP').to_s.upcase
              else
                'UP'
              end
      samples.push({ status: state, latency_ms: took, detail: outcome })
    rescue => e
      took = elapsed_ms.call(began)
      samples.push({ status: 'DOWN', latency_ms: took, error: e.message })
      # A raising probe ends the sampling run early.
      break
    end
  end

  timings = samples.map { |sample| sample[:latency_ms] }

  {
    component: key,
    registered_checks: health_checks.keys,
    latest: samples.last,
    samples: samples.size,
    latency_ms: {
      min: timings.min,
      avg: (timings.sum / timings.size.to_f).round(2),
      max: timings.max
    }
  }
rescue => e
  { error: e.message }
end
|
|
110
|
+
end
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
require 'time'
|
|
2
|
+
require 'thread'
|
|
3
|
+
|
|
4
|
+
module DebugAgent
|
|
5
|
+
# Track outbound Net::HTTP calls (latency, errors, hosts) and live
# connections by wrapping Net::HTTP#request, #start and #finish.
#
# All three structures below are guarded by @outbound_lock.

# Upper bound on remembered connections. Without it the table would gain one
# entry per Net::HTTP session for the lifetime of the process.
MAX_TRACKED_HTTP_CONNECTIONS = 1000

@outbound_stats = { total: 0, latencies: [], errors: 0, hosts: {} }
@outbound_lock = Mutex.new
@http_connections = {}

class << self
  # Live aggregate counters. This is the internal hash itself, not a copy.
  attr_reader :outbound_stats

  # Record one finished outbound request.
  #
  # http       - the connection; only #address and #port are read
  # req        - the request object (currently unused, kept for callers)
  # latency_ms - elapsed time in milliseconds
  # error      - the raised exception, or nil when the request succeeded
  #
  # Keeps the newest 1000 latencies overall and the newest 200 per host.
  def record_outbound(http, req, latency_ms, error)
    @outbound_lock.synchronize do
      s = @outbound_stats
      s[:total] += 1
      s[:latencies] << latency_ms
      s[:latencies].shift if s[:latencies].size > 1000

      host_key = "#{http.address}:#{http.port}"
      h = (s[:hosts][host_key] ||= { count: 0, latencies: [], errors: 0 })
      h[:count] += 1
      h[:latencies] << latency_ms
      h[:latencies].shift if h[:latencies].size > 200
      if error
        s[:errors] += 1
        h[:errors] += 1
      end
    end
  end

  # Remember +http+ as an open connection, keyed by its object_id.
  def track_http_connect(http)
    @outbound_lock.synchronize do
      # Delete first so a re-opened connection (or a recycled object_id)
      # moves to the newest position instead of keeping its old slot.
      @http_connections.delete(http.object_id)
      @http_connections[http.object_id] = {
        host: http.address,
        port: http.port,
        use_ssl: http.use_ssl?,
        started_at: Time.now.iso8601,
        active: true
      }
      prune_http_connections
    end
  end

  # Flag the entry for +http+ as closed; unknown connections are ignored.
  def track_http_disconnect(http)
    @outbound_lock.synchronize do
      conn = @http_connections[http.object_id]
      conn[:active] = false if conn
    end
  end

  # Wrap Net::HTTP once to capture outbound request metrics.
  # Returns false when Net::HTTP is not loaded, true otherwise.
  def install_outbound_tracker
    return false unless defined?(::Net::HTTP)
    return true if ::Net::HTTP.include?(OutboundHttpTracker)

    ::Net::HTTP.prepend(OutboundHttpTracker)
    true
  end

  private

  # Trim @http_connections back to MAX_TRACKED_HTTP_CONNECTIONS entries.
  # Closed connections are evicted first (oldest first); open ones are only
  # dropped if the table is still over the limit. Caller holds @outbound_lock.
  def prune_http_connections
    overflow = @http_connections.size - MAX_TRACKED_HTTP_CONNECTIONS
    return if overflow <= 0

    closed_ids = @http_connections.reject { |_id, conn| conn[:active] }.keys.first(overflow)
    closed_ids.each { |id| @http_connections.delete(id) }

    overflow -= closed_ids.size
    return if overflow <= 0

    @http_connections.keys.first(overflow).each { |id| @http_connections.delete(id) }
  end
end
|
|
61
|
+
|
|
62
|
+
# Prepended module that instruments the Net::HTTP session and request
# lifecycle. Every call into DebugAgent is best-effort (`rescue nil`):
# instrumentation must never change the outcome of the wrapped call.
module OutboundHttpTracker
  # Open the session and record the connection once it is actually open, so
  # a failed connect is never left behind as an "active" connection.
  #
  # In block form Net::HTTP closes the session itself when the block ends,
  # without going through #finish, so the disconnect is recorded here.
  def start(&block)
    unless block
      session = super()
      DebugAgent.track_http_connect(self) rescue nil
      return session
    end

    super() do |http|
      DebugAgent.track_http_connect(self) rescue nil
      begin
        block.call(http)
      ensure
        DebugAgent.track_http_disconnect(self) rescue nil
      end
    end
  end

  # Record the disconnect, then close the session.
  def finish
    DebugAgent.track_http_disconnect(self) rescue nil
    super
  end

  # Time the request and report it to DebugAgent.record_outbound, passing
  # the exception when one is raised (the exception is re-raised unchanged).
  def request(req, *args, &block)
    # On a connection that is not open yet, Net::HTTP#request opens a
    # session and calls #request again from inside it. Let that nested call
    # straight through so the request is counted once, by the outer call,
    # with the connection time included.
    return super if defined?(@debug_agent_request_in_flight) && @debug_agent_request_in_flight

    @debug_agent_request_in_flight = true
    started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    begin
      result = super
      elapsed = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started) * 1000.0)
      DebugAgent.record_outbound(self, req, elapsed, nil) rescue nil
      result
    rescue => e
      elapsed = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started) * 1000.0)
      DebugAgent.record_outbound(self, req, elapsed, e) rescue nil
      raise
    ensure
      @debug_agent_request_in_flight = false
    end
  end
end
|
|
88
|
+
|
|
89
|
+
# Hook the tracker into Net::HTTP as soon as this file is loaded; the call
# returns false and does nothing when Net::HTTP is not defined yet.
install_outbound_tracker

# Tool: list the Net::HTTP connections seen by the tracker.
#
# Returns the number of connections currently flagged active, the number of
# tracked connections, whether the tracker is installed on Net::HTTP, and
# the 200 most recently tracked connection records.
register_tool('get_http_connections',
              'List Net::HTTP connections and their state: host, port, use_ssl, ' \
              'start_time, active connections') do
  tracked = @outbound_lock.synchronize { @http_connections.values }

  {
    active_count: tracked.count { |conn| conn[:active] },
    total_tracked: tracked.size,
    tracker_active: defined?(::Net::HTTP) && ::Net::HTTP.include?(OutboundHttpTracker),
    connections: tracked.last(200)
  }
rescue => e
  { error: e.message }
end
|
|
106
|
+
|
|
107
|
+
# Tool: aggregate view of the outbound requests recorded so far.
#
# The counters are copied while holding @outbound_lock; all arithmetic is
# done on the copy. Reports the request total, mean latency, error count and
# rate, number of distinct hosts, and the ten busiest hosts by request count.
register_tool('get_outbound_summary',
              'Summary of outbound HTTP calls tracked by the agent: total, avg latency, ' \
              'error rate, top hosts') do
  total, latencies, errors, hosts = @outbound_lock.synchronize do
    stats = @outbound_stats
    [stats[:total], stats[:latencies].dup, stats[:errors], stats[:hosts].transform_values(&:dup)]
  end

  overall_avg = latencies.empty? ? 0.0 : (latencies.sum / latencies.size)

  host_rows = hosts.map do |host, info|
    host_latencies = info[:latencies]
    host_avg = host_latencies.empty? ? 0 : (host_latencies.sum / host_latencies.size).round(2)
    { host: host, count: info[:count], avg_latency_ms: host_avg, errors: info[:errors] }
  end
  top_hosts = host_rows.sort_by { |row| -row[:count] }.first(10)

  {
    total_requests: total,
    avg_latency_ms: overall_avg.round(2),
    error_count: errors,
    error_rate: total.zero? ? '0.0%' : format('%.1f%%', errors.to_f / total * 100),
    tracked_hosts: hosts.size,
    tracker_active: defined?(::Net::HTTP) && ::Net::HTTP.include?(OutboundHttpTracker),
    top_hosts: top_hosts
  }
rescue => e
  { error: e.message }
end
|
|
145
|
+
end
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
require 'time'
|
|
2
|
+
require 'thread'
|
|
3
|
+
require 'logger'
|
|
4
|
+
|
|
5
|
+
module DebugAgent
|
|
6
|
+
# Ring buffer of recent log entries and a registry of named loggers.
#
#   DebugAgent.register_logger(:app, Rails.logger)
MAX_LOGS = 100

@log_buffer = []
@log_buffer_lock = Mutex.new
@loggers = {}

class << self
  # Registered loggers, keyed by stringified name.
  attr_reader :loggers

  # Register +logger+ under +name+ (stringified). Returns the logger.
  def register_logger(name, logger)
    @loggers[name.to_s] = logger
  end

  # Invoked by the wrapped Logger#add / #<< to push an entry into the ring
  # buffer. +args+ is the (message, progname) pair Logger#add received; the
  # first non-nil value is taken as the message. The buffer keeps the newest
  # MAX_LOGS entries.
  def capture_log(severity, args)
    args = args.is_a?(Array) ? args : [args]
    msg = args.compact.first
    entry = {
      timestamp: Time.now.iso8601,
      severity: severity_label(severity),
      message: msg.respond_to?(:to_str) ? msg.to_s : msg.inspect
    }
    @log_buffer_lock.synchronize do
      @log_buffer << entry
      @log_buffer.shift if @log_buffer.size > MAX_LOGS
    end
  end

  # Wrap the standard Logger#add / << so log output flows into the ring
  # buffer. Only wraps once — guarded by checking for the aliased method.
  # Returns false when ::Logger is not loaded, true otherwise.
  def install_log_capture
    return false unless defined?(::Logger)
    return true if ::Logger.method_defined?(:_original_add)

    ::Logger.class_eval do
      alias_method :_original_add, :add
      alias_method :_original_lshift, :<<

      def add(severity, *args, &block)
        # Below the logger's threshold nothing is written, and Logger would
        # not evaluate the message block. Pass such calls straight through
        # so `logger.debug { expensive }` stays lazy and suppressed entries
        # do not crowd the ring buffer.
        if (severity || ::Logger::UNKNOWN) < level
          return _original_add(severity, *args, &block)
        end

        if block
          # Evaluate the block once here and hand the result on as the
          # message, so it is not evaluated a second time by Logger.
          msg = args[0]
          msg = block.call if msg.nil?
          DebugAgent.capture_log(severity, [msg]) rescue nil
          _original_add(severity, msg, *args[1..-1])
        else
          DebugAgent.capture_log(severity, args) rescue nil
          _original_add(severity, *args)
        end
      end

      def <<(msg)
        DebugAgent.capture_log(nil, [msg]) rescue nil
        _original_lshift(msg)
      end
    end
    true
  end

  # Map a Logger severity integer to a human-readable label. Non-integer
  # severities are treated as Logger::UNKNOWN ('ANY'); integers outside
  # 0..5 yield 'UNKNOWN'.
  def severity_label(severity)
    labels = %w[DEBUG INFO WARN ERROR FATAL ANY]
    idx = severity.is_a?(Integer) ? severity : (defined?(::Logger) ? ::Logger::UNKNOWN : 5)
    # A negative index would wrap around to the end of the array.
    return 'UNKNOWN' if idx.negative?

    labels[idx] || 'UNKNOWN'
  end
end

# Attempt to wrap Logger at load time (no-op if Logger isn't loaded yet).
install_log_capture
|
|
78
|
+
|
|
79
|
+
# Lower-case level name => Logger severity integer. Each value is read from
# the matching ::Logger constant when Logger is loaded; otherwise the
# position in the list (0..4) is used.
LEVEL_MAP = %w[debug info warn error fatal].each_with_index.map do |label, fallback|
  [label, defined?(::Logger) ? ::Logger.const_get(label.upcase) : fallback]
end.to_h.freeze
|
|
86
|
+
|
|
87
|
+
# Tool: return the newest captured log entries, newest first.
#
# limit - maximum number of entries to return; values that are not positive
#         after to_i fall back to 50.
#
# The limit parameter is declared in the tool schema so that callers can
# discover it, in the same way the other tools declare their parameters.
# NOTE(review): assumes register_tool accepts 'integer' as a parameter type;
# the calls visible here only use 'string' — confirm.
register_tool('get_log_buffer',
              'Return recent log entries captured from the built-in ring buffer ' \
              '(Logger#add and << are auto-wrapped)',
              limit: { type: 'integer', description: 'Maximum number of entries to return, newest first (default 50)', required: false }) do |limit: 50|
  limit = limit.to_i
  limit = 50 if limit <= 0
  # Copy under the lock so the response is built from a stable snapshot.
  entries = @log_buffer_lock.synchronize { @log_buffer.dup }
  {
    total_captured: entries.size,
    capacity: MAX_LOGS,
    capture_active: defined?(::Logger) && ::Logger.method_defined?(:_original_add),
    entries: entries.last(limit).reverse
  }
rescue => e
  { error: e.message }
end
|
|
102
|
+
|
|
103
|
+
# Tool: describe every registered logger.
#
# Each entry carries the registered name and the logger's class, plus level
# and progname when the logger responds to them. For ::Logger instances the
# output device and formatter class are read from the logger's instance
# variables. A logger whose inspection raises is reported as
# { name:, error: } without affecting the others.
register_tool('get_logger_info',
              'List registered loggers with configuration: level, device, formatter, progname') do
  if loggers.empty?
    next {
      message: 'No loggers registered. Call DebugAgent.register_logger(:name, logger).',
      capture_active: defined?(::Logger) && ::Logger.method_defined?(:_original_add)
    }
  end

  described = loggers.map do |name, logger|
    begin
      details = { name: name, class: logger.class.name }
      details[:level] = severity_label(logger.level) if logger.respond_to?(:level)
      details[:progname] = logger.progname if logger.respond_to?(:progname)

      if defined?(::Logger) && logger.is_a?(::Logger)
        device = logger.instance_variable_get(:@logdev)&.instance_variable_get(:@dev)
        # Strings and nil are reported as-is; anything else (IO objects
        # included) is reported through #inspect.
        details[:device] = case device
                           when String, nil then device
                           else device.inspect
                           end

        custom_formatter = logger.instance_variable_get(:@formatter)
        details[:formatter] = custom_formatter ? custom_formatter.class.name : 'default'
      end

      details
    rescue => e
      { name: name, error: e.message }
    end
  end

  { loggers: described }
rescue => e
  { error: e.message }
end
|
|
139
|
+
|
|
140
|
+
# Tool: change the level of a registered logger at runtime.
#
# logger_name - name the logger was registered under
# level       - debug, info, warn, error or fatal (case-insensitive)
#
# Returns { error: ... } for an unknown logger, a logger without #level=, or
# an unrecognised level; otherwise reports the previous and the new level.
register_tool('set_log_level',
              "Dynamically change a registered logger's level",
              logger_name: { type: 'string', description: 'Registered logger name' },
              level: { type: 'string', description: 'One of: debug, info, warn, error, fatal' }) do |logger_name:, level:|
  logger = loggers[logger_name.to_s]
  if !logger
    next({ error: "No logger registered under '#{logger_name}'" })
  elsif !logger.respond_to?(:level=)
    next({ error: 'Logger does not respond to level=' })
  end

  requested = level.to_s.downcase
  severity = LEVEL_MAP[requested]
  unless severity
    next({ error: "Invalid level '#{level}'. Use debug/info/warn/error/fatal." })
  end

  # Read the old level before overwriting it.
  before = severity_label(logger.level)
  logger.level = severity

  { logger: logger_name, previous_level: before, new_level: requested, success: true }
rescue => e
  { error: e.message }
end
|
|
163
|
+
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
module DebugAgent
|
|
2
|
+
# Inspector for Prometheus metrics (prometheus-client gem).
|
|
3
|
+
# Uses the default registry: Prometheus::Client.registry.
|
|
4
|
+
|
|
5
|
+
class << self
  # Resolve the Prometheus registry to inspect.
  # Returns nil when Prometheus::Client is not loaded or exposes no registry.
  def prometheus_registry
    return nil unless defined?(::Prometheus) && defined?(::Prometheus::Client)
    ::Prometheus::Client.respond_to?(:registry) ? ::Prometheus::Client.registry : nil
  end

  # Safely read a metric's value for the empty label set. Different metric
  # types return different shapes from #get.
  #
  # Returns the value, or { error: message } when reading it raises.
  def prometheus_metric_value(metric)
    # Call #get the way the metric declares it. Passing a positional {} to a
    # method that only accepts the `labels:` keyword raises ArgumentError on
    # Ruby 3, which would turn every value into an error.
    value = prometheus_get_takes_keyword?(metric) ? metric.get(labels: {}) : metric.get({})

    # A hash holding only the empty label set is unwrapped to its value.
    if value.is_a?(Hash) && value.size == 1 && value.key?({})
      value[{}]
    else
      value
    end
  rescue => e
    { error: e.message }
  end

  private

  # True when +metric+ declares #get with a `labels:` keyword parameter.
  # Objects whose #get cannot be reflected on are assumed to take the
  # positional form.
  def prometheus_get_takes_keyword?(metric)
    metric.method(:get).parameters.include?(%i[key labels])
  rescue NameError
    false
  end
end
|
|
28
|
+
|
|
29
|
+
# Tool: list every metric in the Prometheus registry with its name, type,
# docstring and current value. A metric whose description raises is
# reported as { name:, error: } without affecting the others.
register_tool('get_registered_metrics',
              'List registered Prometheus metrics from the prometheus-client gem: ' \
              'name, type, docstring, value') do
  registry = prometheus_registry
  unless registry
    next { error: 'Prometheus client is not loaded (prometheus-client gem not installed)' }
  end
  unless registry.respond_to?(:metrics)
    next { error: 'No Prometheus registry available' }
  end

  rows = registry.metrics.map do |metric|
    begin
      kind = metric.respond_to?(:type) ? metric.type.to_s : 'unknown'
      help = metric.respond_to?(:docstring) ? metric.docstring : nil
      { name: metric.name, type: kind, docstring: help, value: prometheus_metric_value(metric) }
    rescue => e
      { name: metric&.respond_to?(:name) ? metric.name : 'unknown', error: e.message }
    end
  end

  { total: rows.size, metrics: rows }
rescue => e
  { error: e.message }
end
|
|
51
|
+
|
|
52
|
+
# Tool: look up a single Prometheus metric by name and report its name,
# type, docstring and current value.
#
# name - registered metric name; compared as a string
#
# Returns { error: ... } when the client is not loaded, the registry cannot
# list metrics, or no metric has that name.
register_tool('get_metric_value',
              'Get a specific Prometheus metric value by name',
              name: { type: 'string', description: 'Registered metric name' }) do |name:|
  registry = prometheus_registry
  unless registry
    next { error: 'Prometheus client is not loaded (prometheus-client gem not installed)' }
  end
  unless registry.respond_to?(:metrics)
    next { error: 'No Prometheus registry available' }
  end

  found = registry.metrics.find do |candidate|
    candidate.respond_to?(:name) && candidate.name.to_s == name.to_s
  end
  unless found
    next { error: "Metric '#{name}' not found in registry" }
  end

  kind = found.respond_to?(:type) ? found.type.to_s : 'unknown'
  help = found.respond_to?(:docstring) ? found.docstring : nil

  { name: found.name, type: kind, docstring: help, value: prometheus_metric_value(found) }
rescue => e
  { error: e.message }
end
|
|
71
|
+
end
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
module DebugAgent
|
|
2
|
+
# Registry of scheduled jobs (Sidekiq::Cron, rufus-scheduler, whenever, or
# custom Thread-based timers). Applications register jobs so the inspector
# can list them and report execution history.
#
#   DebugAgent.register_scheduled_job(:cleanup, 'every 30s', last_run: Time.now)

# Time#iso8601 (used for history timestamps) comes from the 'time' library
# on Rubies where it is not built in.
require 'time'

@scheduled_jobs = {}
@job_history = {}
@scheduler_lock = Mutex.new

class << self
  # Live registries keyed by stringified job name. These are the internal
  # hashes themselves, not copies.
  attr_reader :scheduled_jobs, :job_history

  # Register (or replace) a scheduled job.
  #
  # name     - job name; stringified for the registry key
  # schedule - schedule description, stored as given
  # opts     - optional :class (defaults to the name), :queue, :enabled
  #            (defaults to true), :source, :last_run, :next_run
  #
  # Returns the stored job hash.
  def register_scheduled_job(name, schedule, **opts)
    key = name.to_s
    job = {
      schedule: schedule,
      name: key,
      class: opts[:class] || key,
      queue: opts[:queue],
      enabled: opts.fetch(:enabled, true),
      # Kept so the job listing can show where the job came from; it falls
      # back to 'registered' there when this is nil.
      source: opts[:source],
      last_run: opts[:last_run],
      next_run: opts[:next_run],
      registered_at: Time.now
    }
    # Same lock as record_job_execution, which also writes to this hash.
    @scheduler_lock.synchronize { @scheduled_jobs[key] = job }
  end

  # Record a job execution for history tracking. Keeps the newest 100
  # entries per job and refreshes :last_run on the registered job, if any.
  #
  # name        - job name; stringified
  # duration_ms - numeric duration, rounded to 2 decimals
  # success     - whether the run succeeded (default true)
  # error       - optional error description
  def record_job_execution(name, duration_ms, success: true, error: nil)
    key = name.to_s
    @scheduler_lock.synchronize do
      finished_at = Time.now
      history = (@job_history[key] ||= [])
      history << {
        timestamp: finished_at.iso8601,
        duration_ms: duration_ms.round(2),
        success: success,
        error: error
      }
      history.shift if history.size > 100

      # Update last_run on the job itself
      registered = @scheduled_jobs[key]
      registered[:last_run] = finished_at if registered
    end
  end
end
|
|
46
|
+
|
|
47
|
+
# Tool: list scheduled jobs from three sources — jobs registered with
# DebugAgent, Sidekiq::Cron jobs, and jobs of every live Rufus::Scheduler
# instance. The last two are only consulted when their constants are
# defined; a failure while reading one of them is reported as a
# { source:, error: } entry instead of failing the whole listing.
register_tool('get_scheduled_jobs',
              'List scheduled jobs from Sidekiq::Cron, rufus-scheduler, whenever, ' \
              'or custom Thread-based timers. Shows schedule, last run, and status') do
  # Registered jobs (Thread-based, custom, etc.)
  jobs = scheduled_jobs.map do |name, job|
    {
      name: name,
      schedule: job[:schedule],
      class: job[:class],
      queue: job[:queue],
      enabled: job[:enabled],
      source: job[:source] || 'registered',
      last_run: job[:last_run],
      next_run: job[:next_run]
    }
  end

  # Sidekiq::Cron jobs
  if defined?(::Sidekiq::Cron::Job)
    begin
      ::Sidekiq::Cron::Job.all.each do |cron_job|
        jobs.push({
          name: cron_job.name,
          schedule: cron_job.cron,
          class: cron_job.klass,
          queue: cron_job.queue_name,
          enabled: cron_job.status == 'enabled',
          source: 'sidekiq-cron',
          last_run: cron_job.last_enqueue_time
        })
      end
    rescue => e
      jobs.push({ source: 'sidekiq-cron', error: e.message })
    end
  end

  # rufus-scheduler
  if defined?(::Rufus::Scheduler)
    begin
      ObjectSpace.each_object(::Rufus::Scheduler) do |scheduler|
        scheduler.jobs.each do |job|
          paused = job.respond_to?(:paused?) && job.paused?
          jobs.push({
            name: job.respond_to?(:tags) ? job.tags.first : nil,
            schedule: job.respond_to?(:original) ? job.original : job.class.name,
            class: job.class.name,
            enabled: !paused,
            source: 'rufus-scheduler',
            last_run: job.respond_to?(:last_time) ? job.last_time : nil,
            next_run: job.respond_to?(:next_time) ? job.next_time : nil
          })
        end
      end
    rescue => e
      jobs.push({ source: 'rufus-scheduler', error: e.message })
    end
  end

  if jobs.empty?
    next {
      message: 'No scheduled jobs registered. Call DebugAgent.register_scheduled_job(:name, schedule). ' \
               'Also auto-detects Sidekiq::Cron and rufus-scheduler if loaded.',
      total: 0
    }
  end

  { total: jobs.size, jobs: jobs }
rescue => e
  { error: e.message }
end
|
|
118
|
+
|
|
119
|
+
# Tool: report recorded job executions.
#
# job_name - optional; when given, returns up to the 50 newest entries for
#            that job (newest first). When omitted, returns one summary per
#            job: run counts, success/failure counts, mean duration and the
#            timestamp of the last run.
register_tool('get_job_history',
              'Get recent execution history for scheduled jobs: run times, duration, success/failure',
              job_name: { type: 'string', description: 'Job name to filter history (optional, returns all if omitted)', required: false }) do |job_name: nil|
  # Work on a copy taken under the scheduler lock. Iterating the live hash
  # while record_job_execution adds a new job from another thread would make
  # that insertion raise in the application's own job thread.
  snapshot = @scheduler_lock.synchronize { job_history.transform_values(&:dup) }

  if snapshot.empty?
    next { message: 'No job execution history recorded. Jobs must call DebugAgent.record_job_execution to track history.', total: 0 }
  end

  if job_name
    key = job_name.to_s
    history = snapshot[key] || []
    next {
      job: key,
      total: history.size,
      history: history.reverse.first(50)
    }
  end

  all = snapshot.map do |name, entries|
    successful = entries.count { |e| e[:success] }
    durations = entries.map { |e| e[:duration_ms] }
    {
      job: name,
      total_runs: entries.size,
      successful: successful,
      failed: entries.size - successful,
      avg_duration_ms: durations.empty? ? 0 : (durations.sum / durations.size.to_f).round(2),
      last_run: entries.last&.dig(:timestamp)
    }
  end

  { total_jobs: all.size, total_runs: snapshot.values.sum(&:size), jobs: all }
rescue => e
  { error: e.message }
end
|
|
154
|
+
end
|