decision_agent 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +234 -919
  3. data/bin/decision_agent +5 -5
  4. data/lib/decision_agent/agent.rb +19 -26
  5. data/lib/decision_agent/audit/null_adapter.rb +1 -2
  6. data/lib/decision_agent/decision.rb +3 -1
  7. data/lib/decision_agent/dsl/condition_evaluator.rb +4 -3
  8. data/lib/decision_agent/dsl/rule_parser.rb +4 -6
  9. data/lib/decision_agent/dsl/schema_validator.rb +27 -31
  10. data/lib/decision_agent/errors.rb +21 -6
  11. data/lib/decision_agent/evaluation.rb +3 -1
  12. data/lib/decision_agent/evaluation_validator.rb +78 -0
  13. data/lib/decision_agent/evaluators/json_rule_evaluator.rb +26 -0
  14. data/lib/decision_agent/evaluators/static_evaluator.rb +2 -6
  15. data/lib/decision_agent/monitoring/alert_manager.rb +282 -0
  16. data/lib/decision_agent/monitoring/dashboard/public/dashboard.css +381 -0
  17. data/lib/decision_agent/monitoring/dashboard/public/dashboard.js +471 -0
  18. data/lib/decision_agent/monitoring/dashboard/public/index.html +161 -0
  19. data/lib/decision_agent/monitoring/dashboard_server.rb +340 -0
  20. data/lib/decision_agent/monitoring/metrics_collector.rb +278 -0
  21. data/lib/decision_agent/monitoring/monitored_agent.rb +71 -0
  22. data/lib/decision_agent/monitoring/prometheus_exporter.rb +247 -0
  23. data/lib/decision_agent/replay/replay.rb +12 -22
  24. data/lib/decision_agent/scoring/base.rb +1 -1
  25. data/lib/decision_agent/scoring/consensus.rb +5 -5
  26. data/lib/decision_agent/scoring/weighted_average.rb +1 -1
  27. data/lib/decision_agent/version.rb +1 -1
  28. data/lib/decision_agent/versioning/activerecord_adapter.rb +141 -0
  29. data/lib/decision_agent/versioning/adapter.rb +100 -0
  30. data/lib/decision_agent/versioning/file_storage_adapter.rb +290 -0
  31. data/lib/decision_agent/versioning/version_manager.rb +127 -0
  32. data/lib/decision_agent/web/public/app.js +318 -0
  33. data/lib/decision_agent/web/public/index.html +56 -1
  34. data/lib/decision_agent/web/public/styles.css +219 -0
  35. data/lib/decision_agent/web/server.rb +169 -9
  36. data/lib/decision_agent.rb +11 -0
  37. data/lib/generators/decision_agent/install/install_generator.rb +40 -0
  38. data/lib/generators/decision_agent/install/templates/README +47 -0
  39. data/lib/generators/decision_agent/install/templates/migration.rb +37 -0
  40. data/lib/generators/decision_agent/install/templates/rule.rb +30 -0
  41. data/lib/generators/decision_agent/install/templates/rule_version.rb +66 -0
  42. data/spec/activerecord_thread_safety_spec.rb +553 -0
  43. data/spec/agent_spec.rb +13 -13
  44. data/spec/api_contract_spec.rb +16 -16
  45. data/spec/audit_adapters_spec.rb +3 -3
  46. data/spec/comprehensive_edge_cases_spec.rb +86 -86
  47. data/spec/dsl_validation_spec.rb +83 -83
  48. data/spec/edge_cases_spec.rb +23 -23
  49. data/spec/examples/feedback_aware_evaluator_spec.rb +7 -7
  50. data/spec/examples.txt +548 -0
  51. data/spec/issue_verification_spec.rb +685 -0
  52. data/spec/json_rule_evaluator_spec.rb +15 -15
  53. data/spec/monitoring/alert_manager_spec.rb +378 -0
  54. data/spec/monitoring/metrics_collector_spec.rb +281 -0
  55. data/spec/monitoring/monitored_agent_spec.rb +222 -0
  56. data/spec/monitoring/prometheus_exporter_spec.rb +242 -0
  57. data/spec/replay_edge_cases_spec.rb +58 -58
  58. data/spec/replay_spec.rb +11 -11
  59. data/spec/rfc8785_canonicalization_spec.rb +215 -0
  60. data/spec/scoring_spec.rb +1 -1
  61. data/spec/spec_helper.rb +9 -0
  62. data/spec/thread_safety_spec.rb +482 -0
  63. data/spec/thread_safety_spec.rb.broken +878 -0
  64. data/spec/versioning_spec.rb +777 -0
  65. data/spec/web_ui_rack_spec.rb +135 -0
  66. metadata +84 -11
@@ -0,0 +1,340 @@
1
+ require "sinatra/base"
2
+ require "json"
3
+
4
+ # Faye/WebSocket is optional for real-time features
5
+ begin
6
+ require "faye/websocket"
7
+ WEBSOCKET_AVAILABLE = true
8
+ rescue LoadError
9
+ WEBSOCKET_AVAILABLE = false
10
+ warn "Warning: faye-websocket gem not found. Real-time dashboard features will be disabled."
11
+ warn "Install with: gem install faye-websocket"
12
+ end
13
+
14
# Time#iso8601 (used for every event timestamp below) is defined by the "time"
# standard library; require it here rather than relying on another library
# having loaded it already.
require "time"

module DecisionAgent
  module Monitoring
    # Real-time monitoring dashboard server.
    #
    # A Sinatra application that serves the static dashboard page, a JSON API
    # over the metrics collector / Prometheus exporter / alert manager passed
    # to {.configure_monitoring}, and (when faye-websocket could be loaded) a
    # WebSocket endpoint that pushes metric and alert events to connected
    # clients.
    #
    # NOTE(review): the server binds to 0.0.0.0 by default, answers every
    # request with `Access-Control-Allow-Origin: *` and performs no
    # authentication, yet exposes mutating endpoints (alert rules, KPI
    # registration, acknowledge/resolve). Confirm it is only ever deployed
    # behind a trusted network boundary or an authenticating proxy.
    class DashboardServer < Sinatra::Base
      set :public_folder, File.expand_path("dashboard/public", __dir__)
      set :views, File.expand_path("dashboard/views", __dir__)
      set :bind, "0.0.0.0"
      set :port, 4568
      set :server, :puma

      # Guards the WebSocket client list: it is appended to / removed from in
      # WebSocket callbacks while broadcasts iterate it from whichever thread
      # recorded a metric.
      WEBSOCKET_CLIENTS_LOCK = Mutex.new

      # Enable CORS
      before do
        headers["Access-Control-Allow-Origin"] = "*"
        headers["Access-Control-Allow-Methods"] = "GET, POST, PUT, DELETE, OPTIONS"
        headers["Access-Control-Allow-Headers"] = "Content-Type"
      end

      # CORS pre-flight requests
      options "*" do
        200
      end

      # Class-level configuration
      class << self
        attr_accessor :metrics_collector, :prometheus_exporter, :alert_manager

        # @return [Array] the live list of connected WebSocket clients.
        #   Lazily initialised so that /health and the WebSocket callbacks work
        #   even if {.configure_monitoring} has not been called yet.
        def websocket_clients
          WEBSOCKET_CLIENTS_LOCK.synchronize { @websocket_clients ||= [] }
        end

        # Stores the monitoring collaborators, resets the client list and
        # subscribes to metric / alert events so they can be broadcast.
        #
        # @param metrics_collector [#add_observer, #statistics, #time_series, #metrics_count]
        # @param prometheus_exporter [#export, #metrics_hash, #register_kpi]
        # @param alert_manager [#add_handler, #active_alerts, #all_alerts, ...]
        def configure_monitoring(metrics_collector:, prometheus_exporter:, alert_manager:)
          @metrics_collector = metrics_collector
          @prometheus_exporter = prometheus_exporter
          @alert_manager = alert_manager
          WEBSOCKET_CLIENTS_LOCK.synchronize { @websocket_clients = [] }

          setup_real_time_updates
        end

        # Registers one observer on the metrics collector and one handler on
        # the alert manager; each forwards its event to all WebSocket clients.
        def setup_real_time_updates
          # Register observer for real-time metric updates
          @metrics_collector.add_observer do |event_type, metric|
            broadcast_to_clients({
                                   type: "metric_update",
                                   event: event_type,
                                   data: metric,
                                   timestamp: Time.now.utc.iso8601
                                 })
          end

          # Register alert handler
          @alert_manager.add_handler do |alert|
            broadcast_to_clients({
                                   type: "alert",
                                   data: alert,
                                   timestamp: Time.now.utc.iso8601
                                 })
          end
        end

        # Serialises +message+ to JSON and sends it to every open client.
        # A failure on one client is logged and does not stop the others.
        # No-op when faye-websocket is unavailable.
        #
        # @param message [#to_json]
        def broadcast_to_clients(message)
          return unless WEBSOCKET_AVAILABLE

          json_message = message.to_json
          # Iterate over a snapshot so concurrent connects/disconnects cannot
          # disturb the loop, and so no lock is held while doing socket I/O.
          recipients = WEBSOCKET_CLIENTS_LOCK.synchronize { (@websocket_clients ||= []).dup }
          recipients.each do |client|
            client.send(json_message) if client.ready_state == Faye::WebSocket::API::OPEN
          rescue StandardError => e
            warn "WebSocket send failed: #{e.message}"
          end
        end

        # @param ws [Object] a connected WebSocket to include in broadcasts
        def add_websocket_client(ws)
          WEBSOCKET_CLIENTS_LOCK.synchronize { (@websocket_clients ||= []) << ws }
        end

        # @param ws [Object] a WebSocket to drop from broadcasts
        def remove_websocket_client(ws)
          WEBSOCKET_CLIENTS_LOCK.synchronize { (@websocket_clients ||= []).delete(ws) }
        end
      end

      # Main dashboard page
      get "/" do
        send_file File.join(settings.public_folder, "index.html")
      end

      # WebSocket endpoint for real-time updates
      get "/ws" do
        unless WEBSOCKET_AVAILABLE
          content_type :json
          halt 503, { error: "WebSocket support not available. Install faye-websocket gem." }.to_json
        end

        if Faye::WebSocket.websocket?(request.env)
          ws = Faye::WebSocket.new(request.env)

          ws.on :open do |_event|
            self.class.add_websocket_client(ws)

            # Send initial state
            ws.send({
              type: "connected",
              message: "Connected to DecisionAgent monitoring",
              timestamp: Time.now.utc.iso8601
            }.to_json)
          end

          ws.on :message do |event|
            # Handle client messages
            handle_websocket_message(ws, event.data)
          end

          ws.on :close do |_event|
            self.class.remove_websocket_client(ws)
          end

          ws.rack_response
        else
          # Plain HTTP request to the WebSocket endpoint: ask for an upgrade.
          content_type :json
          status 426
          { error: "WebSocket connection required" }.to_json
        end
      end

      # API: Get current statistics (optionally limited to the last
      # `time_range` seconds)
      get "/api/stats" do
        content_type :json

        time_range = positive_integer_param(:time_range)
        self.class.metrics_collector.statistics(time_range: time_range).to_json
      end

      # API: Get time series data
      get "/api/timeseries/:metric_type" do
        content_type :json

        # Validated up front: a zero bucket size would otherwise surface as a
        # ZeroDivisionError (HTTP 500) from inside the collector.
        bucket_size = positive_integer_param(:bucket_size, 60)
        time_range = positive_integer_param(:time_range, 3600)

        self.class.metrics_collector.time_series(
          metric_type: params[:metric_type].to_sym,
          bucket_size: bucket_size,
          time_range: time_range
        ).to_json
      end

      # API: Prometheus metrics endpoint
      get "/metrics" do
        content_type PrometheusExporter::CONTENT_TYPE
        self.class.prometheus_exporter.export
      end

      # API: Get Prometheus metrics in JSON format
      get "/api/metrics" do
        content_type :json
        self.class.prometheus_exporter.metrics_hash.to_json
      end

      # API: Register custom KPI
      post "/api/kpi" do
        content_type :json

        begin
          data = parse_json_body

          self.class.prometheus_exporter.register_kpi(
            name: data[:name],
            value: data[:value],
            labels: data[:labels] || {},
            help: data[:help]
          )

          { success: true, message: "KPI registered" }.to_json
        rescue StandardError => e
          status 400
          { error: e.message }.to_json
        end
      end

      # API: Get active alerts
      get "/api/alerts" do
        content_type :json
        self.class.alert_manager.active_alerts.to_json
      end

      # API: Get all alerts (most recent `limit`, default 100)
      get "/api/alerts/all" do
        content_type :json
        limit = positive_integer_param(:limit, 100)
        self.class.alert_manager.all_alerts(limit: limit).to_json
      end

      # API: Create alert rule
      post "/api/alerts/rules" do
        content_type :json

        begin
          data = parse_json_body

          # Parse condition
          condition = parse_alert_condition(data[:condition], data[:condition_type])

          rule = self.class.alert_manager.add_rule(
            name: data[:name],
            condition: condition,
            severity: (data[:severity] || :warning).to_sym,
            threshold: data[:threshold],
            message: data[:message],
            cooldown: data[:cooldown] || 300
          )

          status 201
          rule.to_json
        rescue StandardError => e
          status 400
          { error: e.message }.to_json
        end
      end

      # API: Toggle alert rule
      put "/api/alerts/rules/:rule_id/toggle" do
        content_type :json

        begin
          # A body is required here: silently treating a missing body as
          # "disable" would be surprising.
          data = parse_json_body
          enabled = data[:enabled] || false

          self.class.alert_manager.toggle_rule(params[:rule_id], enabled)

          { success: true, message: "Rule #{enabled ? 'enabled' : 'disabled'}" }.to_json
        rescue StandardError => e
          status 400
          { error: e.message }.to_json
        end
      end

      # API: Acknowledge alert
      post "/api/alerts/:alert_id/acknowledge" do
        content_type :json

        begin
          # Every field has a default, so an empty body is acceptable.
          data = parse_json_body(allow_empty: true)
          acknowledged_by = data[:acknowledged_by] || "user"

          self.class.alert_manager.acknowledge_alert(params[:alert_id], acknowledged_by: acknowledged_by)

          { success: true, message: "Alert acknowledged" }.to_json
        rescue StandardError => e
          status 400
          { error: e.message }.to_json
        end
      end

      # API: Resolve alert
      post "/api/alerts/:alert_id/resolve" do
        content_type :json

        begin
          # Every field has a default, so an empty body is acceptable.
          data = parse_json_body(allow_empty: true)
          resolved_by = data[:resolved_by] || "user"

          self.class.alert_manager.resolve_alert(params[:alert_id], resolved_by: resolved_by)

          { success: true, message: "Alert resolved" }.to_json
        rescue StandardError => e
          status 400
          { error: e.message }.to_json
        end
      end

      # Health check
      get "/health" do
        content_type :json
        {
          status: "ok",
          version: DecisionAgent::VERSION,
          websocket_clients: self.class.websocket_clients.size,
          metrics_count: self.class.metrics_collector.metrics_count
        }.to_json
      end

      # Configures monitoring and starts the server (blocks in `run!`).
      #
      # @param metrics_collector [Object] see {.configure_monitoring}
      # @param prometheus_exporter [Object] see {.configure_monitoring}
      # @param alert_manager [Object] see {.configure_monitoring}
      # @param port [Integer] TCP port to listen on
      # @param host [String] interface to bind to
      def self.start!(metrics_collector:, prometheus_exporter:, alert_manager:, port: 4568, host: "0.0.0.0")
        configure_monitoring(
          metrics_collector: metrics_collector,
          prometheus_exporter: prometheus_exporter,
          alert_manager: alert_manager
        )

        set :port, port
        set :bind, host
        run!
      end

      private

      # Reads and parses the request body as a JSON object with symbol keys.
      #
      # @param allow_empty [Boolean] when true, a blank body yields `{}`
      # @return [Hash]
      # @raise [JSON::ParserError] if the body is not valid JSON
      # @raise [ArgumentError] if the JSON document is not an object
      def parse_json_body(allow_empty: false)
        raw = request.body.read.to_s
        return {} if allow_empty && raw.strip.empty?

        parsed = JSON.parse(raw, symbolize_names: true)
        raise ArgumentError, "JSON body must be an object" unless parsed.is_a?(Hash)

        parsed
      end

      # Reads query parameter +name+ as a strictly positive base-10 integer.
      # Halts the request with HTTP 400 when the value is present but invalid.
      #
      # @param name [Symbol] parameter name
      # @param default [Integer, nil] value returned when the parameter is absent
      # @return [Integer, nil]
      def positive_integer_param(name, default = nil)
        raw = params[name]
        return default if raw.nil?

        value = Integer(raw.to_s, 10, exception: false)
        return value if value&.positive?

        halt 400, { error: "#{name} must be a positive integer" }.to_json
      end

      # Answers a message received on a WebSocket. Supported actions are
      # "subscribe" (replies with current statistics) and "get_alerts"
      # (replies with active alerts); anything else is ignored. Errors are
      # reported back to the client as an "error" message.
      #
      # @param ws [Object] the socket to reply on
      # @param data [String] raw JSON payload from the client
      def handle_websocket_message(ws, data)
        message = JSON.parse(data, symbolize_names: true)

        case message[:action]
        when "subscribe"
          # Send current stats
          stats = self.class.metrics_collector.statistics
          ws.send({ type: "stats", data: stats }.to_json)
        when "get_alerts"
          alerts = self.class.alert_manager.active_alerts
          ws.send({ type: "alerts", data: alerts }.to_json)
        end
      rescue StandardError => e
        ws.send({ type: "error", message: e.message }.to_json)
      end

      # Builds an alert condition from the request payload.
      #
      # @param condition_data [Hash, nil] condition options (symbol keys);
      #   nil means "use the defaults" for the predefined types
      # @param condition_type [String] one of "high_error_rate",
      #   "low_confidence", "high_latency", "error_spike" or "custom"
      # @return [Object] whatever the matching AlertManager factory returns, or
      #   +condition_data+ unchanged for "custom"
      # @raise [ArgumentError] if +condition_data+ is present but not an object
      # @raise [RuntimeError] for an unknown +condition_type+
      def parse_alert_condition(condition_data, condition_type)
        # NOTE(review): a "custom" condition is passed through exactly as it
        # arrived in the JSON body — confirm AlertManager#add_rule accepts that.
        return condition_data if condition_type == "custom"

        options = condition_data || {}
        raise ArgumentError, "condition must be a JSON object" unless options.is_a?(Hash)

        case condition_type
        when "high_error_rate"
          AlertManager.high_error_rate(threshold: options[:threshold] || 0.1)
        when "low_confidence"
          AlertManager.low_confidence(threshold: options[:threshold] || 0.5)
        when "high_latency"
          AlertManager.high_latency(threshold_ms: options[:threshold_ms] || 1000)
        when "error_spike"
          AlertManager.error_spike(threshold: options[:threshold] || 10)
        else
          raise "Unknown condition type: #{condition_type}"
        end
      end
    end
  end
end
@@ -0,0 +1,278 @@
1
+ require "monitor"
2
+ require "time"
3
+
4
module DecisionAgent
  module Monitoring
    # Thread-safe, in-memory metrics collector for decision analytics.
    #
    # Metrics are kept in four time-ordered lists (decisions, evaluations,
    # performance, errors). Every write prunes entries older than
    # +window_size+ seconds and notifies registered observers. All public
    # methods run inside the object's monitor.
    class MetricsCollector
      include MonitorMixin

      attr_reader :metrics, :window_size

      # @param window_size [Numeric] retention window in seconds (default: 1 hour)
      def initialize(window_size: 3600)
        super()
        @window_size = window_size
        @metrics = {
          decisions: [],
          evaluations: [],
          performance: [],
          errors: []
        }
        @observers = []
        freeze_config
      end

      # Record a decision for analytics.
      #
      # @param decision [#decision, #confidence, #evaluations]
      # @param context [#to_h] only its size is recorded
      # @param duration_ms [Numeric, nil] how long the decision took
      # @return [Hash] the stored metric
      def record_decision(decision, context, duration_ms: nil)
        record(:decisions, :decision) do
          {
            decision: decision.decision,
            confidence: decision.confidence,
            evaluations_count: decision.evaluations.size,
            context_size: context.to_h.size,
            duration_ms: duration_ms,
            evaluator_names: decision.evaluations.map(&:evaluator_name).uniq
          }
        end
      end

      # Record individual evaluation metrics.
      #
      # @param evaluation [#decision, #weight, #evaluator_name]
      # @return [Hash] the stored metric
      def record_evaluation(evaluation)
        record(:evaluations, :evaluation) do
          {
            decision: evaluation.decision,
            weight: evaluation.weight,
            evaluator_name: evaluation.evaluator_name
          }
        end
      end

      # Record performance metrics for one operation.
      #
      # @param operation [Object] operation label
      # @param duration_ms [Numeric, nil] elapsed time in milliseconds
      # @param success [Boolean] whether the operation succeeded
      # @param metadata [Hash] free-form details stored with the metric
      # @return [Hash] the stored metric
      def record_performance(operation:, duration_ms:, success: true, metadata: {})
        record(:performance, :performance) do
          {
            operation: operation,
            duration_ms: duration_ms,
            success: success,
            metadata: metadata
          }
        end
      end

      # Record an error.
      #
      # @param error [Exception] its class name and message are stored
      # @param context [Hash] stored alongside the error
      # @return [Hash] the stored metric
      def record_error(error, context: {})
        record(:errors, :error) do
          {
            error_class: error.class.name,
            error_message: error.message,
            context: context
          }
        end
      end

      # Get aggregated statistics.
      #
      # @param time_range [Numeric, nil] only include metrics from the last
      #   +time_range+ seconds; nil includes everything still retained
      # @return [Hash] :summary, :decisions, :evaluations, :performance,
      #   :errors and :timestamp; a section is `{}` when it has no data
      def statistics(time_range: nil)
        synchronize do
          range_start = time_range ? Time.now.utc - time_range : nil

          decisions = filter_by_time(@metrics[:decisions], range_start)
          evaluations = filter_by_time(@metrics[:evaluations], range_start)
          performance = filter_by_time(@metrics[:performance], range_start)
          errors = filter_by_time(@metrics[:errors], range_start)

          {
            summary: {
              total_decisions: decisions.size,
              total_evaluations: evaluations.size,
              total_errors: errors.size,
              time_range: range_start ? "Last #{time_range}s" : "All time"
            },
            decisions: compute_decision_stats(decisions),
            evaluations: compute_evaluation_stats(evaluations),
            performance: compute_performance_stats(performance),
            errors: compute_error_stats(errors),
            timestamp: Time.now.utc
          }
        end
      end

      # Get time-series data for graphing.
      #
      # @param metric_type [Symbol, String] :decisions, :evaluations,
      #   :performance or :errors; an unknown type yields an empty series
      # @param bucket_size [Numeric] bucket width in seconds, must be positive
      # @param time_range [Numeric] how many seconds back to include
      # @return [Array<Hash>] buckets sorted by start time, each with
      #   :timestamp (bucket start, UTC), :count and :metrics
      # @raise [ArgumentError] if +bucket_size+ is not a positive number
      def time_series(metric_type:, bucket_size: 60, time_range: 3600)
        # Reject this up front: zero would divide by zero below, and a negative
        # width produces buckets that mean nothing.
        unless bucket_size.is_a?(Numeric) && bucket_size.positive?
          raise ArgumentError, "bucket_size must be a positive number of seconds (got #{bucket_size.inspect})"
        end

        key = metric_type.respond_to?(:to_sym) ? metric_type.to_sym : metric_type

        synchronize do
          range_start = Time.now.utc - time_range
          recent = (@metrics[key] || []).select { |metric| metric[:timestamp] >= range_start }

          recent
            .group_by { |metric| (metric[:timestamp].to_i / bucket_size) * bucket_size }
            .sort
            .map do |bucket_start, entries|
              {
                timestamp: Time.at(bucket_start).utc,
                count: entries.size,
                metrics: entries
              }
            end
        end
      end

      # Register observer for real-time updates. The block is called with
      # (event_type, metric) after every recorded metric.
      def add_observer(&block)
        synchronize do
          @observers << block
        end
      end

      # Clear all metrics
      def clear!
        synchronize do
          @metrics.each_value(&:clear)
        end
      end

      # Get current metrics count
      #
      # @return [Hash{Symbol => Integer}] number of retained entries per category
      def metrics_count
        synchronize do
          @metrics.transform_values(&:size)
        end
      end

      private

      def freeze_config
        @window_size.freeze
      end

      # Shared write path for all record_* methods: stamps the fields yielded
      # by the block with the current UTC time, stores the metric under
      # +category+, prunes expired entries and notifies observers — all while
      # holding the monitor so timestamps are taken in storage order.
      def record(category, event_type)
        synchronize do
          metric = { timestamp: Time.now.utc }.merge(yield)

          @metrics[category] << metric
          cleanup_old_metrics!
          notify_observers(event_type, metric)
          metric
        end
      end

      # Drops every metric older than the retention window.
      def cleanup_old_metrics!
        cutoff_time = Time.now.utc - @window_size

        @metrics.each_value do |data|
          data.delete_if { |m| m[:timestamp] < cutoff_time }
        end
      end

      # Returns +data+ itself when +start_time+ is nil, otherwise the entries
      # at or after +start_time+.
      def filter_by_time(data, start_time)
        return data unless start_time

        data.select { |m| m[:timestamp] >= start_time }
      end

      def compute_decision_stats(decisions)
        return {} if decisions.empty?

        confidences = decisions.map { |d| d[:confidence] }
        # duration_ms is optional for decisions, so ignore missing values.
        durations = decisions.map { |d| d[:duration_ms] }.compact

        {
          total: decisions.size,
          avg_confidence: (confidences.sum / confidences.size.to_f).round(4),
          min_confidence: confidences.min.round(4),
          max_confidence: confidences.max.round(4),
          decision_distribution: decisions.group_by { |d| d[:decision] }.transform_values(&:size),
          avg_duration_ms: durations.empty? ? nil : (durations.sum / durations.size.to_f).round(2),
          evaluators_used: decisions.flat_map { |d| d[:evaluator_names] }.uniq
        }
      end

      def compute_evaluation_stats(evaluations)
        return {} if evaluations.empty?

        weights = evaluations.map { |e| e[:weight] }

        {
          total: evaluations.size,
          avg_weight: (weights.sum / weights.size.to_f).round(4),
          evaluator_distribution: evaluations.group_by { |e| e[:evaluator_name] }.transform_values(&:size),
          decision_distribution: evaluations.group_by { |e| e[:decision] }.transform_values(&:size)
        }
      end

      def compute_performance_stats(performance)
        return {} if performance.empty?

        # Skip records without a duration (as the decision stats do) so one
        # such record cannot make every later statistics call raise.
        durations = performance.map { |p| p[:duration_ms] }.compact
        successes = performance.count { |p| p[:success] }
        timed = !durations.empty?

        {
          total_operations: performance.size,
          successful: successes,
          failed: performance.size - successes,
          success_rate: (successes / performance.size.to_f).round(4),
          avg_duration_ms: timed ? (durations.sum / durations.size.to_f).round(2) : nil,
          min_duration_ms: timed ? durations.min.round(2) : nil,
          max_duration_ms: timed ? durations.max.round(2) : nil,
          p95_duration_ms: timed ? percentile(durations, 0.95).round(2) : nil,
          p99_duration_ms: timed ? percentile(durations, 0.99).round(2) : nil
        }
      end

      def compute_error_stats(errors)
        return {} if errors.empty?

        {
          total: errors.size,
          by_type: errors.group_by { |e| e[:error_class] }.transform_values(&:size),
          recent_errors: errors.last(10).map do |e|
            {
              timestamp: e[:timestamp],
              error: e[:error_class],
              message: e[:error_message]
            }
          end
        }
      end

      # Nearest-rank percentile of +array+; +percentile+ is a fraction
      # (0.95 for p95). Returns 0 for an empty array.
      def percentile(array, percentile)
        return 0 if array.empty?

        sorted = array.sort
        index = (percentile * sorted.length).ceil - 1
        # Clamp so fractions outside 0..1 still select a real element.
        sorted[index.clamp(0, sorted.length - 1)]
      end

      # Calls every observer; a failing observer is logged with `warn` and
      # does not prevent the remaining observers from running.
      def notify_observers(event_type, metric)
        @observers.each do |observer|
          observer.call(event_type, metric)
        rescue StandardError => e
          warn "Observer notification failed: #{e.message}"
        end
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
module DecisionAgent
  module Monitoring
    # Wrapper around Agent that automatically records metrics.
    #
    # {#decide} is instrumented; every other public method of the wrapped
    # agent is delegated unchanged.
    class MonitoredAgent
      attr_reader :agent, :metrics_collector

      # @param agent [#decide] the agent to wrap
      # @param metrics_collector [#record_decision, #record_evaluation,
      #   #record_performance, #record_error] where metrics are sent
      def initialize(agent:, metrics_collector:)
        @agent = agent
        @metrics_collector = metrics_collector
      end

      # Make a decision and automatically record metrics.
      #
      # On success records the decision, each of its evaluations and a
      # successful "decide" performance entry. If the agent raises, records
      # the error plus a failed performance entry and re-raises.
      #
      # @param context [Context, Object] wrapped in a Context unless it already is one
      # @param feedback [Hash] passed through to the agent
      # @return [Object] whatever the wrapped agent's #decide returns
      # @raise [StandardError] any error raised by the wrapped agent
      def decide(context:, feedback: {})
        ctx = context.is_a?(Context) ? context : Context.new(context)
        started_at = monotonic_ms

        # Only the agent call is guarded: a failure while recording metrics
        # must not be logged as if the decision itself had failed.
        begin
          result = @agent.decide(context: ctx, feedback: feedback)
        rescue StandardError => e
          record_failure(e, ctx, monotonic_ms - started_at)
          raise
        end

        record_success(result, ctx, monotonic_ms - started_at)
        result
      end

      # Delegate other methods to the wrapped agent. Only the agent's public
      # interface is reachable; anything else raises NoMethodError as usual.
      def method_missing(method, ...)
        return super unless @agent.respond_to?(method)

        @agent.public_send(method, ...)
      end

      def respond_to_missing?(method, include_private = false)
        @agent.respond_to?(method) || super
      end

      private

      # Current monotonic clock reading in milliseconds; unlike Time.now it is
      # unaffected by wall-clock adjustments, so differences are true durations.
      def monotonic_ms
        Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond)
      end

      # Records the decision, its evaluations and a successful performance entry.
      def record_success(result, ctx, duration_ms)
        @metrics_collector.record_decision(result, ctx, duration_ms: duration_ms)

        result.evaluations.each do |evaluation|
          @metrics_collector.record_evaluation(evaluation)
        end

        @metrics_collector.record_performance(
          operation: "decide",
          duration_ms: duration_ms,
          success: true,
          metadata: {
            evaluators_count: result.evaluations.size,
            decision: result.decision,
            confidence: result.confidence
          }
        )
      end

      # Records the agent's error and a failed performance entry.
      def record_failure(error, ctx, duration_ms)
        @metrics_collector.record_error(error, context: ctx.to_h)

        @metrics_collector.record_performance(
          operation: "decide",
          duration_ms: duration_ms,
          success: false,
          metadata: { error_class: error.class.name }
        )
      end
    end
  end
end