decision_agent 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +212 -35
  3. data/bin/decision_agent +3 -8
  4. data/lib/decision_agent/ab_testing/ab_test.rb +197 -0
  5. data/lib/decision_agent/ab_testing/ab_test_assignment.rb +76 -0
  6. data/lib/decision_agent/ab_testing/ab_test_manager.rb +317 -0
  7. data/lib/decision_agent/ab_testing/ab_testing_agent.rb +152 -0
  8. data/lib/decision_agent/ab_testing/storage/activerecord_adapter.rb +155 -0
  9. data/lib/decision_agent/ab_testing/storage/adapter.rb +67 -0
  10. data/lib/decision_agent/ab_testing/storage/memory_adapter.rb +116 -0
  11. data/lib/decision_agent/agent.rb +19 -26
  12. data/lib/decision_agent/audit/null_adapter.rb +1 -2
  13. data/lib/decision_agent/decision.rb +3 -1
  14. data/lib/decision_agent/dsl/condition_evaluator.rb +4 -3
  15. data/lib/decision_agent/dsl/rule_parser.rb +4 -6
  16. data/lib/decision_agent/dsl/schema_validator.rb +27 -31
  17. data/lib/decision_agent/errors.rb +11 -8
  18. data/lib/decision_agent/evaluation.rb +3 -1
  19. data/lib/decision_agent/evaluation_validator.rb +78 -0
  20. data/lib/decision_agent/evaluators/json_rule_evaluator.rb +26 -0
  21. data/lib/decision_agent/evaluators/static_evaluator.rb +2 -6
  22. data/lib/decision_agent/monitoring/alert_manager.rb +282 -0
  23. data/lib/decision_agent/monitoring/dashboard/public/dashboard.css +381 -0
  24. data/lib/decision_agent/monitoring/dashboard/public/dashboard.js +471 -0
  25. data/lib/decision_agent/monitoring/dashboard/public/index.html +161 -0
  26. data/lib/decision_agent/monitoring/dashboard_server.rb +340 -0
  27. data/lib/decision_agent/monitoring/metrics_collector.rb +423 -0
  28. data/lib/decision_agent/monitoring/monitored_agent.rb +71 -0
  29. data/lib/decision_agent/monitoring/prometheus_exporter.rb +247 -0
  30. data/lib/decision_agent/monitoring/storage/activerecord_adapter.rb +253 -0
  31. data/lib/decision_agent/monitoring/storage/base_adapter.rb +90 -0
  32. data/lib/decision_agent/monitoring/storage/memory_adapter.rb +222 -0
  33. data/lib/decision_agent/replay/replay.rb +12 -22
  34. data/lib/decision_agent/scoring/base.rb +1 -1
  35. data/lib/decision_agent/scoring/consensus.rb +5 -5
  36. data/lib/decision_agent/scoring/weighted_average.rb +1 -1
  37. data/lib/decision_agent/version.rb +1 -1
  38. data/lib/decision_agent/versioning/activerecord_adapter.rb +69 -33
  39. data/lib/decision_agent/versioning/adapter.rb +1 -3
  40. data/lib/decision_agent/versioning/file_storage_adapter.rb +143 -35
  41. data/lib/decision_agent/versioning/version_manager.rb +4 -12
  42. data/lib/decision_agent/web/public/index.html +1 -1
  43. data/lib/decision_agent/web/server.rb +19 -24
  44. data/lib/decision_agent.rb +14 -0
  45. data/lib/generators/decision_agent/install/install_generator.rb +42 -5
  46. data/lib/generators/decision_agent/install/templates/ab_test_assignment_model.rb +45 -0
  47. data/lib/generators/decision_agent/install/templates/ab_test_model.rb +54 -0
  48. data/lib/generators/decision_agent/install/templates/ab_testing_migration.rb +43 -0
  49. data/lib/generators/decision_agent/install/templates/ab_testing_tasks.rake +189 -0
  50. data/lib/generators/decision_agent/install/templates/decision_agent_tasks.rake +114 -0
  51. data/lib/generators/decision_agent/install/templates/decision_log.rb +57 -0
  52. data/lib/generators/decision_agent/install/templates/error_metric.rb +53 -0
  53. data/lib/generators/decision_agent/install/templates/evaluation_metric.rb +43 -0
  54. data/lib/generators/decision_agent/install/templates/migration.rb +17 -6
  55. data/lib/generators/decision_agent/install/templates/monitoring_migration.rb +109 -0
  56. data/lib/generators/decision_agent/install/templates/performance_metric.rb +76 -0
  57. data/lib/generators/decision_agent/install/templates/rule.rb +3 -3
  58. data/lib/generators/decision_agent/install/templates/rule_version.rb +13 -7
  59. data/spec/ab_testing/ab_test_manager_spec.rb +330 -0
  60. data/spec/ab_testing/ab_test_spec.rb +270 -0
  61. data/spec/activerecord_thread_safety_spec.rb +553 -0
  62. data/spec/agent_spec.rb +13 -13
  63. data/spec/api_contract_spec.rb +16 -16
  64. data/spec/audit_adapters_spec.rb +3 -3
  65. data/spec/comprehensive_edge_cases_spec.rb +86 -86
  66. data/spec/dsl_validation_spec.rb +83 -83
  67. data/spec/edge_cases_spec.rb +23 -23
  68. data/spec/examples/feedback_aware_evaluator_spec.rb +7 -7
  69. data/spec/examples.txt +612 -0
  70. data/spec/issue_verification_spec.rb +759 -0
  71. data/spec/json_rule_evaluator_spec.rb +15 -15
  72. data/spec/monitoring/alert_manager_spec.rb +378 -0
  73. data/spec/monitoring/metrics_collector_spec.rb +281 -0
  74. data/spec/monitoring/monitored_agent_spec.rb +222 -0
  75. data/spec/monitoring/prometheus_exporter_spec.rb +242 -0
  76. data/spec/monitoring/storage/activerecord_adapter_spec.rb +346 -0
  77. data/spec/monitoring/storage/memory_adapter_spec.rb +247 -0
  78. data/spec/replay_edge_cases_spec.rb +58 -58
  79. data/spec/replay_spec.rb +11 -11
  80. data/spec/rfc8785_canonicalization_spec.rb +215 -0
  81. data/spec/scoring_spec.rb +1 -1
  82. data/spec/spec_helper.rb +9 -0
  83. data/spec/thread_safety_spec.rb +482 -0
  84. data/spec/thread_safety_spec.rb.broken +878 -0
  85. data/spec/versioning_spec.rb +141 -37
  86. data/spec/web_ui_rack_spec.rb +135 -0
  87. metadata +93 -6
@@ -0,0 +1,423 @@
1
+ require "monitor"
2
+ require "time"
3
+ require_relative "storage/memory_adapter"
4
+
5
+ begin
6
+ require_relative "storage/activerecord_adapter"
7
+ rescue LoadError, NameError
8
+ # ActiveRecord adapter not available
9
+ end
10
+
11
+ module DecisionAgent
12
+ module Monitoring
13
+ # Thread-safe metrics collector for decision analytics
14
+ class MetricsCollector
15
+ include MonitorMixin
16
+
17
+ attr_reader :metrics, :window_size, :storage_adapter
18
+
19
# Build a collector with a retention window and a storage backend.
#
# window_size - number of seconds of in-memory metrics to keep (default 3600,
#               i.e. one hour); also forwarded to the storage factory
# storage     - :auto, :activerecord / :database, :memory, or a ready-made
#               adapter instance (resolved by initialize_storage_adapter)
def initialize(window_size: 3600, storage: :auto)
  super() # the class includes MonitorMixin, so its initializer has to run first
  @window_size = window_size
  @storage_adapter = initialize_storage_adapter(storage, window_size)
  @observers = []

  # Legacy in-memory metrics, kept for backward compatibility with observers
  @metrics = { decisions: [], evaluations: [], performance: [], errors: [] }

  freeze_config
end
34
+
35
# Record a decision for analytics.
#
# The metric is appended to the in-memory :decisions list, stale entries are
# pruned via cleanup_old_metrics!, the decision is handed to the storage
# adapter, and observers are notified with (:decision, metric).
#
# decision    - object responding to #decision, #confidence and #evaluations
# context     - object responding to #to_h
# duration_ms - optional duration of the decision in milliseconds
#
# Returns the metric Hash that was stored in memory.
def record_decision(decision, context, duration_ms: nil)
  synchronize do
    {
      timestamp: Time.now.utc,
      decision: decision.decision,
      confidence: decision.confidence,
      evaluations_count: decision.evaluations.size,
      context_size: context.to_h.size,
      duration_ms: duration_ms,
      evaluator_names: decision.evaluations.map(&:evaluator_name).uniq
    }.tap do |entry|
      # In-memory copy first (backward compatibility with observers)
      @metrics[:decisions].push(entry)
      cleanup_old_metrics!

      # Then the storage adapter
      @storage_adapter.record_decision(
        decision.decision,
        context.to_h,
        confidence: decision.confidence,
        evaluations_count: decision.evaluations.size,
        duration_ms: duration_ms,
        status: determine_decision_status(decision)
      )

      notify_observers(:decision, entry)
    end
  end
end
66
+
67
# Record the metrics of a single evaluation.
#
# The metric is appended to the in-memory :evaluations list, stale entries are
# pruned, the evaluation is forwarded to the storage adapter (its weight is
# passed as the score and counts as a success when positive), and observers
# are notified with (:evaluation, metric).
#
# evaluation - object responding to #decision, #weight and #evaluator_name
#
# Returns the metric Hash that was stored in memory.
def record_evaluation(evaluation)
  synchronize do
    {
      timestamp: Time.now.utc,
      decision: evaluation.decision,
      weight: evaluation.weight,
      evaluator_name: evaluation.evaluator_name
    }.tap do |entry|
      # In-memory copy first (backward compatibility with observers)
      @metrics[:evaluations].push(entry)
      cleanup_old_metrics!

      # Then the storage adapter
      @storage_adapter.record_evaluation(
        evaluation.evaluator_name,
        score: evaluation.weight,
        success: evaluation.weight.positive?,
        details: { decision: evaluation.decision }
      )

      notify_observers(:evaluation, entry)
    end
  end
end
93
+
94
# Record a performance measurement for an operation.
#
# The metric is appended to the in-memory :performance list, stale entries are
# pruned, the measurement is forwarded to the storage adapter (success is
# translated to the status string "success" or "failure"), and observers are
# notified with (:performance, metric).
#
# operation   - name of the measured operation
# duration_ms - duration in milliseconds
# success     - whether the operation succeeded (default true)
# metadata    - extra details stored alongside the measurement
#
# Returns the metric Hash that was stored in memory.
def record_performance(operation:, duration_ms:, success: true, metadata: {})
  synchronize do
    {
      timestamp: Time.now.utc,
      operation: operation,
      duration_ms: duration_ms,
      success: success,
      metadata: metadata
    }.tap do |entry|
      # In-memory copy first (backward compatibility with observers)
      @metrics[:performance].push(entry)
      cleanup_old_metrics!

      # Then the storage adapter
      outcome = success ? "success" : "failure"
      @storage_adapter.record_performance(
        operation,
        duration_ms: duration_ms,
        status: outcome,
        metadata: metadata
      )

      notify_observers(:performance, entry)
    end
  end
end
121
+
122
# Record an error.
#
# The metric (class name, message and context) is appended to the in-memory
# :errors list, stale entries are pruned, the error is forwarded to the storage
# adapter together with its backtrace and a severity from
# determine_error_severity, and observers are notified with (:error, metric).
#
# error   - the exception that occurred
# context - optional Hash describing what was being processed
#
# Returns the metric Hash that was stored in memory.
def record_error(error, context: {})
  synchronize do
    {
      timestamp: Time.now.utc,
      error_class: error.class.name,
      error_message: error.message,
      context: context
    }.tap do |entry|
      # In-memory copy first (backward compatibility with observers)
      @metrics[:errors].push(entry)
      cleanup_old_metrics!

      # Then the storage adapter
      @storage_adapter.record_error(
        error.class.name,
        message: error.message,
        stack_trace: error.backtrace,
        severity: determine_error_severity(error),
        context: context
      )

      notify_observers(:error, entry)
    end
  end
end
149
+
150
# Get aggregated statistics.
#
# When a time_range is given and the storage adapter is not a
# Storage::MemoryAdapter and responds to #statistics, the adapter's result is
# returned with :timestamp and :storage added. In every other case (including
# a falsy adapter result) the statistics are computed from the in-memory
# metrics and tagged with storage "memory (fallback)".
#
# time_range - optional number of seconds to look back; nil means all
#              in-memory metrics
#
# Returns a Hash of statistics.
def statistics(time_range: nil)
  synchronize do
    delegate = time_range &&
               @storage_adapter.respond_to?(:statistics) &&
               !@storage_adapter.is_a?(Storage::MemoryAdapter)
    persisted = delegate ? @storage_adapter.statistics(time_range: time_range) : nil

    if persisted
      persisted.merge(timestamp: Time.now.utc, storage: @storage_adapter.class.name)
    else
      since = time_range ? Time.now.utc - time_range : nil
      windowed = %i[decisions evaluations performance errors].to_h do |kind|
        [kind, filter_by_time(@metrics[kind], since)]
      end

      {
        summary: {
          total_decisions: windowed[:decisions].size,
          total_evaluations: windowed[:evaluations].size,
          total_errors: windowed[:errors].size,
          time_range: since ? "Last #{time_range}s" : "All time"
        },
        decisions: compute_decision_stats(windowed[:decisions]),
        evaluations: compute_evaluation_stats(windowed[:evaluations]),
        performance: compute_performance_stats(windowed[:performance]),
        errors: compute_error_stats(windowed[:errors]),
        timestamp: Time.now.utc,
        storage: "memory (fallback)"
      }
    end
  end
end
188
+
189
# Get time-series data for graphing.
#
# A storage adapter that is not a Storage::MemoryAdapter and responds to
# #time_series is asked first; its result is returned as-is when it has a
# truthy :timestamps entry. Otherwise the in-memory metrics of the requested
# type are grouped into buckets of bucket_size seconds.
#
# metric_type - key into the in-memory metrics (e.g. :decisions)
# bucket_size - bucket width in seconds (default 60)
# time_range  - how many seconds to look back (default 3600)
#
# Returns the adapter's series, or (in-memory path) an Array of
# { timestamp:, count:, metrics: } Hashes ordered by bucket start.
def time_series(metric_type:, bucket_size: 60, time_range: 3600)
  synchronize do
    delegate = @storage_adapter.respond_to?(:time_series) &&
               !@storage_adapter.is_a?(Storage::MemoryAdapter)

    if delegate
      series = @storage_adapter.time_series(metric_type, bucket_size: bucket_size, time_range: time_range)
      return series if series && series[:timestamps]
    end

    # In-memory path: drop entries outside the range, then bucket by the
    # start of the bucket_size-second slot each timestamp falls into
    entries = @metrics[metric_type] || []
    since = Time.now.utc - time_range
    in_range = entries.reject { |entry| entry[:timestamp] < since }
    grouped = in_range.group_by { |entry| (entry[:timestamp].to_i / bucket_size) * bucket_size }

    grouped.sort.map do |bucket_start, members|
      { timestamp: Time.at(bucket_start).utc, count: members.size, metrics: members }
    end
  end
end
224
+
225
# Register an observer for real-time updates.
#
# The given block is stored and later invoked by notify_observers with
# (event_type, metric).
#
# Returns the list of registered observers.
def add_observer(&block)
  synchronize { @observers.push(block) }
end
231
+
232
# Clear all metrics.
#
# Empties every in-memory metric list. When the storage adapter is a
# Storage::MemoryAdapter, its #cleanup is additionally invoked with an
# older_than of 100 years (in seconds).
#
# NOTE(review): if the adapter's #cleanup removes entries *older than* the
# given age, a 100-year threshold would remove nothing rather than everything.
# The adapter is not visible from here - confirm against MemoryAdapter#cleanup.
#
# Returns the adapter's cleanup result, or nil when no adapter cleanup ran.
def clear!
  hundred_years = 100 * 365 * 24 * 60 * 60

  synchronize do
    @metrics.each_value(&:clear)
    @storage_adapter.cleanup(older_than: hundred_years) if @storage_adapter.is_a?(Storage::MemoryAdapter)
  end
end
243
+
244
# Get the current metrics count.
#
# A storage adapter that is not a Storage::MemoryAdapter and responds to
# #metrics_count supplies the result; otherwise the sizes of the in-memory
# metric lists are returned, keyed by metric type.
def metrics_count
  synchronize do
    persistent = @storage_adapter.respond_to?(:metrics_count) &&
                 !@storage_adapter.is_a?(Storage::MemoryAdapter)

    persistent ? @storage_adapter.metrics_count : @metrics.transform_values(&:size)
  end
end
258
+
259
# Clean up old metrics in the storage adapter.
#
# older_than - age threshold passed straight through to the adapter's #cleanup
#
# Returns the adapter's cleanup result, or 0 when the adapter has no #cleanup.
def cleanup_old_metrics_from_storage(older_than:)
  synchronize do
    if @storage_adapter.respond_to?(:cleanup)
      @storage_adapter.cleanup(older_than: older_than)
    else
      0
    end
  end
end
267
+
268
+ private
269
+
270
# Freeze the configured window size; invoked at the end of #initialize.
#
# Returns the (frozen) @window_size value.
def freeze_config
  @window_size.freeze
end
273
+
274
# Resolve the storage option given to #initialize into an adapter instance.
#
# storage_option - :auto (ActiveRecord adapter when it is defined and reports
#                  itself available, memory adapter otherwise),
#                  :activerecord / :database, :memory, or any non-Symbol
#                  object, which is used as the adapter as-is
# window_size    - forwarded to the memory adapter
#
# Raises RuntimeError when the ActiveRecord adapter is requested but not
# defined, and ArgumentError for an unknown Symbol.
def initialize_storage_adapter(storage_option, window_size)
  # Anything that is not a Symbol is a custom adapter instance
  return storage_option unless storage_option.is_a?(Symbol)

  case storage_option
  when :memory
    DecisionAgent::Monitoring::Storage::MemoryAdapter.new(window_size: window_size)
  when :activerecord, :database
    unless defined?(DecisionAgent::Monitoring::Storage::ActiveRecordAdapter)
      raise "ActiveRecord adapter not available. Install models or use :memory storage."
    end

    DecisionAgent::Monitoring::Storage::ActiveRecordAdapter.new
  when :auto
    # Auto-detect: prefer ActiveRecord if available
    activerecord_ready = defined?(DecisionAgent::Monitoring::Storage::ActiveRecordAdapter) &&
                         DecisionAgent::Monitoring::Storage::ActiveRecordAdapter.available?

    if activerecord_ready
      DecisionAgent::Monitoring::Storage::ActiveRecordAdapter.new
    else
      DecisionAgent::Monitoring::Storage::MemoryAdapter.new(window_size: window_size)
    end
  else
    raise ArgumentError, "Unknown storage option: #{storage_option}. Use :auto, :activerecord, or :memory"
  end
end
299
+
300
# Map a decision's confidence to the status string stored with it.
#
# Only a confidence below 0.3 counts as "failure"; medium (0.3...0.7) and high
# (>= 0.7) confidence are both reported as "success".
def determine_decision_status(decision)
  decision.confidence < 0.3 ? "failure" : "success"
end
306
+
307
# Classify an error into a severity string for the storage adapter.
#
# ArgumentError / TypeError -> "medium"
# any other StandardError   -> "low"
# any other Exception       -> "critical"
# anything else             -> "low"
def determine_error_severity(error)
  case error
  when ArgumentError, TypeError then "medium"
  when StandardError then "low"
  when Exception then "critical"
  else "low"
  end
end
319
+
320
# Drop in-memory metrics whose timestamp is older than the retention window
# (@window_size seconds before now). Entries are removed in place from every
# metric list.
#
# Returns the @metrics Hash.
def cleanup_old_metrics!
  oldest_allowed = Time.now.utc - @window_size

  @metrics.each_value do |entries|
    entries.reject! { |entry| entry[:timestamp] < oldest_allowed }
  end
end
327
+
328
# Select the entries recorded at or after start_time.
#
# data       - Array of metric Hashes with a :timestamp
# start_time - lower bound (inclusive); when nil the data is returned as-is
def filter_by_time(data, start_time)
  start_time ? data.select { |entry| entry[:timestamp] >= start_time } : data
end
333
+
334
# Summarise a list of in-memory decision metrics.
#
# decisions - Array of Hashes with :decision, :confidence, :duration_ms and
#             :evaluator_names
#
# Returns {} for an empty list; otherwise a Hash with the total, the average /
# minimum / maximum confidence (rounded to 4 places), the number of
# occurrences per decision, the average duration (rounded to 2 places, nil
# when no entry has a duration) and the distinct evaluator names.
def compute_decision_stats(decisions)
  return {} if decisions.empty?

  total = decisions.size
  confidences = decisions.map { |entry| entry[:confidence] }
  # Entries recorded without a duration are left out of the average
  timed = decisions.map { |entry| entry[:duration_ms] }.compact
  avg_duration = timed.empty? ? nil : (timed.sum / timed.size.to_f).round(2)

  {
    total: total,
    avg_confidence: (confidences.sum / total.to_f).round(4),
    min_confidence: confidences.min.round(4),
    max_confidence: confidences.max.round(4),
    decision_distribution: decisions.map { |entry| entry[:decision] }.tally,
    avg_duration_ms: avg_duration,
    evaluators_used: decisions.flat_map { |entry| entry[:evaluator_names] }.uniq
  }
end
353
+
354
# Summarise a list of in-memory evaluation metrics.
#
# evaluations - Array of Hashes with :decision, :weight and :evaluator_name
#
# Returns {} for an empty list; otherwise a Hash with the total, the average
# weight (rounded to 4 places) and the number of occurrences per evaluator
# name and per decision.
def compute_evaluation_stats(evaluations)
  return {} if evaluations.empty?

  total = evaluations.size
  weights = evaluations.map { |entry| entry[:weight] }

  {
    total: total,
    avg_weight: (weights.sum / total.to_f).round(4),
    evaluator_distribution: evaluations.map { |entry| entry[:evaluator_name] }.tally,
    decision_distribution: evaluations.map { |entry| entry[:decision] }.tally
  }
end
369
+
370
# Summarise a list of in-memory performance metrics.
#
# performance - Array of Hashes with :duration_ms and :success
#
# Returns {} for an empty list; otherwise a Hash with operation counts, the
# success rate (rounded to 4 places) and the average / minimum / maximum /
# 95th / 99th percentile duration (rounded to 2 places).
def compute_performance_stats(performance)
  return {} if performance.empty?

  total = performance.size
  durations = performance.map { |entry| entry[:duration_ms] }
  succeeded = performance.count { |entry| entry[:success] }

  {
    total_operations: total,
    successful: succeeded,
    failed: total - succeeded,
    success_rate: (succeeded / total.to_f).round(4),
    avg_duration_ms: (durations.sum / total.to_f).round(2),
    min_duration_ms: durations.min.round(2),
    max_duration_ms: durations.max.round(2),
    p95_duration_ms: percentile(durations, 0.95).round(2),
    p99_duration_ms: percentile(durations, 0.99).round(2)
  }
end
388
+
389
# Summarise a list of in-memory error metrics.
#
# errors - Array of Hashes with :timestamp, :error_class and :error_message
#
# Returns {} for an empty list; otherwise a Hash with the total, the number of
# errors per class name and a condensed view of the last ten entries.
def compute_error_stats(errors)
  return {} if errors.empty?

  latest = errors.last(10).map do |entry|
    { timestamp: entry[:timestamp], error: entry[:error_class], message: entry[:error_message] }
  end

  {
    total: errors.size,
    by_type: errors.map { |entry| entry[:error_class] }.tally,
    recent_errors: latest
  }
end
404
+
405
# Nearest-rank percentile of a list of numbers.
#
# array      - the values (need not be sorted; the argument is not modified)
# percentile - the requested fraction, normally between 0.0 and 1.0
#
# The computed rank is clamped to the bounds of the list, so a fraction of 0
# (or below) yields the minimum and a fraction above 1.0 yields the maximum
# instead of indexing past the end and returning nil.
#
# Returns the selected value, or 0 for an empty list.
def percentile(array, percentile)
  return 0 if array.empty?

  sorted = array.sort
  rank = (percentile * sorted.length).ceil - 1
  sorted[rank.clamp(0, sorted.length - 1)]
end
412
+
413
# Invoke every registered observer with (event_type, metric).
#
# A StandardError raised by one observer is reported through warn and does not
# stop the remaining observers or propagate to the caller, so a faulty
# observer cannot disrupt metric recording.
#
# Returns the list of observers.
def notify_observers(event_type, metric)
  @observers.each do |callback|
    begin
      callback.call(event_type, metric)
    rescue StandardError => e
      warn "Observer notification failed: #{e.message}"
    end
  end
end
421
+ end
422
+ end
423
+ end
@@ -0,0 +1,71 @@
1
+ module DecisionAgent
2
+ module Monitoring
3
# Wrapper around Agent that automatically records metrics.
#
# #decide is timed and reported to the metrics collector; every other message
# is forwarded to the wrapped agent.
class MonitoredAgent
  attr_reader :agent, :metrics_collector

  # agent             - the wrapped agent; must respond to #decide
  # metrics_collector - receives #record_decision, #record_evaluation,
  #                     #record_performance and #record_error
  def initialize(agent:, metrics_collector:)
    @agent = agent
    @metrics_collector = metrics_collector
  end

  # Make a decision and automatically record metrics.
  #
  # context  - a Context, or raw data that is wrapped in Context.new
  # feedback - passed through to the wrapped agent
  #
  # On success the decision, each of its evaluations and a successful "decide"
  # performance entry are recorded, and the agent's result is returned.
  # On a StandardError the error and a failed "decide" performance entry are
  # recorded and the error is re-raised. The recording calls sit inside the
  # same begin block, so an error raised while recording a successful decision
  # is handled the same way.
  #
  # Durations are measured with the monotonic clock so that wall-clock
  # adjustments cannot produce negative or inflated values.
  def decide(context:, feedback: {})
    ctx = context.is_a?(Context) ? context : Context.new(context)

    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    begin
      result = @agent.decide(context: ctx, feedback: feedback)
      duration_ms = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000

      # Record decision metrics
      @metrics_collector.record_decision(result, ctx, duration_ms: duration_ms)

      # Record each evaluation
      result.evaluations.each do |evaluation|
        @metrics_collector.record_evaluation(evaluation)
      end

      # Record successful performance
      @metrics_collector.record_performance(
        operation: "decide",
        duration_ms: duration_ms,
        success: true,
        metadata: {
          evaluators_count: result.evaluations.size,
          decision: result.decision,
          confidence: result.confidence
        }
      )

      result
    rescue StandardError => e
      duration_ms = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000

      # Record error
      @metrics_collector.record_error(e, context: ctx.to_h)

      # Record failed performance
      @metrics_collector.record_performance(
        operation: "decide",
        duration_ms: duration_ms,
        success: false,
        metadata: { error_class: e.class.name }
      )

      raise
    end
  end

  # Delegate other methods to the wrapped agent
  def method_missing(method, ...)
    @agent.send(method, ...)
  end

  # Report the wrapped agent's methods as our own, matching method_missing
  def respond_to_missing?(method, include_private = false)
    @agent.respond_to?(method, include_private) || super
  end
end
70
+ end
71
+ end