decision_agent 0.1.1 → 0.1.3

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these versions as they appear in their public registries.
Files changed (66)
  1. checksums.yaml +4 -4
  2. data/README.md +234 -919
  3. data/bin/decision_agent +5 -5
  4. data/lib/decision_agent/agent.rb +19 -26
  5. data/lib/decision_agent/audit/null_adapter.rb +1 -2
  6. data/lib/decision_agent/decision.rb +3 -1
  7. data/lib/decision_agent/dsl/condition_evaluator.rb +4 -3
  8. data/lib/decision_agent/dsl/rule_parser.rb +4 -6
  9. data/lib/decision_agent/dsl/schema_validator.rb +27 -31
  10. data/lib/decision_agent/errors.rb +21 -6
  11. data/lib/decision_agent/evaluation.rb +3 -1
  12. data/lib/decision_agent/evaluation_validator.rb +78 -0
  13. data/lib/decision_agent/evaluators/json_rule_evaluator.rb +26 -0
  14. data/lib/decision_agent/evaluators/static_evaluator.rb +2 -6
  15. data/lib/decision_agent/monitoring/alert_manager.rb +282 -0
  16. data/lib/decision_agent/monitoring/dashboard/public/dashboard.css +381 -0
  17. data/lib/decision_agent/monitoring/dashboard/public/dashboard.js +471 -0
  18. data/lib/decision_agent/monitoring/dashboard/public/index.html +161 -0
  19. data/lib/decision_agent/monitoring/dashboard_server.rb +340 -0
  20. data/lib/decision_agent/monitoring/metrics_collector.rb +278 -0
  21. data/lib/decision_agent/monitoring/monitored_agent.rb +71 -0
  22. data/lib/decision_agent/monitoring/prometheus_exporter.rb +247 -0
  23. data/lib/decision_agent/replay/replay.rb +12 -22
  24. data/lib/decision_agent/scoring/base.rb +1 -1
  25. data/lib/decision_agent/scoring/consensus.rb +5 -5
  26. data/lib/decision_agent/scoring/weighted_average.rb +1 -1
  27. data/lib/decision_agent/version.rb +1 -1
  28. data/lib/decision_agent/versioning/activerecord_adapter.rb +141 -0
  29. data/lib/decision_agent/versioning/adapter.rb +100 -0
  30. data/lib/decision_agent/versioning/file_storage_adapter.rb +290 -0
  31. data/lib/decision_agent/versioning/version_manager.rb +127 -0
  32. data/lib/decision_agent/web/public/app.js +318 -0
  33. data/lib/decision_agent/web/public/index.html +56 -1
  34. data/lib/decision_agent/web/public/styles.css +219 -0
  35. data/lib/decision_agent/web/server.rb +169 -9
  36. data/lib/decision_agent.rb +11 -0
  37. data/lib/generators/decision_agent/install/install_generator.rb +40 -0
  38. data/lib/generators/decision_agent/install/templates/README +47 -0
  39. data/lib/generators/decision_agent/install/templates/migration.rb +37 -0
  40. data/lib/generators/decision_agent/install/templates/rule.rb +30 -0
  41. data/lib/generators/decision_agent/install/templates/rule_version.rb +66 -0
  42. data/spec/activerecord_thread_safety_spec.rb +553 -0
  43. data/spec/agent_spec.rb +13 -13
  44. data/spec/api_contract_spec.rb +16 -16
  45. data/spec/audit_adapters_spec.rb +3 -3
  46. data/spec/comprehensive_edge_cases_spec.rb +86 -86
  47. data/spec/dsl_validation_spec.rb +83 -83
  48. data/spec/edge_cases_spec.rb +23 -23
  49. data/spec/examples/feedback_aware_evaluator_spec.rb +7 -7
  50. data/spec/examples.txt +548 -0
  51. data/spec/issue_verification_spec.rb +685 -0
  52. data/spec/json_rule_evaluator_spec.rb +15 -15
  53. data/spec/monitoring/alert_manager_spec.rb +378 -0
  54. data/spec/monitoring/metrics_collector_spec.rb +281 -0
  55. data/spec/monitoring/monitored_agent_spec.rb +222 -0
  56. data/spec/monitoring/prometheus_exporter_spec.rb +242 -0
  57. data/spec/replay_edge_cases_spec.rb +58 -58
  58. data/spec/replay_spec.rb +11 -11
  59. data/spec/rfc8785_canonicalization_spec.rb +215 -0
  60. data/spec/scoring_spec.rb +1 -1
  61. data/spec/spec_helper.rb +9 -0
  62. data/spec/thread_safety_spec.rb +482 -0
  63. data/spec/thread_safety_spec.rb.broken +878 -0
  64. data/spec/versioning_spec.rb +777 -0
  65. data/spec/web_ui_rack_spec.rb +135 -0
  66. metadata +84 -11
data/spec/monitoring/metrics_collector_spec.rb
@@ -0,0 +1,281 @@
+ require "spec_helper"
+ require "decision_agent/monitoring/metrics_collector"
+
+ RSpec.describe DecisionAgent::Monitoring::MetricsCollector do
+   let(:collector) { described_class.new(window_size: 60) }
+   let(:decision) do
+     double(
+       "Decision",
+       decision: "approve",
+       confidence: 0.85,
+       evaluations: [
+         double("Evaluation", evaluator_name: "test_evaluator")
+       ]
+     )
+   end
+   let(:context) { double("Context", to_h: { user: "test" }) }
+
+   describe "#initialize" do
+     it "initializes with default window size" do
+       collector = described_class.new
+       expect(collector.window_size).to eq(3600)
+     end
+
+     it "initializes with custom window size" do
+       expect(collector.window_size).to eq(60)
+     end
+
+     it "initializes empty metrics" do
+       counts = collector.metrics_count
+       expect(counts[:decisions]).to eq(0)
+       expect(counts[:evaluations]).to eq(0)
+       expect(counts[:performance]).to eq(0)
+       expect(counts[:errors]).to eq(0)
+     end
+   end
+
+   describe "#record_decision" do
+     it "records a decision metric" do
+       metric = collector.record_decision(decision, context, duration_ms: 10.5)
+
+       expect(metric[:decision]).to eq("approve")
+       expect(metric[:confidence]).to eq(0.85)
+       expect(metric[:duration_ms]).to eq(10.5)
+       expect(metric[:context_size]).to eq(1)
+       expect(metric[:evaluations_count]).to eq(1)
+       expect(metric[:evaluator_names]).to eq(["test_evaluator"])
+     end
+
+     it "increments decision count" do
+       expect do
+         collector.record_decision(decision, context)
+       end.to change { collector.metrics_count[:decisions] }.by(1)
+     end
+
+     it "notifies observers" do
+       observed = []
+       collector.add_observer do |type, metric|
+         observed << [type, metric]
+       end
+
+       collector.record_decision(decision, context)
+
+       expect(observed.size).to eq(1)
+       expect(observed[0][0]).to eq(:decision)
+       expect(observed[0][1][:decision]).to eq("approve")
+     end
+   end
+
+   describe "#record_evaluation" do
+     let(:evaluation) do
+       double(
+         "Evaluation",
+         decision: "approve",
+         weight: 0.9,
+         evaluator_name: "test_evaluator"
+       )
+     end
+
+     it "records an evaluation metric" do
+       metric = collector.record_evaluation(evaluation)
+
+       expect(metric[:decision]).to eq("approve")
+       expect(metric[:weight]).to eq(0.9)
+       expect(metric[:evaluator_name]).to eq("test_evaluator")
+     end
+
+     it "increments evaluation count" do
+       expect do
+         collector.record_evaluation(evaluation)
+       end.to change { collector.metrics_count[:evaluations] }.by(1)
+     end
+   end
+
+   describe "#record_performance" do
+     it "records performance metrics" do
+       metric = collector.record_performance(
+         operation: "decide",
+         duration_ms: 25.5,
+         success: true,
+         metadata: { evaluators: 2 }
+       )
+
+       expect(metric[:operation]).to eq("decide")
+       expect(metric[:duration_ms]).to eq(25.5)
+       expect(metric[:success]).to be true
+       expect(metric[:metadata]).to eq({ evaluators: 2 })
+     end
+
+     it "records failed operations" do
+       metric = collector.record_performance(
+         operation: "decide",
+         duration_ms: 10.0,
+         success: false
+       )
+
+       expect(metric[:success]).to be false
+     end
+   end
+
+   describe "#record_error" do
+     let(:error) { StandardError.new("Test error") }
+
+     it "records error metrics" do
+       metric = collector.record_error(error, context: { user_id: 123 })
+
+       expect(metric[:error_class]).to eq("StandardError")
+       expect(metric[:error_message]).to eq("Test error")
+       expect(metric[:context]).to eq({ user_id: 123 })
+     end
+
+     it "increments error count" do
+       expect do
+         collector.record_error(error)
+       end.to change { collector.metrics_count[:errors] }.by(1)
+     end
+   end
+
+   describe "#statistics" do
+     before do
+       # Record some metrics
+       5.times do |i|
+         collector.record_decision(decision, context, duration_ms: (i + 1) * 10)
+       end
+
+       2.times do
+         collector.record_performance(operation: "decide", duration_ms: 15.0, success: true)
+       end
+       collector.record_performance(operation: "decide", duration_ms: 20.0, success: false)
+
+       collector.record_error(StandardError.new("Error 1"))
+     end
+
+     it "returns summary statistics" do
+       stats = collector.statistics
+
+       expect(stats[:summary][:total_decisions]).to eq(5)
+       expect(stats[:summary][:total_evaluations]).to eq(0)
+       expect(stats[:summary][:total_errors]).to eq(1)
+     end
+
+     it "computes decision statistics" do
+       stats = collector.statistics
+
+       expect(stats[:decisions][:total]).to eq(5)
+       expect(stats[:decisions][:avg_confidence]).to eq(0.85)
+       expect(stats[:decisions][:min_confidence]).to eq(0.85)
+       expect(stats[:decisions][:max_confidence]).to eq(0.85)
+       expect(stats[:decisions][:avg_duration_ms]).to be_within(0.1).of(30.0)
+     end
+
+     it "computes performance statistics" do
+       stats = collector.statistics
+
+       expect(stats[:performance][:total_operations]).to eq(3)
+       expect(stats[:performance][:successful]).to eq(2)
+       expect(stats[:performance][:failed]).to eq(1)
+       expect(stats[:performance][:success_rate]).to be_within(0.01).of(0.6667)
+     end
+
+     it "computes error statistics" do
+       stats = collector.statistics
+
+       expect(stats[:errors][:total]).to eq(1)
+       expect(stats[:errors][:by_type]["StandardError"]).to eq(1)
+     end
+
+     it "filters by time range" do
+       stats = collector.statistics(time_range: 30)
+       expect(stats[:summary][:time_range]).to eq("Last 30s")
+     end
+   end
+
+   describe "#time_series" do
+     before do
+       10.times do
+         collector.record_decision(decision, context)
+         sleep 0.01 # Small delay to ensure different buckets
+       end
+     end
+
+     it "returns time series data" do
+       series = collector.time_series(metric_type: :decisions, bucket_size: 1, time_range: 60)
+
+       expect(series).to be_an(Array)
+       expect(series.first).to have_key(:timestamp)
+       expect(series.first).to have_key(:count)
+       expect(series.first).to have_key(:metrics)
+     end
+
+     it "buckets metrics by time" do
+       series = collector.time_series(metric_type: :decisions, bucket_size: 60, time_range: 3600)
+
+       total_count = series.sum { |s| s[:count] }
+       expect(total_count).to eq(10)
+     end
+   end
+
+   describe "#clear!" do
+     before do
+       collector.record_decision(decision, context)
+       collector.record_error(StandardError.new("Test"))
+     end
+
+     it "clears all metrics" do
+       collector.clear!
+
+       counts = collector.metrics_count
+       expect(counts[:decisions]).to eq(0)
+       expect(counts[:errors]).to eq(0)
+     end
+   end
+
+   describe "thread safety" do
+     it "handles concurrent writes safely" do
+       threads = 10.times.map do
+         Thread.new do
+           10.times do
+             collector.record_decision(decision, context)
+           end
+         end
+       end
+
+       threads.each(&:join)
+
+       expect(collector.metrics_count[:decisions]).to eq(100)
+     end
+
+     it "handles concurrent reads and writes" do
+       writer = Thread.new do
+         50.times do
+           collector.record_decision(decision, context)
+           sleep 0.001
+         end
+       end
+
+       reader = Thread.new do
+         50.times do
+           collector.statistics
+           sleep 0.001
+         end
+       end
+
+       expect { writer.join && reader.join }.not_to raise_error
+     end
+   end
+
+   describe "metric cleanup" do
+     it "removes old metrics outside window" do
+       collector = described_class.new(window_size: 1)
+
+       collector.record_decision(decision, context)
+       expect(collector.metrics_count[:decisions]).to eq(1)
+
+       sleep 1.5
+
+       collector.record_decision(decision, context)
+       # Old metric should be cleaned up
+       expect(collector.metrics_count[:decisions]).to eq(1)
+     end
+   end
+ end
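The spec above is the clearest picture this diff gives of the new MetricsCollector surface: construct it with an optional window_size, feed it decisions, evaluations, performance samples, and errors, then read back metrics_count, statistics, and time_series. The sketch below chains those calls outside RSpec as a minimal illustration only; the ExampleDecision/ExampleEvaluation structs are hypothetical stand-ins for any objects that respond to the readers the spec doubles expose (decision, confidence, evaluations, weight, evaluator_name), and the values are made up.

require "decision_agent/monitoring/metrics_collector"

# Hypothetical stand-ins, mirroring the doubles in the spec above.
ExampleEvaluation = Struct.new(:decision, :weight, :evaluator_name, keyword_init: true)
ExampleDecision   = Struct.new(:decision, :confidence, :evaluations, keyword_init: true)

collector  = DecisionAgent::Monitoring::MetricsCollector.new(window_size: 300)
evaluation = ExampleEvaluation.new(decision: "approve", weight: 0.9, evaluator_name: "credit_check")
decision   = ExampleDecision.new(decision: "approve", confidence: 0.85, evaluations: [evaluation])

collector.record_decision(decision, { user: "alice" }, duration_ms: 12.4)
collector.record_evaluation(evaluation)
collector.record_performance(operation: "decide", duration_ms: 12.4, success: true)

collector.metrics_count                 # per-type counts inside the rolling window
collector.statistics[:summary]          # totals, as asserted in the spec above
collector.time_series(metric_type: :decisions, bucket_size: 60, time_range: 300)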
data/spec/monitoring/monitored_agent_spec.rb
@@ -0,0 +1,222 @@
+ require "spec_helper"
+ require "decision_agent/monitoring/metrics_collector"
+ require "decision_agent/monitoring/monitored_agent"
+
+ RSpec.describe DecisionAgent::Monitoring::MonitoredAgent do
+   let(:collector) { DecisionAgent::Monitoring::MetricsCollector.new }
+   let(:evaluator) do
+     double(
+       "Evaluator",
+       evaluate: DecisionAgent::Evaluation.new(
+         decision: "approve",
+         weight: 0.9,
+         reason: "Test reason",
+         evaluator_name: "test_evaluator"
+       )
+     )
+   end
+   let(:agent) { DecisionAgent::Agent.new(evaluators: [evaluator]) }
+   let(:monitored_agent) { described_class.new(agent: agent, metrics_collector: collector) }
+
+   describe "#initialize" do
+     it "wraps an agent with metrics collection" do
+       expect(monitored_agent.agent).to eq(agent)
+       expect(monitored_agent.metrics_collector).to eq(collector)
+     end
+   end
+
+   describe "#decide" do
+     let(:context) { { amount: 1000 } }
+
+     it "makes a decision and records metrics" do
+       result = monitored_agent.decide(context: context)
+
+       expect(result).to be_a(DecisionAgent::Decision)
+       expect(result.decision).to eq("approve")
+       expect(collector.metrics_count[:decisions]).to eq(1)
+       expect(collector.metrics_count[:evaluations]).to eq(1)
+       expect(collector.metrics_count[:performance]).to eq(1)
+     end
+
+     it "records decision metrics with duration" do
+       monitored_agent.decide(context: context)
+
+       stats = collector.statistics
+       expect(stats[:decisions][:total]).to eq(1)
+       expect(stats[:decisions][:avg_duration_ms]).to be > 0
+     end
+
+     it "records evaluation metrics" do
+       monitored_agent.decide(context: context)
+
+       stats = collector.statistics
+       expect(stats[:evaluations][:total]).to eq(1)
+       expect(stats[:evaluations][:evaluator_distribution]["test_evaluator"]).to eq(1)
+     end
+
+     it "records performance metrics as successful" do
+       monitored_agent.decide(context: context)
+
+       stats = collector.statistics
+       expect(stats[:performance][:total_operations]).to eq(1)
+       expect(stats[:performance][:successful]).to eq(1)
+       expect(stats[:performance][:success_rate]).to eq(1.0)
+     end
+
+     it "includes metadata in performance metrics" do
+       monitored_agent.decide(context: context)
+
+       collector.statistics
+       perf_metric = collector.instance_variable_get(:@metrics)[:performance].first
+
+       expect(perf_metric[:metadata][:evaluators_count]).to eq(1)
+       expect(perf_metric[:metadata][:decision]).to eq("approve")
+       expect(perf_metric[:metadata][:confidence]).to be_a(Float)
+     end
+
+     context "when decision fails" do
+       before do
+         allow(agent).to receive(:decide).and_raise(StandardError.new("Test error"))
+       end
+
+       it "records error metrics" do
+         expect do
+           monitored_agent.decide(context: context)
+         end.to raise_error(StandardError, "Test error")
+
+         expect(collector.metrics_count[:errors]).to eq(1)
+       end
+
+       it "records failed performance metrics" do
+         expect do
+           monitored_agent.decide(context: context)
+         end.to raise_error(StandardError)
+
+         stats = collector.statistics
+         expect(stats[:performance][:total_operations]).to eq(1)
+         expect(stats[:performance][:failed]).to eq(1)
+         expect(stats[:performance][:success_rate]).to eq(0.0)
+       end
+
+       it "includes error details in metrics" do
+         expect do
+           monitored_agent.decide(context: context)
+         end.to raise_error(StandardError)
+
+         error_metric = collector.instance_variable_get(:@metrics)[:errors].first
+         expect(error_metric[:error_class]).to eq("StandardError")
+         expect(error_metric[:error_message]).to eq("Test error")
+         expect(error_metric[:context]).to eq(context)
+       end
+
+       it "re-raises the error" do
+         expect do
+           monitored_agent.decide(context: context)
+         end.to raise_error(StandardError, "Test error")
+       end
+     end
+
+     it "handles Context objects" do
+       ctx = DecisionAgent::Context.new(context)
+       result = monitored_agent.decide(context: ctx)
+
+       expect(result).to be_a(DecisionAgent::Decision)
+       expect(collector.metrics_count[:decisions]).to eq(1)
+     end
+
+     it "handles hash contexts" do
+       result = monitored_agent.decide(context: context)
+
+       expect(result).to be_a(DecisionAgent::Decision)
+       expect(collector.metrics_count[:decisions]).to eq(1)
+     end
+
+     it "measures decision duration accurately" do
+       # Mock agent to introduce delay
+       allow(agent).to receive(:decide) do |*args|
+         sleep 0.01 # 10ms delay
+         evaluator.evaluate(args.first)
+         DecisionAgent::Decision.new(
+           decision: "approve",
+           confidence: 0.9,
+           explanations: ["Test"],
+           evaluations: [evaluator.evaluate(args.first)],
+           audit_payload: {}
+         )
+       end
+
+       monitored_agent.decide(context: context)
+
+       stats = collector.statistics
+       expect(stats[:decisions][:avg_duration_ms]).to be >= 10
+     end
+   end
+
+   describe "method delegation" do
+     it "delegates methods to wrapped agent" do
+       expect(monitored_agent.evaluators).to eq(agent.evaluators)
+       expect(monitored_agent.scoring_strategy).to eq(agent.scoring_strategy)
+       expect(monitored_agent.audit_adapter).to eq(agent.audit_adapter)
+     end
+
+     it "responds to agent methods" do
+       expect(monitored_agent).to respond_to(:evaluators)
+       expect(monitored_agent).to respond_to(:scoring_strategy)
+       expect(monitored_agent).to respond_to(:audit_adapter)
+     end
+   end
+
+   describe "thread safety" do
+     it "handles concurrent decisions safely" do
+       # Materialize let variables before creating threads
+       test_context = { amount: 1000 }
+       test_monitored_agent = monitored_agent
+       test_collector = collector
+
+       threads = 10.times.map do
+         Thread.new do
+           10.times do
+             test_monitored_agent.decide(context: test_context)
+           end
+         end
+       end
+
+       threads.each(&:join)
+
+       expect(test_collector.metrics_count[:decisions]).to eq(100)
+       expect(test_collector.metrics_count[:evaluations]).to eq(100)
+       expect(test_collector.metrics_count[:performance]).to eq(100)
+     end
+   end
+
+   describe "integration test" do
+     it "provides comprehensive metrics for multiple decisions" do
+       contexts = [
+         { amount: 500 },
+         { amount: 1500 },
+         { amount: 2000 }
+       ]
+
+       contexts.each do |ctx|
+         monitored_agent.decide(context: ctx)
+       end
+
+       stats = collector.statistics
+
+       # Summary
+       expect(stats[:summary][:total_decisions]).to eq(3)
+       expect(stats[:summary][:total_evaluations]).to eq(3)
+       expect(stats[:summary][:total_errors]).to eq(0)
+
+       # Decision stats
+       expect(stats[:decisions][:total]).to eq(3)
+       expect(stats[:decisions][:avg_confidence]).to be > 0
+       expect(stats[:decisions][:decision_distribution]["approve"]).to eq(3)
+
+       # Performance stats
+       expect(stats[:performance][:total_operations]).to eq(3)
+       expect(stats[:performance][:success_rate]).to eq(1.0)
+       expect(stats[:performance][:avg_duration_ms]).to be > 0
+     end
+   end
+ end
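These examples also show the only wiring MonitoredAgent appears to need: wrap an existing Agent together with a MetricsCollector and call decide as usual. The sketch below strings that together outside the test suite, using only calls exercised above (Agent.new(evaluators:), MonitoredAgent.new(agent:, metrics_collector:), decide(context:)). AlwaysApprove is a made-up evaluator standing in for a real one; the assumption that an evaluator is any object responding to evaluate(context) and returning a DecisionAgent::Evaluation comes from the doubles in this spec, not from documentation.

require "decision_agent"
require "decision_agent/monitoring/metrics_collector"
require "decision_agent/monitoring/monitored_agent"

# Hypothetical evaluator, mirroring the spec's double.
class AlwaysApprove
  def evaluate(_context)
    DecisionAgent::Evaluation.new(
      decision: "approve",
      weight: 0.9,
      reason: "example rule",
      evaluator_name: "always_approve"
    )
  end
end

collector = DecisionAgent::Monitoring::MetricsCollector.new
agent     = DecisionAgent::Agent.new(evaluators: [AlwaysApprove.new])
monitored = DecisionAgent::Monitoring::MonitoredAgent.new(agent: agent, metrics_collector: collector)

result = monitored.decide(context: { amount: 1_000 })
result.decision                      # => "approve"

stats = collector.statistics
stats[:performance][:success_rate]   # 1.0 while every decide call succeeds
stats[:decisions][:avg_duration_ms]  # wall-clock cost per decision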