decision_agent 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +212 -35
- data/bin/decision_agent +3 -8
- data/lib/decision_agent/agent.rb +19 -26
- data/lib/decision_agent/audit/null_adapter.rb +1 -2
- data/lib/decision_agent/decision.rb +3 -1
- data/lib/decision_agent/dsl/condition_evaluator.rb +4 -3
- data/lib/decision_agent/dsl/rule_parser.rb +4 -6
- data/lib/decision_agent/dsl/schema_validator.rb +27 -31
- data/lib/decision_agent/errors.rb +11 -8
- data/lib/decision_agent/evaluation.rb +3 -1
- data/lib/decision_agent/evaluation_validator.rb +78 -0
- data/lib/decision_agent/evaluators/json_rule_evaluator.rb +26 -0
- data/lib/decision_agent/evaluators/static_evaluator.rb +2 -6
- data/lib/decision_agent/monitoring/alert_manager.rb +282 -0
- data/lib/decision_agent/monitoring/dashboard/public/dashboard.css +381 -0
- data/lib/decision_agent/monitoring/dashboard/public/dashboard.js +471 -0
- data/lib/decision_agent/monitoring/dashboard/public/index.html +161 -0
- data/lib/decision_agent/monitoring/dashboard_server.rb +340 -0
- data/lib/decision_agent/monitoring/metrics_collector.rb +278 -0
- data/lib/decision_agent/monitoring/monitored_agent.rb +71 -0
- data/lib/decision_agent/monitoring/prometheus_exporter.rb +247 -0
- data/lib/decision_agent/replay/replay.rb +12 -22
- data/lib/decision_agent/scoring/base.rb +1 -1
- data/lib/decision_agent/scoring/consensus.rb +5 -5
- data/lib/decision_agent/scoring/weighted_average.rb +1 -1
- data/lib/decision_agent/version.rb +1 -1
- data/lib/decision_agent/versioning/activerecord_adapter.rb +69 -33
- data/lib/decision_agent/versioning/adapter.rb +1 -3
- data/lib/decision_agent/versioning/file_storage_adapter.rb +143 -35
- data/lib/decision_agent/versioning/version_manager.rb +4 -12
- data/lib/decision_agent/web/public/index.html +1 -1
- data/lib/decision_agent/web/server.rb +19 -24
- data/lib/decision_agent.rb +7 -0
- data/lib/generators/decision_agent/install/install_generator.rb +5 -5
- data/lib/generators/decision_agent/install/templates/migration.rb +17 -6
- data/lib/generators/decision_agent/install/templates/rule.rb +3 -3
- data/lib/generators/decision_agent/install/templates/rule_version.rb +13 -7
- data/spec/activerecord_thread_safety_spec.rb +553 -0
- data/spec/agent_spec.rb +13 -13
- data/spec/api_contract_spec.rb +16 -16
- data/spec/audit_adapters_spec.rb +3 -3
- data/spec/comprehensive_edge_cases_spec.rb +86 -86
- data/spec/dsl_validation_spec.rb +83 -83
- data/spec/edge_cases_spec.rb +23 -23
- data/spec/examples/feedback_aware_evaluator_spec.rb +7 -7
- data/spec/examples.txt +548 -0
- data/spec/issue_verification_spec.rb +685 -0
- data/spec/json_rule_evaluator_spec.rb +15 -15
- data/spec/monitoring/alert_manager_spec.rb +378 -0
- data/spec/monitoring/metrics_collector_spec.rb +281 -0
- data/spec/monitoring/monitored_agent_spec.rb +222 -0
- data/spec/monitoring/prometheus_exporter_spec.rb +242 -0
- data/spec/replay_edge_cases_spec.rb +58 -58
- data/spec/replay_spec.rb +11 -11
- data/spec/rfc8785_canonicalization_spec.rb +215 -0
- data/spec/scoring_spec.rb +1 -1
- data/spec/spec_helper.rb +9 -0
- data/spec/thread_safety_spec.rb +482 -0
- data/spec/thread_safety_spec.rb.broken +878 -0
- data/spec/versioning_spec.rb +141 -37
- data/spec/web_ui_rack_spec.rb +135 -0
- metadata +69 -6
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
require "spec_helper"
|
|
2
|
+
require "decision_agent/monitoring/metrics_collector"
|
|
3
|
+
|
|
4
|
+
RSpec.describe DecisionAgent::Monitoring::MetricsCollector do
|
|
5
|
+
let(:collector) { described_class.new(window_size: 60) }
|
|
6
|
+
let(:decision) do
|
|
7
|
+
double(
|
|
8
|
+
"Decision",
|
|
9
|
+
decision: "approve",
|
|
10
|
+
confidence: 0.85,
|
|
11
|
+
evaluations: [
|
|
12
|
+
double("Evaluation", evaluator_name: "test_evaluator")
|
|
13
|
+
]
|
|
14
|
+
)
|
|
15
|
+
end
|
|
16
|
+
let(:context) { double("Context", to_h: { user: "test" }) }
|
|
17
|
+
|
|
18
|
+
describe "#initialize" do
|
|
19
|
+
it "initializes with default window size" do
|
|
20
|
+
collector = described_class.new
|
|
21
|
+
expect(collector.window_size).to eq(3600)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
it "initializes with custom window size" do
|
|
25
|
+
expect(collector.window_size).to eq(60)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
it "initializes empty metrics" do
|
|
29
|
+
counts = collector.metrics_count
|
|
30
|
+
expect(counts[:decisions]).to eq(0)
|
|
31
|
+
expect(counts[:evaluations]).to eq(0)
|
|
32
|
+
expect(counts[:performance]).to eq(0)
|
|
33
|
+
expect(counts[:errors]).to eq(0)
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
describe "#record_decision" do
|
|
38
|
+
it "records a decision metric" do
|
|
39
|
+
metric = collector.record_decision(decision, context, duration_ms: 10.5)
|
|
40
|
+
|
|
41
|
+
expect(metric[:decision]).to eq("approve")
|
|
42
|
+
expect(metric[:confidence]).to eq(0.85)
|
|
43
|
+
expect(metric[:duration_ms]).to eq(10.5)
|
|
44
|
+
expect(metric[:context_size]).to eq(1)
|
|
45
|
+
expect(metric[:evaluations_count]).to eq(1)
|
|
46
|
+
expect(metric[:evaluator_names]).to eq(["test_evaluator"])
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
it "increments decision count" do
|
|
50
|
+
expect do
|
|
51
|
+
collector.record_decision(decision, context)
|
|
52
|
+
end.to change { collector.metrics_count[:decisions] }.by(1)
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
it "notifies observers" do
|
|
56
|
+
observed = []
|
|
57
|
+
collector.add_observer do |type, metric|
|
|
58
|
+
observed << [type, metric]
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
collector.record_decision(decision, context)
|
|
62
|
+
|
|
63
|
+
expect(observed.size).to eq(1)
|
|
64
|
+
expect(observed[0][0]).to eq(:decision)
|
|
65
|
+
expect(observed[0][1][:decision]).to eq("approve")
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
describe "#record_evaluation" do
|
|
70
|
+
let(:evaluation) do
|
|
71
|
+
double(
|
|
72
|
+
"Evaluation",
|
|
73
|
+
decision: "approve",
|
|
74
|
+
weight: 0.9,
|
|
75
|
+
evaluator_name: "test_evaluator"
|
|
76
|
+
)
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
it "records an evaluation metric" do
|
|
80
|
+
metric = collector.record_evaluation(evaluation)
|
|
81
|
+
|
|
82
|
+
expect(metric[:decision]).to eq("approve")
|
|
83
|
+
expect(metric[:weight]).to eq(0.9)
|
|
84
|
+
expect(metric[:evaluator_name]).to eq("test_evaluator")
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
it "increments evaluation count" do
|
|
88
|
+
expect do
|
|
89
|
+
collector.record_evaluation(evaluation)
|
|
90
|
+
end.to change { collector.metrics_count[:evaluations] }.by(1)
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
describe "#record_performance" do
|
|
95
|
+
it "records performance metrics" do
|
|
96
|
+
metric = collector.record_performance(
|
|
97
|
+
operation: "decide",
|
|
98
|
+
duration_ms: 25.5,
|
|
99
|
+
success: true,
|
|
100
|
+
metadata: { evaluators: 2 }
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
expect(metric[:operation]).to eq("decide")
|
|
104
|
+
expect(metric[:duration_ms]).to eq(25.5)
|
|
105
|
+
expect(metric[:success]).to be true
|
|
106
|
+
expect(metric[:metadata]).to eq({ evaluators: 2 })
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
it "records failed operations" do
|
|
110
|
+
metric = collector.record_performance(
|
|
111
|
+
operation: "decide",
|
|
112
|
+
duration_ms: 10.0,
|
|
113
|
+
success: false
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
expect(metric[:success]).to be false
|
|
117
|
+
end
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
describe "#record_error" do
|
|
121
|
+
let(:error) { StandardError.new("Test error") }
|
|
122
|
+
|
|
123
|
+
it "records error metrics" do
|
|
124
|
+
metric = collector.record_error(error, context: { user_id: 123 })
|
|
125
|
+
|
|
126
|
+
expect(metric[:error_class]).to eq("StandardError")
|
|
127
|
+
expect(metric[:error_message]).to eq("Test error")
|
|
128
|
+
expect(metric[:context]).to eq({ user_id: 123 })
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
it "increments error count" do
|
|
132
|
+
expect do
|
|
133
|
+
collector.record_error(error)
|
|
134
|
+
end.to change { collector.metrics_count[:errors] }.by(1)
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
describe "#statistics" do
|
|
139
|
+
before do
|
|
140
|
+
# Record some metrics
|
|
141
|
+
5.times do |i|
|
|
142
|
+
collector.record_decision(decision, context, duration_ms: (i + 1) * 10)
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
2.times do
|
|
146
|
+
collector.record_performance(operation: "decide", duration_ms: 15.0, success: true)
|
|
147
|
+
end
|
|
148
|
+
collector.record_performance(operation: "decide", duration_ms: 20.0, success: false)
|
|
149
|
+
|
|
150
|
+
collector.record_error(StandardError.new("Error 1"))
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
it "returns summary statistics" do
|
|
154
|
+
stats = collector.statistics
|
|
155
|
+
|
|
156
|
+
expect(stats[:summary][:total_decisions]).to eq(5)
|
|
157
|
+
expect(stats[:summary][:total_evaluations]).to eq(0)
|
|
158
|
+
expect(stats[:summary][:total_errors]).to eq(1)
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
it "computes decision statistics" do
|
|
162
|
+
stats = collector.statistics
|
|
163
|
+
|
|
164
|
+
expect(stats[:decisions][:total]).to eq(5)
|
|
165
|
+
expect(stats[:decisions][:avg_confidence]).to eq(0.85)
|
|
166
|
+
expect(stats[:decisions][:min_confidence]).to eq(0.85)
|
|
167
|
+
expect(stats[:decisions][:max_confidence]).to eq(0.85)
|
|
168
|
+
expect(stats[:decisions][:avg_duration_ms]).to be_within(0.1).of(30.0)
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
it "computes performance statistics" do
|
|
172
|
+
stats = collector.statistics
|
|
173
|
+
|
|
174
|
+
expect(stats[:performance][:total_operations]).to eq(3)
|
|
175
|
+
expect(stats[:performance][:successful]).to eq(2)
|
|
176
|
+
expect(stats[:performance][:failed]).to eq(1)
|
|
177
|
+
expect(stats[:performance][:success_rate]).to be_within(0.01).of(0.6667)
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
it "computes error statistics" do
|
|
181
|
+
stats = collector.statistics
|
|
182
|
+
|
|
183
|
+
expect(stats[:errors][:total]).to eq(1)
|
|
184
|
+
expect(stats[:errors][:by_type]["StandardError"]).to eq(1)
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
it "filters by time range" do
|
|
188
|
+
stats = collector.statistics(time_range: 30)
|
|
189
|
+
expect(stats[:summary][:time_range]).to eq("Last 30s")
|
|
190
|
+
end
|
|
191
|
+
end
|
|
192
|
+
|
|
193
|
+
describe "#time_series" do
|
|
194
|
+
before do
|
|
195
|
+
10.times do
|
|
196
|
+
collector.record_decision(decision, context)
|
|
197
|
+
sleep 0.01 # Small delay to ensure different buckets
|
|
198
|
+
end
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
it "returns time series data" do
|
|
202
|
+
series = collector.time_series(metric_type: :decisions, bucket_size: 1, time_range: 60)
|
|
203
|
+
|
|
204
|
+
expect(series).to be_an(Array)
|
|
205
|
+
expect(series.first).to have_key(:timestamp)
|
|
206
|
+
expect(series.first).to have_key(:count)
|
|
207
|
+
expect(series.first).to have_key(:metrics)
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
it "buckets metrics by time" do
|
|
211
|
+
series = collector.time_series(metric_type: :decisions, bucket_size: 60, time_range: 3600)
|
|
212
|
+
|
|
213
|
+
total_count = series.sum { |s| s[:count] }
|
|
214
|
+
expect(total_count).to eq(10)
|
|
215
|
+
end
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
describe "#clear!" do
|
|
219
|
+
before do
|
|
220
|
+
collector.record_decision(decision, context)
|
|
221
|
+
collector.record_error(StandardError.new("Test"))
|
|
222
|
+
end
|
|
223
|
+
|
|
224
|
+
it "clears all metrics" do
|
|
225
|
+
collector.clear!
|
|
226
|
+
|
|
227
|
+
counts = collector.metrics_count
|
|
228
|
+
expect(counts[:decisions]).to eq(0)
|
|
229
|
+
expect(counts[:errors]).to eq(0)
|
|
230
|
+
end
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
describe "thread safety" do
|
|
234
|
+
it "handles concurrent writes safely" do
|
|
235
|
+
threads = 10.times.map do
|
|
236
|
+
Thread.new do
|
|
237
|
+
10.times do
|
|
238
|
+
collector.record_decision(decision, context)
|
|
239
|
+
end
|
|
240
|
+
end
|
|
241
|
+
end
|
|
242
|
+
|
|
243
|
+
threads.each(&:join)
|
|
244
|
+
|
|
245
|
+
expect(collector.metrics_count[:decisions]).to eq(100)
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
it "handles concurrent reads and writes" do
|
|
249
|
+
writer = Thread.new do
|
|
250
|
+
50.times do
|
|
251
|
+
collector.record_decision(decision, context)
|
|
252
|
+
sleep 0.001
|
|
253
|
+
end
|
|
254
|
+
end
|
|
255
|
+
|
|
256
|
+
reader = Thread.new do
|
|
257
|
+
50.times do
|
|
258
|
+
collector.statistics
|
|
259
|
+
sleep 0.001
|
|
260
|
+
end
|
|
261
|
+
end
|
|
262
|
+
|
|
263
|
+
expect { writer.join && reader.join }.not_to raise_error
|
|
264
|
+
end
|
|
265
|
+
end
|
|
266
|
+
|
|
267
|
+
describe "metric cleanup" do
|
|
268
|
+
it "removes old metrics outside window" do
|
|
269
|
+
collector = described_class.new(window_size: 1)
|
|
270
|
+
|
|
271
|
+
collector.record_decision(decision, context)
|
|
272
|
+
expect(collector.metrics_count[:decisions]).to eq(1)
|
|
273
|
+
|
|
274
|
+
sleep 1.5
|
|
275
|
+
|
|
276
|
+
collector.record_decision(decision, context)
|
|
277
|
+
# Old metric should be cleaned up
|
|
278
|
+
expect(collector.metrics_count[:decisions]).to eq(1)
|
|
279
|
+
end
|
|
280
|
+
end
|
|
281
|
+
end
|
|
# Specs for DecisionAgent::Monitoring::MonitoredAgent: a decorator that wraps
# an Agent, forwards #decide, and records decision/evaluation/performance/error
# metrics into a MetricsCollector. Also covers delegation and thread safety.
require "spec_helper"
require "decision_agent/monitoring/metrics_collector"
require "decision_agent/monitoring/monitored_agent"

RSpec.describe DecisionAgent::Monitoring::MonitoredAgent do
  let(:collector) { DecisionAgent::Monitoring::MetricsCollector.new }
  # Evaluator stub that always returns a fixed "approve" evaluation.
  let(:evaluator) do
    double(
      "Evaluator",
      evaluate: DecisionAgent::Evaluation.new(
        decision: "approve",
        weight: 0.9,
        reason: "Test reason",
        evaluator_name: "test_evaluator"
      )
    )
  end
  let(:agent) { DecisionAgent::Agent.new(evaluators: [evaluator]) }
  let(:monitored_agent) { described_class.new(agent: agent, metrics_collector: collector) }

  describe "#initialize" do
    it "wraps an agent with metrics collection" do
      expect(monitored_agent.agent).to eq(agent)
      expect(monitored_agent.metrics_collector).to eq(collector)
    end
  end

  describe "#decide" do
    let(:context) { { amount: 1000 } }

    it "makes a decision and records metrics" do
      result = monitored_agent.decide(context: context)

      expect(result).to be_a(DecisionAgent::Decision)
      expect(result.decision).to eq("approve")
      # One decide() call records exactly one of each metric type.
      expect(collector.metrics_count[:decisions]).to eq(1)
      expect(collector.metrics_count[:evaluations]).to eq(1)
      expect(collector.metrics_count[:performance]).to eq(1)
    end

    it "records decision metrics with duration" do
      monitored_agent.decide(context: context)

      stats = collector.statistics
      expect(stats[:decisions][:total]).to eq(1)
      expect(stats[:decisions][:avg_duration_ms]).to be > 0
    end

    it "records evaluation metrics" do
      monitored_agent.decide(context: context)

      stats = collector.statistics
      expect(stats[:evaluations][:total]).to eq(1)
      expect(stats[:evaluations][:evaluator_distribution]["test_evaluator"]).to eq(1)
    end

    it "records performance metrics as successful" do
      monitored_agent.decide(context: context)

      stats = collector.statistics
      expect(stats[:performance][:total_operations]).to eq(1)
      expect(stats[:performance][:successful]).to eq(1)
      expect(stats[:performance][:success_rate]).to eq(1.0)
    end

    it "includes metadata in performance metrics" do
      monitored_agent.decide(context: context)

      collector.statistics
      # White-box peek at the collector's internal store to inspect the raw
      # performance metric (no public accessor for individual metrics).
      perf_metric = collector.instance_variable_get(:@metrics)[:performance].first

      expect(perf_metric[:metadata][:evaluators_count]).to eq(1)
      expect(perf_metric[:metadata][:decision]).to eq("approve")
      expect(perf_metric[:metadata][:confidence]).to be_a(Float)
    end

    context "when decision fails" do
      before do
        allow(agent).to receive(:decide).and_raise(StandardError.new("Test error"))
      end

      it "records error metrics" do
        expect do
          monitored_agent.decide(context: context)
        end.to raise_error(StandardError, "Test error")

        expect(collector.metrics_count[:errors]).to eq(1)
      end

      it "records failed performance metrics" do
        expect do
          monitored_agent.decide(context: context)
        end.to raise_error(StandardError)

        # The failed attempt still counts as an operation, marked failed.
        stats = collector.statistics
        expect(stats[:performance][:total_operations]).to eq(1)
        expect(stats[:performance][:failed]).to eq(1)
        expect(stats[:performance][:success_rate]).to eq(0.0)
      end

      it "includes error details in metrics" do
        expect do
          monitored_agent.decide(context: context)
        end.to raise_error(StandardError)

        error_metric = collector.instance_variable_get(:@metrics)[:errors].first
        expect(error_metric[:error_class]).to eq("StandardError")
        expect(error_metric[:error_message]).to eq("Test error")
        expect(error_metric[:context]).to eq(context)
      end

      it "re-raises the error" do
        expect do
          monitored_agent.decide(context: context)
        end.to raise_error(StandardError, "Test error")
      end
    end

    it "handles Context objects" do
      ctx = DecisionAgent::Context.new(context)
      result = monitored_agent.decide(context: ctx)

      expect(result).to be_a(DecisionAgent::Decision)
      expect(collector.metrics_count[:decisions]).to eq(1)
    end

    it "handles hash contexts" do
      result = monitored_agent.decide(context: context)

      expect(result).to be_a(DecisionAgent::Decision)
      expect(collector.metrics_count[:decisions]).to eq(1)
    end

    it "measures decision duration accurately" do
      # Mock agent to introduce delay
      allow(agent).to receive(:decide) do |*args|
        sleep 0.01 # 10ms delay
        evaluator.evaluate(args.first)
        DecisionAgent::Decision.new(
          decision: "approve",
          confidence: 0.9,
          explanations: ["Test"],
          evaluations: [evaluator.evaluate(args.first)],
          audit_payload: {}
        )
      end

      monitored_agent.decide(context: context)

      # Recorded duration must cover the injected 10ms delay.
      stats = collector.statistics
      expect(stats[:decisions][:avg_duration_ms]).to be >= 10
    end
  end

  describe "method delegation" do
    it "delegates methods to wrapped agent" do
      expect(monitored_agent.evaluators).to eq(agent.evaluators)
      expect(monitored_agent.scoring_strategy).to eq(agent.scoring_strategy)
      expect(monitored_agent.audit_adapter).to eq(agent.audit_adapter)
    end

    it "responds to agent methods" do
      # respond_to? must be true for delegated methods (implies the wrapper
      # implements respond_to_missing? or explicit delegation).
      expect(monitored_agent).to respond_to(:evaluators)
      expect(monitored_agent).to respond_to(:scoring_strategy)
      expect(monitored_agent).to respond_to(:audit_adapter)
    end
  end

  describe "thread safety" do
    it "handles concurrent decisions safely" do
      # Materialize let variables before creating threads
      # (RSpec let memoization is not thread-safe on first access).
      test_context = { amount: 1000 }
      test_monitored_agent = monitored_agent
      test_collector = collector

      threads = 10.times.map do
        Thread.new do
          10.times do
            test_monitored_agent.decide(context: test_context)
          end
        end
      end

      threads.each(&:join)

      # 10 threads x 10 decisions each; no metric may be lost.
      expect(test_collector.metrics_count[:decisions]).to eq(100)
      expect(test_collector.metrics_count[:evaluations]).to eq(100)
      expect(test_collector.metrics_count[:performance]).to eq(100)
    end
  end

  describe "integration test" do
    it "provides comprehensive metrics for multiple decisions" do
      contexts = [
        { amount: 500 },
        { amount: 1500 },
        { amount: 2000 }
      ]

      contexts.each do |ctx|
        monitored_agent.decide(context: ctx)
      end

      stats = collector.statistics

      # Summary
      expect(stats[:summary][:total_decisions]).to eq(3)
      expect(stats[:summary][:total_evaluations]).to eq(3)
      expect(stats[:summary][:total_errors]).to eq(0)

      # Decision stats
      expect(stats[:decisions][:total]).to eq(3)
      expect(stats[:decisions][:avg_confidence]).to be > 0
      expect(stats[:decisions][:decision_distribution]["approve"]).to eq(3)

      # Performance stats
      expect(stats[:performance][:total_operations]).to eq(3)
      expect(stats[:performance][:success_rate]).to eq(1.0)
      expect(stats[:performance][:avg_duration_ms]).to be > 0
    end
  end
end