decision_agent 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. checksums.yaml +4 -4
  2. data/README.md +234 -919
  3. data/bin/decision_agent +5 -5
  4. data/lib/decision_agent/agent.rb +19 -26
  5. data/lib/decision_agent/audit/null_adapter.rb +1 -2
  6. data/lib/decision_agent/decision.rb +3 -1
  7. data/lib/decision_agent/dsl/condition_evaluator.rb +4 -3
  8. data/lib/decision_agent/dsl/rule_parser.rb +4 -6
  9. data/lib/decision_agent/dsl/schema_validator.rb +27 -31
  10. data/lib/decision_agent/errors.rb +21 -6
  11. data/lib/decision_agent/evaluation.rb +3 -1
  12. data/lib/decision_agent/evaluation_validator.rb +78 -0
  13. data/lib/decision_agent/evaluators/json_rule_evaluator.rb +26 -0
  14. data/lib/decision_agent/evaluators/static_evaluator.rb +2 -6
  15. data/lib/decision_agent/monitoring/alert_manager.rb +282 -0
  16. data/lib/decision_agent/monitoring/dashboard/public/dashboard.css +381 -0
  17. data/lib/decision_agent/monitoring/dashboard/public/dashboard.js +471 -0
  18. data/lib/decision_agent/monitoring/dashboard/public/index.html +161 -0
  19. data/lib/decision_agent/monitoring/dashboard_server.rb +340 -0
  20. data/lib/decision_agent/monitoring/metrics_collector.rb +278 -0
  21. data/lib/decision_agent/monitoring/monitored_agent.rb +71 -0
  22. data/lib/decision_agent/monitoring/prometheus_exporter.rb +247 -0
  23. data/lib/decision_agent/replay/replay.rb +12 -22
  24. data/lib/decision_agent/scoring/base.rb +1 -1
  25. data/lib/decision_agent/scoring/consensus.rb +5 -5
  26. data/lib/decision_agent/scoring/weighted_average.rb +1 -1
  27. data/lib/decision_agent/version.rb +1 -1
  28. data/lib/decision_agent/versioning/activerecord_adapter.rb +141 -0
  29. data/lib/decision_agent/versioning/adapter.rb +100 -0
  30. data/lib/decision_agent/versioning/file_storage_adapter.rb +290 -0
  31. data/lib/decision_agent/versioning/version_manager.rb +127 -0
  32. data/lib/decision_agent/web/public/app.js +318 -0
  33. data/lib/decision_agent/web/public/index.html +56 -1
  34. data/lib/decision_agent/web/public/styles.css +219 -0
  35. data/lib/decision_agent/web/server.rb +169 -9
  36. data/lib/decision_agent.rb +11 -0
  37. data/lib/generators/decision_agent/install/install_generator.rb +40 -0
  38. data/lib/generators/decision_agent/install/templates/README +47 -0
  39. data/lib/generators/decision_agent/install/templates/migration.rb +37 -0
  40. data/lib/generators/decision_agent/install/templates/rule.rb +30 -0
  41. data/lib/generators/decision_agent/install/templates/rule_version.rb +66 -0
  42. data/spec/activerecord_thread_safety_spec.rb +553 -0
  43. data/spec/agent_spec.rb +13 -13
  44. data/spec/api_contract_spec.rb +16 -16
  45. data/spec/audit_adapters_spec.rb +3 -3
  46. data/spec/comprehensive_edge_cases_spec.rb +86 -86
  47. data/spec/dsl_validation_spec.rb +83 -83
  48. data/spec/edge_cases_spec.rb +23 -23
  49. data/spec/examples/feedback_aware_evaluator_spec.rb +7 -7
  50. data/spec/examples.txt +548 -0
  51. data/spec/issue_verification_spec.rb +685 -0
  52. data/spec/json_rule_evaluator_spec.rb +15 -15
  53. data/spec/monitoring/alert_manager_spec.rb +378 -0
  54. data/spec/monitoring/metrics_collector_spec.rb +281 -0
  55. data/spec/monitoring/monitored_agent_spec.rb +222 -0
  56. data/spec/monitoring/prometheus_exporter_spec.rb +242 -0
  57. data/spec/replay_edge_cases_spec.rb +58 -58
  58. data/spec/replay_spec.rb +11 -11
  59. data/spec/rfc8785_canonicalization_spec.rb +215 -0
  60. data/spec/scoring_spec.rb +1 -1
  61. data/spec/spec_helper.rb +9 -0
  62. data/spec/thread_safety_spec.rb +482 -0
  63. data/spec/thread_safety_spec.rb.broken +878 -0
  64. data/spec/versioning_spec.rb +777 -0
  65. data/spec/web_ui_rack_spec.rb +135 -0
  66. metadata +84 -11
@@ -0,0 +1,242 @@
1
+ require "spec_helper"
2
+ require "decision_agent/monitoring/metrics_collector"
3
+ require "decision_agent/monitoring/prometheus_exporter"
4
+
5
+ RSpec.describe DecisionAgent::Monitoring::PrometheusExporter do
6
+ let(:collector) { DecisionAgent::Monitoring::MetricsCollector.new }
7
+ let(:exporter) { described_class.new(metrics_collector: collector, namespace: "test") }
8
+
9
+ let(:decision) do
10
+ double(
11
+ "Decision",
12
+ decision: "approve",
13
+ confidence: 0.85,
14
+ evaluations: [double("Evaluation", evaluator_name: "test_evaluator")]
15
+ )
16
+ end
17
+ let(:context) { double("Context", to_h: { user: "test" }) }
18
+
19
+ describe "#initialize" do
20
+ it "initializes with metrics collector" do
21
+ expect(exporter).to be_a(described_class)
22
+ end
23
+
24
+ it "uses default namespace" do
25
+ exporter = described_class.new(metrics_collector: collector)
26
+ output = exporter.export
27
+ expect(output).to include("decision_agent_")
28
+ end
29
+
30
+ it "uses custom namespace" do
31
+ output = exporter.export
32
+ expect(output).to include("test_")
33
+ end
34
+ end
35
+
36
+ describe "#export" do
37
+ before do
38
+ # Record some metrics
39
+ 3.times { collector.record_decision(decision, context, duration_ms: 10.0) }
40
+ collector.record_performance(operation: "decide", duration_ms: 15.0, success: true)
41
+ collector.record_error(StandardError.new("Test error"))
42
+ end
43
+
44
+ it "exports in Prometheus text format" do
45
+ output = exporter.export
46
+
47
+ expect(output).to be_a(String)
48
+ expect(output).to include("# DecisionAgent Metrics Export")
49
+ end
50
+
51
+ it "includes decision metrics" do
52
+ output = exporter.export
53
+
54
+ expect(output).to include("# HELP test_decisions_total")
55
+ expect(output).to include("# TYPE test_decisions_total counter")
56
+ expect(output).to include("test_decisions_total 3")
57
+ end
58
+
59
+ it "includes confidence metrics" do
60
+ output = exporter.export
61
+
62
+ expect(output).to include("# HELP test_decision_confidence_avg")
63
+ expect(output).to include("# TYPE test_decision_confidence_avg gauge")
64
+ expect(output).to include("test_decision_confidence_avg 0.85")
65
+ end
66
+
67
+ it "includes performance metrics" do
68
+ output = exporter.export
69
+
70
+ expect(output).to include("# HELP test_success_rate")
71
+ expect(output).to include("# TYPE test_success_rate gauge")
72
+ end
73
+
74
+ it "includes error metrics" do
75
+ output = exporter.export
76
+
77
+ expect(output).to include("# HELP test_errors_total")
78
+ expect(output).to include("# TYPE test_errors_total counter")
79
+ expect(output).to include("test_errors_total 1")
80
+ end
81
+
82
+ it "includes system info" do
83
+ output = exporter.export
84
+
85
+ expect(output).to include("# HELP test_info")
86
+ expect(output).to include("# TYPE test_info gauge")
87
+ expect(output).to include("version=\"#{DecisionAgent::VERSION}\"")
88
+ end
89
+
90
+ it "includes decision distribution" do
91
+ output = exporter.export
92
+
93
+ expect(output).to include("# HELP test_decisions_by_type")
94
+ expect(output).to include("test_decisions_by_type{decision=\"approve\"} 3")
95
+ end
96
+
97
+ it "includes error distribution by type" do
98
+ output = exporter.export
99
+
100
+ expect(output).to include("# HELP test_errors_by_type")
101
+ expect(output).to include("test_errors_by_type{error=\"StandardError\"} 1")
102
+ end
103
+
104
+ it "includes metrics count" do
105
+ output = exporter.export
106
+
107
+ expect(output).to include("# HELP test_metrics_stored")
108
+ expect(output).to include("test_metrics_stored{type=\"decisions\"} 3")
109
+ expect(output).to include("test_metrics_stored{type=\"errors\"} 1")
110
+ end
111
+ end
112
+
113
+ describe "#register_kpi" do
114
+ it "registers a custom KPI" do
115
+ exporter.register_kpi(
116
+ name: "custom_metric",
117
+ value: 42.5,
118
+ help: "A custom metric"
119
+ )
120
+
121
+ output = exporter.export
122
+ expect(output).to include("# HELP test_custom_metric A custom metric")
123
+ expect(output).to include("# TYPE test_custom_metric gauge")
124
+ expect(output).to include("test_custom_metric 42.5")
125
+ end
126
+
127
+ it "registers KPI with labels" do
128
+ exporter.register_kpi(
129
+ name: "requests",
130
+ value: 100,
131
+ labels: { endpoint: "/api/v1", method: "GET" }
132
+ )
133
+
134
+ output = exporter.export
135
+ expect(output).to include("test_requests{endpoint=\"/api/v1\",method=\"GET\"} 100")
136
+ end
137
+
138
+ it "sanitizes metric names" do
139
+ exporter.register_kpi(name: "my-custom.metric!", value: 10)
140
+
141
+ output = exporter.export
142
+ expect(output).to include("test_my_custom_metric_")
143
+ end
144
+
145
+ it "escapes label values" do
146
+ exporter.register_kpi(
147
+ name: "metric",
148
+ value: 1,
149
+ labels: { message: 'Contains "quotes"' }
150
+ )
151
+
152
+ output = exporter.export
153
+ expect(output).to include('message="Contains \"quotes\""')
154
+ end
155
+ end
156
+
157
+ describe "#metrics_hash" do
158
+ before do
159
+ collector.record_decision(decision, context, duration_ms: 10.0)
160
+ end
161
+
162
+ it "returns metrics as hash" do
163
+ metrics = exporter.metrics_hash
164
+
165
+ expect(metrics).to be_a(Hash)
166
+ expect(metrics).to have_key(:decisions)
167
+ expect(metrics).to have_key(:performance)
168
+ expect(metrics).to have_key(:errors)
169
+ expect(metrics).to have_key(:system)
170
+ end
171
+
172
+ it "includes metric types" do
173
+ metrics = exporter.metrics_hash
174
+
175
+ expect(metrics[:decisions][:total][:type]).to eq("counter")
176
+ expect(metrics[:decisions][:avg_confidence][:type]).to eq("gauge")
177
+ end
178
+
179
+ it "includes metric values" do
180
+ metrics = exporter.metrics_hash
181
+
182
+ expect(metrics[:decisions][:total][:value]).to eq(1)
183
+ expect(metrics[:decisions][:avg_confidence][:value]).to eq(0.85)
184
+ end
185
+ end
186
+
187
+ describe "thread safety" do
188
+ it "handles concurrent KPI registration" do
189
+ threads = 10.times.map do |i|
190
+ Thread.new do
191
+ 10.times do |j|
192
+ exporter.register_kpi(
193
+ name: "metric_#{i}_#{j}",
194
+ value: (i * 10) + j
195
+ )
196
+ end
197
+ end
198
+ end
199
+
200
+ expect { threads.each(&:join) }.not_to raise_error
201
+ end
202
+
203
+ it "handles concurrent exports" do
204
+ threads = 5.times.map do
205
+ Thread.new do
206
+ 10.times { exporter.export }
207
+ end
208
+ end
209
+
210
+ expect { threads.each(&:join) }.not_to raise_error
211
+ end
212
+ end
213
+
214
+ describe "performance metrics export" do
215
+ before do
216
+ 5.times do |i|
217
+ collector.record_performance(
218
+ operation: "decide",
219
+ duration_ms: (i + 1) * 10.0,
220
+ success: true
221
+ )
222
+ end
223
+ end
224
+
225
+ it "exports summary metrics" do
226
+ output = exporter.export
227
+
228
+ expect(output).to include("# TYPE test_operation_duration_ms summary")
229
+ expect(output).to include("test_operation_duration_ms{quantile=\"0.5\"}")
230
+ expect(output).to include("test_operation_duration_ms{quantile=\"0.95\"}")
231
+ expect(output).to include("test_operation_duration_ms{quantile=\"0.99\"}")
232
+ expect(output).to include("test_operation_duration_ms_sum")
233
+ expect(output).to include("test_operation_duration_ms_count")
234
+ end
235
+ end
236
+
237
+ describe "content type" do
238
+ it "defines Prometheus content type" do
239
+ expect(described_class::CONTENT_TYPE).to eq("text/plain; version=0.0.4")
240
+ end
241
+ end
242
+ end
@@ -23,7 +23,7 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
23
23
  rules: [
24
24
  {
25
25
  id: "auto_approve",
26
- if: { field: "score", op: "gte", value: 90 }, # Changed threshold
26
+ if: { field: "score", op: "gte", value: 90 }, # Changed threshold
27
27
  then: { decision: "approve", weight: 0.9, reason: "Very high score" }
28
28
  }
29
29
  ]
@@ -36,9 +36,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
36
36
 
37
37
  original_result = agent.decide(context: { score: 85 })
38
38
 
39
- expect {
39
+ expect do
40
40
  DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
41
- }.not_to raise_error
41
+ end.not_to raise_error
42
42
  end
43
43
 
44
44
  it "detects differences in strict mode when rules have changed" do
@@ -52,9 +52,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
52
52
 
53
53
  # Replay uses the stored evaluations (not re-evaluating rules)
54
54
  # So it should succeed because replay uses static evaluators from the audit payload
55
- expect {
55
+ expect do
56
56
  DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
57
- }.not_to raise_error
57
+ end.not_to raise_error
58
58
 
59
59
  # The replayed result should match the original
60
60
  replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
@@ -69,9 +69,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
69
69
  original_result = agent.decide(context: { score: 85 })
70
70
 
71
71
  # In non-strict mode, differences are logged but don't raise errors
72
- expect {
72
+ expect do
73
73
  DecisionAgent::Replay.run(original_result.audit_payload, strict: false)
74
- }.not_to raise_error
74
+ end.not_to raise_error
75
75
  end
76
76
  end
77
77
 
@@ -124,9 +124,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
124
124
  # StaticEvaluator adds type: "static" by default
125
125
  expect(original_result.evaluations.first.metadata).to eq({ type: "static" })
126
126
 
127
- expect {
127
+ expect do
128
128
  DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
129
- }.not_to raise_error
129
+ end.not_to raise_error
130
130
 
131
131
  replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
132
132
  expect(replayed_result.evaluations.first.metadata).to eq({ type: "static" })
@@ -140,7 +140,7 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
140
140
  evaluator_name: "CustomEvaluator",
141
141
  metadata: {
142
142
  user: { id: 123, role: "admin" },
143
- tags: ["urgent", "important"],
143
+ tags: %w[urgent important],
144
144
  history: [
145
145
  { action: "created", timestamp: "2025-01-01" },
146
146
  { action: "updated", timestamp: "2025-01-02" }
@@ -183,21 +183,21 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
183
183
  decision: "allow",
184
184
  weight: 0.9,
185
185
  reason: "User authenticated successfully",
186
- evaluator_name: "DeletedCustomAuthEvaluator", # This evaluator no longer exists
186
+ evaluator_name: "DeletedCustomAuthEvaluator", # This evaluator no longer exists
187
187
  metadata: { auth_method: "oauth", provider: "google" }
188
188
  }
189
189
  ],
190
190
  decision: "allow",
191
- confidence: 1.0, # WeightedAverage normalizes single eval to 1.0
191
+ confidence: 1.0, # WeightedAverage normalizes single eval to 1.0
192
192
  scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
193
193
  agent_version: "0.1.0",
194
194
  deterministic_hash: "abc123"
195
195
  }
196
196
 
197
197
  # Replay should work because it uses StaticEvaluator, not the original evaluator
198
- expect {
198
+ expect do
199
199
  DecisionAgent::Replay.run(audit_payload, strict: true)
200
- }.not_to raise_error
200
+ end.not_to raise_error
201
201
 
202
202
  replayed_result = DecisionAgent::Replay.run(audit_payload, strict: true)
203
203
 
@@ -224,12 +224,12 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
224
224
  decision: "approve",
225
225
  weight: 0.7,
226
226
  reason: "ML model prediction",
227
- evaluator_name: "NonExistentMLEvaluator", # Missing evaluator
227
+ evaluator_name: "NonExistentMLEvaluator", # Missing evaluator
228
228
  metadata: { model_version: "v2.1" }
229
229
  }
230
230
  ],
231
231
  decision: "approve",
232
- confidence: 1.0, # Both agree, so 100% confidence
232
+ confidence: 1.0, # Both agree, so 100% confidence
233
233
  scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
234
234
  agent_version: "0.1.0",
235
235
  deterministic_hash: "def456"
@@ -260,15 +260,15 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
260
260
  ],
261
261
  decision: "approve",
262
262
  confidence: 0.9,
263
- scoring_strategy: "DecisionAgent::Scoring::DeprecatedBayesianStrategy", # Doesn't exist
263
+ scoring_strategy: "DecisionAgent::Scoring::DeprecatedBayesianStrategy", # Doesn't exist
264
264
  agent_version: "0.1.0",
265
265
  deterministic_hash: "ghi789"
266
266
  }
267
267
 
268
268
  # Should fall back to WeightedAverage
269
- expect {
269
+ expect do
270
270
  DecisionAgent::Replay.run(audit_payload, strict: false)
271
- }.not_to raise_error
271
+ end.not_to raise_error
272
272
 
273
273
  replayed_result = DecisionAgent::Replay.run(audit_payload, strict: false)
274
274
  expect(replayed_result.decision).to eq("approve")
@@ -291,9 +291,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
291
291
 
292
292
  # Replay uses the stored scoring strategy from the audit payload
293
293
  # So it should replay successfully
294
- expect {
294
+ expect do
295
295
  DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
296
- }.not_to raise_error
296
+ end.not_to raise_error
297
297
  end
298
298
  end
299
299
 
@@ -305,9 +305,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
305
305
  confidence: 0.5
306
306
  }
307
307
 
308
- expect {
308
+ expect do
309
309
  DecisionAgent::Replay.run(incomplete_payload, strict: false)
310
- }.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: context/)
310
+ end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: context/)
311
311
  end
312
312
 
313
313
  it "requires evaluations field" do
@@ -317,9 +317,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
317
317
  confidence: 0.5
318
318
  }
319
319
 
320
- expect {
320
+ expect do
321
321
  DecisionAgent::Replay.run(incomplete_payload, strict: false)
322
- }.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: evaluations/)
322
+ end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: evaluations/)
323
323
  end
324
324
 
325
325
  it "requires decision field" do
@@ -329,9 +329,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
329
329
  confidence: 0.5
330
330
  }
331
331
 
332
- expect {
332
+ expect do
333
333
  DecisionAgent::Replay.run(incomplete_payload, strict: false)
334
- }.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: decision/)
334
+ end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: decision/)
335
335
  end
336
336
 
337
337
  it "requires confidence field" do
@@ -341,9 +341,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
341
341
  decision: "test"
342
342
  }
343
343
 
344
- expect {
344
+ expect do
345
345
  DecisionAgent::Replay.run(incomplete_payload, strict: false)
346
- }.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: confidence/)
346
+ end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: confidence/)
347
347
  end
348
348
 
349
349
  it "accepts both symbol and string keys" do
@@ -366,9 +366,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
366
366
  "scoring_strategy" => "DecisionAgent::Scoring::MaxWeight"
367
367
  }
368
368
 
369
- expect {
369
+ expect do
370
370
  DecisionAgent::Replay.run(payload_with_strings, strict: true)
371
- }.not_to raise_error
371
+ end.not_to raise_error
372
372
  end
373
373
  end
374
374
 
@@ -460,12 +460,12 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
460
460
 
461
461
  # Modify agent_version
462
462
  modified_payload = original_result.audit_payload.dup
463
- modified_payload[:agent_version] = "99.0.0" # Different version
463
+ modified_payload[:agent_version] = "99.0.0" # Different version
464
464
 
465
465
  # Non-strict mode should log but not raise
466
- expect {
466
+ expect do
467
467
  DecisionAgent::Replay.run(modified_payload, strict: false)
468
- }.not_to raise_error
468
+ end.not_to raise_error
469
469
 
470
470
  # Should successfully replay despite version difference
471
471
  replayed_result = DecisionAgent::Replay.run(modified_payload, strict: false)
@@ -489,7 +489,7 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
489
489
  decision: "approve",
490
490
  confidence: 1.0,
491
491
  scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
492
- agent_version: "0.0.1", # Old version
492
+ agent_version: "0.0.1", # Old version
493
493
  deterministic_hash: "old_hash"
494
494
  }
495
495
 
@@ -514,9 +514,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
514
514
 
515
515
  # Strict mode should still work because version is not part of deterministic comparison
516
516
  # (only decision and confidence are compared in strict mode)
517
- expect {
517
+ expect do
518
518
  DecisionAgent::Replay.run(modified_payload, strict: true)
519
- }.not_to raise_error
519
+ end.not_to raise_error
520
520
  end
521
521
  end
522
522
 
@@ -543,9 +543,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
543
543
  }
544
544
 
545
545
  # Should not raise error, just creates new hash during replay
546
- expect {
546
+ expect do
547
547
  DecisionAgent::Replay.run(audit_payload, strict: false)
548
- }.not_to raise_error
548
+ end.not_to raise_error
549
549
 
550
550
  result = DecisionAgent::Replay.run(audit_payload, strict: false)
551
551
  expect(result.decision).to eq("approve")
@@ -582,24 +582,24 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
582
582
 
583
583
  it "validates required fields before replay" do
584
584
  # Missing context
585
- expect {
585
+ expect do
586
586
  DecisionAgent::Replay.run({ decision: "test", confidence: 0.5, evaluations: [] }, strict: true)
587
- }.to raise_error(DecisionAgent::InvalidRuleDslError, /context/)
587
+ end.to raise_error(DecisionAgent::InvalidRuleDslError, /context/)
588
588
 
589
589
  # Missing evaluations
590
- expect {
590
+ expect do
591
591
  DecisionAgent::Replay.run({ context: {}, decision: "test", confidence: 0.5 }, strict: true)
592
- }.to raise_error(DecisionAgent::InvalidRuleDslError, /evaluations/)
592
+ end.to raise_error(DecisionAgent::InvalidRuleDslError, /evaluations/)
593
593
 
594
594
  # Missing decision
595
- expect {
595
+ expect do
596
596
  DecisionAgent::Replay.run({ context: {}, evaluations: [], confidence: 0.5 }, strict: true)
597
- }.to raise_error(DecisionAgent::InvalidRuleDslError, /decision/)
597
+ end.to raise_error(DecisionAgent::InvalidRuleDslError, /decision/)
598
598
 
599
599
  # Missing confidence
600
- expect {
600
+ expect do
601
601
  DecisionAgent::Replay.run({ context: {}, evaluations: [], decision: "test" }, strict: true)
602
- }.to raise_error(DecisionAgent::InvalidRuleDslError, /confidence/)
602
+ end.to raise_error(DecisionAgent::InvalidRuleDslError, /confidence/)
603
603
  end
604
604
 
605
605
  it "handles evaluation with invalid weight" do
@@ -610,7 +610,7 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
610
610
  evaluations: [
611
611
  {
612
612
  decision: "approve",
613
- weight: 2.5, # Weight > 1.0, invalid
613
+ weight: 2.5, # Weight > 1.0, invalid
614
614
  reason: "Test",
615
615
  evaluator_name: "TestEvaluator",
616
616
  metadata: {}
@@ -622,21 +622,21 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
622
622
  }
623
623
 
624
624
  # Invalid weight (> 1.0) should raise error when creating Evaluation
625
- expect {
625
+ expect do
626
626
  DecisionAgent::Replay.run(audit_payload, strict: false)
627
- }.to raise_error(DecisionAgent::InvalidWeightError)
627
+ end.to raise_error(DecisionAgent::InvalidWeightError)
628
628
  end
629
629
 
630
630
  it "handles completely empty audit payload" do
631
- expect {
631
+ expect do
632
632
  DecisionAgent::Replay.run({}, strict: false)
633
- }.to raise_error(DecisionAgent::InvalidRuleDslError)
633
+ end.to raise_error(DecisionAgent::InvalidRuleDslError)
634
634
  end
635
635
 
636
636
  it "handles nil audit payload" do
637
- expect {
637
+ expect do
638
638
  DecisionAgent::Replay.run(nil, strict: false)
639
- }.to raise_error
639
+ end.to raise_error
640
640
  end
641
641
  end
642
642
 
@@ -657,14 +657,14 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
657
657
  ],
658
658
  decision: "approve",
659
659
  confidence: 0.9,
660
- scoring_strategy: "DecisionAgent::Scoring::OldStrategyName", # Renamed or deleted
660
+ scoring_strategy: "DecisionAgent::Scoring::OldStrategyName", # Renamed or deleted
661
661
  agent_version: "0.1.0"
662
662
  }
663
663
 
664
664
  # Should fall back to default strategy (WeightedAverage)
665
- expect {
665
+ expect do
666
666
  DecisionAgent::Replay.run(audit_payload, strict: false)
667
- }.not_to raise_error
667
+ end.not_to raise_error
668
668
 
669
669
  result = DecisionAgent::Replay.run(audit_payload, strict: false)
670
670
  expect(result.decision).to eq("approve")
@@ -686,7 +686,7 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
686
686
  ],
687
687
  decision: "approve",
688
688
  confidence: 0.85,
689
- scoring_strategy: "MyCompany::CustomMLBasedScoringStrategy", # Custom strategy
689
+ scoring_strategy: "MyCompany::CustomMLBasedScoringStrategy", # Custom strategy
690
690
  agent_version: "0.1.0"
691
691
  }
692
692
 
data/spec/replay_spec.rb CHANGED
@@ -34,9 +34,9 @@ RSpec.describe DecisionAgent::Replay do
34
34
  modified_payload = original_result.audit_payload.dup
35
35
  modified_payload[:decision] = "reject"
36
36
 
37
- expect {
37
+ expect do
38
38
  DecisionAgent::Replay.run(modified_payload, strict: true)
39
- }.to raise_error(DecisionAgent::ReplayMismatchError) do |error|
39
+ end.to raise_error(DecisionAgent::ReplayMismatchError) do |error|
40
40
  expect(error.differences).to include(/decision mismatch/)
41
41
  expect(error.expected[:decision]).to eq("reject")
42
42
  expect(error.actual[:decision]).to eq("approve")
@@ -50,9 +50,9 @@ RSpec.describe DecisionAgent::Replay do
50
50
  modified_payload = original_result.audit_payload.dup
51
51
  modified_payload[:confidence] = 0.5
52
52
 
53
- expect {
53
+ expect do
54
54
  DecisionAgent::Replay.run(modified_payload, strict: true)
55
- }.to raise_error(DecisionAgent::ReplayMismatchError) do |error|
55
+ end.to raise_error(DecisionAgent::ReplayMismatchError) do |error|
56
56
  expect(error.differences).to include(/confidence mismatch/)
57
57
  end
58
58
  end
@@ -64,9 +64,9 @@ RSpec.describe DecisionAgent::Replay do
64
64
  modified_payload = original_result.audit_payload.dup
65
65
  modified_payload[:decision] = "reject"
66
66
 
67
- expect {
67
+ expect do
68
68
  DecisionAgent::Replay.run(modified_payload, strict: false)
69
- }.not_to raise_error
69
+ end.not_to raise_error
70
70
  end
71
71
 
72
72
  it "logs differences in non-strict mode" do
@@ -76,17 +76,17 @@ RSpec.describe DecisionAgent::Replay do
76
76
  modified_payload = original_result.audit_payload.dup
77
77
  modified_payload[:decision] = "reject"
78
78
 
79
- expect {
79
+ expect do
80
80
  DecisionAgent::Replay.run(modified_payload, strict: false)
81
- }.to output(/Decision changed/).to_stderr
81
+ end.to output(/Decision changed/).to_stderr
82
82
  end
83
83
 
84
84
  it "validates required fields in audit payload" do
85
85
  invalid_payload = { context: {} }
86
86
 
87
- expect {
87
+ expect do
88
88
  DecisionAgent::Replay.run(invalid_payload, strict: true)
89
- }.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key/)
89
+ end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key/)
90
90
  end
91
91
 
92
92
  it "reconstructs evaluations from audit payload" do
@@ -112,7 +112,7 @@ RSpec.describe DecisionAgent::Replay do
112
112
  )
113
113
 
114
114
  expect(replayed_result.evaluations.size).to eq(2)
115
- expect(replayed_result.evaluations.map(&:evaluator_name)).to match_array(["Evaluator1", "Evaluator2"])
115
+ expect(replayed_result.evaluations.map(&:evaluator_name)).to match_array(%w[Evaluator1 Evaluator2])
116
116
  end
117
117
 
118
118
  it "uses correct scoring strategy from audit payload" do