decision_agent 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. checksums.yaml +4 -4
  2. data/README.md +212 -35
  3. data/bin/decision_agent +3 -8
  4. data/lib/decision_agent/ab_testing/ab_test.rb +197 -0
  5. data/lib/decision_agent/ab_testing/ab_test_assignment.rb +76 -0
  6. data/lib/decision_agent/ab_testing/ab_test_manager.rb +317 -0
  7. data/lib/decision_agent/ab_testing/ab_testing_agent.rb +152 -0
  8. data/lib/decision_agent/ab_testing/storage/activerecord_adapter.rb +155 -0
  9. data/lib/decision_agent/ab_testing/storage/adapter.rb +67 -0
  10. data/lib/decision_agent/ab_testing/storage/memory_adapter.rb +116 -0
  11. data/lib/decision_agent/agent.rb +19 -26
  12. data/lib/decision_agent/audit/null_adapter.rb +1 -2
  13. data/lib/decision_agent/decision.rb +3 -1
  14. data/lib/decision_agent/dsl/condition_evaluator.rb +4 -3
  15. data/lib/decision_agent/dsl/rule_parser.rb +4 -6
  16. data/lib/decision_agent/dsl/schema_validator.rb +27 -31
  17. data/lib/decision_agent/errors.rb +11 -8
  18. data/lib/decision_agent/evaluation.rb +3 -1
  19. data/lib/decision_agent/evaluation_validator.rb +78 -0
  20. data/lib/decision_agent/evaluators/json_rule_evaluator.rb +26 -0
  21. data/lib/decision_agent/evaluators/static_evaluator.rb +2 -6
  22. data/lib/decision_agent/monitoring/alert_manager.rb +282 -0
  23. data/lib/decision_agent/monitoring/dashboard/public/dashboard.css +381 -0
  24. data/lib/decision_agent/monitoring/dashboard/public/dashboard.js +471 -0
  25. data/lib/decision_agent/monitoring/dashboard/public/index.html +161 -0
  26. data/lib/decision_agent/monitoring/dashboard_server.rb +340 -0
  27. data/lib/decision_agent/monitoring/metrics_collector.rb +423 -0
  28. data/lib/decision_agent/monitoring/monitored_agent.rb +71 -0
  29. data/lib/decision_agent/monitoring/prometheus_exporter.rb +247 -0
  30. data/lib/decision_agent/monitoring/storage/activerecord_adapter.rb +253 -0
  31. data/lib/decision_agent/monitoring/storage/base_adapter.rb +90 -0
  32. data/lib/decision_agent/monitoring/storage/memory_adapter.rb +222 -0
  33. data/lib/decision_agent/replay/replay.rb +12 -22
  34. data/lib/decision_agent/scoring/base.rb +1 -1
  35. data/lib/decision_agent/scoring/consensus.rb +5 -5
  36. data/lib/decision_agent/scoring/weighted_average.rb +1 -1
  37. data/lib/decision_agent/version.rb +1 -1
  38. data/lib/decision_agent/versioning/activerecord_adapter.rb +69 -33
  39. data/lib/decision_agent/versioning/adapter.rb +1 -3
  40. data/lib/decision_agent/versioning/file_storage_adapter.rb +143 -35
  41. data/lib/decision_agent/versioning/version_manager.rb +4 -12
  42. data/lib/decision_agent/web/public/index.html +1 -1
  43. data/lib/decision_agent/web/server.rb +19 -24
  44. data/lib/decision_agent.rb +14 -0
  45. data/lib/generators/decision_agent/install/install_generator.rb +42 -5
  46. data/lib/generators/decision_agent/install/templates/ab_test_assignment_model.rb +45 -0
  47. data/lib/generators/decision_agent/install/templates/ab_test_model.rb +54 -0
  48. data/lib/generators/decision_agent/install/templates/ab_testing_migration.rb +43 -0
  49. data/lib/generators/decision_agent/install/templates/ab_testing_tasks.rake +189 -0
  50. data/lib/generators/decision_agent/install/templates/decision_agent_tasks.rake +114 -0
  51. data/lib/generators/decision_agent/install/templates/decision_log.rb +57 -0
  52. data/lib/generators/decision_agent/install/templates/error_metric.rb +53 -0
  53. data/lib/generators/decision_agent/install/templates/evaluation_metric.rb +43 -0
  54. data/lib/generators/decision_agent/install/templates/migration.rb +17 -6
  55. data/lib/generators/decision_agent/install/templates/monitoring_migration.rb +109 -0
  56. data/lib/generators/decision_agent/install/templates/performance_metric.rb +76 -0
  57. data/lib/generators/decision_agent/install/templates/rule.rb +3 -3
  58. data/lib/generators/decision_agent/install/templates/rule_version.rb +13 -7
  59. data/spec/ab_testing/ab_test_manager_spec.rb +330 -0
  60. data/spec/ab_testing/ab_test_spec.rb +270 -0
  61. data/spec/activerecord_thread_safety_spec.rb +553 -0
  62. data/spec/agent_spec.rb +13 -13
  63. data/spec/api_contract_spec.rb +16 -16
  64. data/spec/audit_adapters_spec.rb +3 -3
  65. data/spec/comprehensive_edge_cases_spec.rb +86 -86
  66. data/spec/dsl_validation_spec.rb +83 -83
  67. data/spec/edge_cases_spec.rb +23 -23
  68. data/spec/examples/feedback_aware_evaluator_spec.rb +7 -7
  69. data/spec/examples.txt +612 -0
  70. data/spec/issue_verification_spec.rb +759 -0
  71. data/spec/json_rule_evaluator_spec.rb +15 -15
  72. data/spec/monitoring/alert_manager_spec.rb +378 -0
  73. data/spec/monitoring/metrics_collector_spec.rb +281 -0
  74. data/spec/monitoring/monitored_agent_spec.rb +222 -0
  75. data/spec/monitoring/prometheus_exporter_spec.rb +242 -0
  76. data/spec/monitoring/storage/activerecord_adapter_spec.rb +346 -0
  77. data/spec/monitoring/storage/memory_adapter_spec.rb +247 -0
  78. data/spec/replay_edge_cases_spec.rb +58 -58
  79. data/spec/replay_spec.rb +11 -11
  80. data/spec/rfc8785_canonicalization_spec.rb +215 -0
  81. data/spec/scoring_spec.rb +1 -1
  82. data/spec/spec_helper.rb +9 -0
  83. data/spec/thread_safety_spec.rb +482 -0
  84. data/spec/thread_safety_spec.rb.broken +878 -0
  85. data/spec/versioning_spec.rb +141 -37
  86. data/spec/web_ui_rack_spec.rb +135 -0
  87. metadata +93 -6
@@ -0,0 +1,247 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+ require "decision_agent/monitoring/storage/memory_adapter"
5
+
6
+ RSpec.describe DecisionAgent::Monitoring::Storage::MemoryAdapter do
7
+ let(:adapter) { described_class.new(window_size: 3600) }
8
+
9
+ describe ".available?" do
10
+ it "is always available" do
11
+ expect(described_class.available?).to be true
12
+ end
13
+ end
14
+
15
+ describe "#record_decision" do
16
+ it "stores decision in memory" do
17
+ expect do
18
+ adapter.record_decision(
19
+ "approve_loan",
20
+ { user_id: 123, amount: 10_000 },
21
+ confidence: 0.85,
22
+ evaluations_count: 3,
23
+ duration_ms: 45.5,
24
+ status: "success"
25
+ )
26
+ end.to change { adapter.metrics_count[:decisions] }.by(1)
27
+ end
28
+ end
29
+
30
+ describe "#record_evaluation" do
31
+ it "stores evaluation in memory" do
32
+ expect do
33
+ adapter.record_evaluation(
34
+ "CreditScoreEvaluator",
35
+ score: 0.92,
36
+ success: true,
37
+ duration_ms: 12.3,
38
+ details: { credit_score: 750 }
39
+ )
40
+ end.to change { adapter.metrics_count[:evaluations] }.by(1)
41
+ end
42
+ end
43
+
44
+ describe "#record_performance" do
45
+ it "stores performance metric in memory" do
46
+ expect do
47
+ adapter.record_performance(
48
+ "database_query",
49
+ duration_ms: 150.5,
50
+ status: "success",
51
+ metadata: { query: "SELECT * FROM users" }
52
+ )
53
+ end.to change { adapter.metrics_count[:performance] }.by(1)
54
+ end
55
+ end
56
+
57
+ describe "#record_error" do
58
+ it "stores error in memory" do
59
+ expect do
60
+ adapter.record_error(
61
+ "ArgumentError",
62
+ message: "Invalid input",
63
+ stack_trace: ["line 1", "line 2"],
64
+ severity: "medium",
65
+ context: { input: "bad_value" }
66
+ )
67
+ end.to change { adapter.metrics_count[:errors] }.by(1)
68
+ end
69
+ end
70
+
71
+ describe "#statistics" do
72
+ before do
73
+ # Create test data
74
+ 5.times do |i|
75
+ adapter.record_decision(
76
+ "decision_#{i}",
77
+ { index: i },
78
+ confidence: 0.5 + (i * 0.05),
79
+ evaluations_count: 2,
80
+ duration_ms: 100,
81
+ status: i.even? ? "success" : "failure"
82
+ )
83
+ end
84
+
85
+ 3.times do |i|
86
+ adapter.record_evaluation(
87
+ "Evaluator#{i}",
88
+ score: 0.8 + (i * 0.05),
89
+ success: true
90
+ )
91
+ end
92
+
93
+ 6.times do |i|
94
+ adapter.record_performance(
95
+ "operation",
96
+ duration_ms: 100 + (i * 20),
97
+ status: "success"
98
+ )
99
+ end
100
+
101
+ 2.times do
102
+ adapter.record_error("RuntimeError", severity: "critical")
103
+ end
104
+ end
105
+
106
+ it "returns comprehensive statistics" do
107
+ stats = adapter.statistics(time_range: 3600)
108
+
109
+ expect(stats[:decisions][:total]).to eq(5)
110
+ expect(stats[:decisions][:average_confidence]).to be_within(0.01).of(0.6)
111
+ expect(stats[:decisions][:success_rate]).to eq(0.6) # 3 out of 5
112
+
113
+ expect(stats[:evaluations][:total]).to eq(3)
114
+ expect(stats[:evaluations][:average_score]).to be_within(0.01).of(0.85)
115
+
116
+ expect(stats[:performance][:total]).to eq(6)
117
+ expect(stats[:performance][:average_duration_ms]).to eq(150.0)
118
+ expect(stats[:performance][:success_rate]).to eq(1.0)
119
+
120
+ expect(stats[:errors][:total]).to eq(2)
121
+ expect(stats[:errors][:critical_count]).to eq(2)
122
+ end
123
+
124
+ it "filters by time range" do
125
+ # Record an old metric that should be filtered out
126
+ adapter.instance_variable_get(:@metrics)[:decisions] << {
127
+ decision: "old_decision",
128
+ confidence: 0.5,
129
+ timestamp: Time.now - 7200 # 2 hours ago
130
+ }
131
+
132
+ stats = adapter.statistics(time_range: 3600) # Last hour only
133
+
134
+ expect(stats[:decisions][:total]).to eq(5) # Doesn't include the old one
135
+ end
136
+ end
137
+
138
+ describe "#time_series" do
139
+ before do
140
+ # Create metrics at different times
141
+ now = Time.now
142
+ adapter.instance_variable_get(:@metrics)[:decisions] << { timestamp: now - 120 }
143
+ adapter.instance_variable_get(:@metrics)[:decisions] << { timestamp: now - 70 }
144
+ adapter.instance_variable_get(:@metrics)[:decisions] << { timestamp: now - 10 }
145
+ end
146
+
147
+ it "groups metrics into time buckets" do
148
+ series = adapter.time_series(:decisions, bucket_size: 60, time_range: 200)
149
+
150
+ expect(series[:timestamps]).to be_an(Array)
151
+ expect(series[:data]).to be_an(Array)
152
+ expect(series[:data].sum).to eq(3) # All 3 metrics
153
+ end
154
+
155
+ it "uses correct bucket size" do
156
+ series = adapter.time_series(:decisions, bucket_size: 60, time_range: 200)
157
+
158
+ # Metrics should be grouped into 60-second buckets
159
+ expect(series[:data].max).to be <= 2 # No bucket should have more than 2
160
+ end
161
+ end
162
+
163
+ describe "#metrics_count" do
164
+ before do
165
+ adapter.record_decision("test", {}, confidence: 0.8)
166
+ adapter.record_decision("test2", {}, confidence: 0.9)
167
+ adapter.record_evaluation("eval1", score: 0.85)
168
+ adapter.record_performance("perf1", duration_ms: 100)
169
+ adapter.record_error("Error1")
170
+ end
171
+
172
+ it "returns count for each metric type" do
173
+ counts = adapter.metrics_count
174
+
175
+ expect(counts[:decisions]).to eq(2)
176
+ expect(counts[:evaluations]).to eq(1)
177
+ expect(counts[:performance]).to eq(1)
178
+ expect(counts[:errors]).to eq(1)
179
+ end
180
+ end
181
+
182
+ describe "#cleanup" do
183
+ let(:long_window_adapter) { described_class.new(window_size: 30 * 24 * 3_600) } # 30 day window
184
+
185
+ before do
186
+ now = Time.now
187
+
188
+ # Add old metrics (8 days ago) to adapter with long window
189
+ long_window_adapter.instance_variable_get(:@metrics)[:decisions] << {
190
+ decision: "old",
191
+ timestamp: now - (8 * 24 * 3600)
192
+ }
193
+ long_window_adapter.instance_variable_get(:@metrics)[:evaluations] << {
194
+ evaluator_name: "old",
195
+ timestamp: now - (8 * 24 * 3600)
196
+ }
197
+
198
+ # Add recent metrics
199
+ long_window_adapter.record_decision("recent", {}, confidence: 0.8)
200
+ long_window_adapter.record_evaluation("recent", score: 0.9)
201
+ end
202
+
203
+ it "removes old metrics and returns count" do
204
+ count = long_window_adapter.cleanup(older_than: 7 * 24 * 3600) # 7 days
205
+
206
+ expect(count).to eq(2) # 2 old metrics removed
207
+ expect(long_window_adapter.metrics_count[:decisions]).to eq(1) # Only recent one
208
+ expect(long_window_adapter.metrics_count[:evaluations]).to eq(1)
209
+ end
210
+ end
211
+
212
+ describe "window-based cleanup" do
213
+ let(:short_window_adapter) { described_class.new(window_size: 60) } # 1 minute window
214
+
215
+ it "automatically removes metrics older than window_size" do
216
+ now = Time.now
217
+
218
+ # Add old metric
219
+ short_window_adapter.instance_variable_get(:@metrics)[:decisions] << {
220
+ decision: "old",
221
+ timestamp: now - 120 # 2 minutes ago
222
+ }
223
+
224
+ # Add new metric (this should trigger cleanup)
225
+ short_window_adapter.record_decision("new", {}, confidence: 0.8)
226
+
227
+ # Only the new metric should remain
228
+ expect(short_window_adapter.metrics_count[:decisions]).to eq(1)
229
+ end
230
+ end
231
+
232
+ describe "thread safety" do
233
+ it "handles concurrent writes" do
234
+ threads = 10.times.map do
235
+ Thread.new do
236
+ 100.times do |i|
237
+ adapter.record_decision("concurrent_#{i}", {}, confidence: 0.8)
238
+ end
239
+ end
240
+ end
241
+
242
+ threads.each(&:join)
243
+
244
+ expect(adapter.metrics_count[:decisions]).to eq(1000)
245
+ end
246
+ end
247
+ end
@@ -23,7 +23,7 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
23
23
  rules: [
24
24
  {
25
25
  id: "auto_approve",
26
- if: { field: "score", op: "gte", value: 90 }, # Changed threshold
26
+ if: { field: "score", op: "gte", value: 90 }, # Changed threshold
27
27
  then: { decision: "approve", weight: 0.9, reason: "Very high score" }
28
28
  }
29
29
  ]
@@ -36,9 +36,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
36
36
 
37
37
  original_result = agent.decide(context: { score: 85 })
38
38
 
39
- expect {
39
+ expect do
40
40
  DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
41
- }.not_to raise_error
41
+ end.not_to raise_error
42
42
  end
43
43
 
44
44
  it "detects differences in strict mode when rules have changed" do
@@ -52,9 +52,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
52
52
 
53
53
  # Replay uses the stored evaluations (not re-evaluating rules)
54
54
  # So it should succeed because replay uses static evaluators from the audit payload
55
- expect {
55
+ expect do
56
56
  DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
57
- }.not_to raise_error
57
+ end.not_to raise_error
58
58
 
59
59
  # The replayed result should match the original
60
60
  replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
@@ -69,9 +69,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
69
69
  original_result = agent.decide(context: { score: 85 })
70
70
 
71
71
  # In non-strict mode, differences are logged but don't raise errors
72
- expect {
72
+ expect do
73
73
  DecisionAgent::Replay.run(original_result.audit_payload, strict: false)
74
- }.not_to raise_error
74
+ end.not_to raise_error
75
75
  end
76
76
  end
77
77
 
@@ -124,9 +124,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
124
124
  # StaticEvaluator adds type: "static" by default
125
125
  expect(original_result.evaluations.first.metadata).to eq({ type: "static" })
126
126
 
127
- expect {
127
+ expect do
128
128
  DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
129
- }.not_to raise_error
129
+ end.not_to raise_error
130
130
 
131
131
  replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
132
132
  expect(replayed_result.evaluations.first.metadata).to eq({ type: "static" })
@@ -140,7 +140,7 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
140
140
  evaluator_name: "CustomEvaluator",
141
141
  metadata: {
142
142
  user: { id: 123, role: "admin" },
143
- tags: ["urgent", "important"],
143
+ tags: %w[urgent important],
144
144
  history: [
145
145
  { action: "created", timestamp: "2025-01-01" },
146
146
  { action: "updated", timestamp: "2025-01-02" }
@@ -183,21 +183,21 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
183
183
  decision: "allow",
184
184
  weight: 0.9,
185
185
  reason: "User authenticated successfully",
186
- evaluator_name: "DeletedCustomAuthEvaluator", # This evaluator no longer exists
186
+ evaluator_name: "DeletedCustomAuthEvaluator", # This evaluator no longer exists
187
187
  metadata: { auth_method: "oauth", provider: "google" }
188
188
  }
189
189
  ],
190
190
  decision: "allow",
191
- confidence: 1.0, # WeightedAverage normalizes single eval to 1.0
191
+ confidence: 1.0, # WeightedAverage normalizes single eval to 1.0
192
192
  scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
193
193
  agent_version: "0.1.0",
194
194
  deterministic_hash: "abc123"
195
195
  }
196
196
 
197
197
  # Replay should work because it uses StaticEvaluator, not the original evaluator
198
- expect {
198
+ expect do
199
199
  DecisionAgent::Replay.run(audit_payload, strict: true)
200
- }.not_to raise_error
200
+ end.not_to raise_error
201
201
 
202
202
  replayed_result = DecisionAgent::Replay.run(audit_payload, strict: true)
203
203
 
@@ -224,12 +224,12 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
224
224
  decision: "approve",
225
225
  weight: 0.7,
226
226
  reason: "ML model prediction",
227
- evaluator_name: "NonExistentMLEvaluator", # Missing evaluator
227
+ evaluator_name: "NonExistentMLEvaluator", # Missing evaluator
228
228
  metadata: { model_version: "v2.1" }
229
229
  }
230
230
  ],
231
231
  decision: "approve",
232
- confidence: 1.0, # Both agree, so 100% confidence
232
+ confidence: 1.0, # Both agree, so 100% confidence
233
233
  scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
234
234
  agent_version: "0.1.0",
235
235
  deterministic_hash: "def456"
@@ -260,15 +260,15 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
260
260
  ],
261
261
  decision: "approve",
262
262
  confidence: 0.9,
263
- scoring_strategy: "DecisionAgent::Scoring::DeprecatedBayesianStrategy", # Doesn't exist
263
+ scoring_strategy: "DecisionAgent::Scoring::DeprecatedBayesianStrategy", # Doesn't exist
264
264
  agent_version: "0.1.0",
265
265
  deterministic_hash: "ghi789"
266
266
  }
267
267
 
268
268
  # Should fall back to WeightedAverage
269
- expect {
269
+ expect do
270
270
  DecisionAgent::Replay.run(audit_payload, strict: false)
271
- }.not_to raise_error
271
+ end.not_to raise_error
272
272
 
273
273
  replayed_result = DecisionAgent::Replay.run(audit_payload, strict: false)
274
274
  expect(replayed_result.decision).to eq("approve")
@@ -291,9 +291,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
291
291
 
292
292
  # Replay uses the stored scoring strategy from the audit payload
293
293
  # So it should replay successfully
294
- expect {
294
+ expect do
295
295
  DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
296
- }.not_to raise_error
296
+ end.not_to raise_error
297
297
  end
298
298
  end
299
299
 
@@ -305,9 +305,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
305
305
  confidence: 0.5
306
306
  }
307
307
 
308
- expect {
308
+ expect do
309
309
  DecisionAgent::Replay.run(incomplete_payload, strict: false)
310
- }.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: context/)
310
+ end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: context/)
311
311
  end
312
312
 
313
313
  it "requires evaluations field" do
@@ -317,9 +317,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
317
317
  confidence: 0.5
318
318
  }
319
319
 
320
- expect {
320
+ expect do
321
321
  DecisionAgent::Replay.run(incomplete_payload, strict: false)
322
- }.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: evaluations/)
322
+ end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: evaluations/)
323
323
  end
324
324
 
325
325
  it "requires decision field" do
@@ -329,9 +329,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
329
329
  confidence: 0.5
330
330
  }
331
331
 
332
- expect {
332
+ expect do
333
333
  DecisionAgent::Replay.run(incomplete_payload, strict: false)
334
- }.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: decision/)
334
+ end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: decision/)
335
335
  end
336
336
 
337
337
  it "requires confidence field" do
@@ -341,9 +341,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
341
341
  decision: "test"
342
342
  }
343
343
 
344
- expect {
344
+ expect do
345
345
  DecisionAgent::Replay.run(incomplete_payload, strict: false)
346
- }.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: confidence/)
346
+ end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: confidence/)
347
347
  end
348
348
 
349
349
  it "accepts both symbol and string keys" do
@@ -366,9 +366,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
366
366
  "scoring_strategy" => "DecisionAgent::Scoring::MaxWeight"
367
367
  }
368
368
 
369
- expect {
369
+ expect do
370
370
  DecisionAgent::Replay.run(payload_with_strings, strict: true)
371
- }.not_to raise_error
371
+ end.not_to raise_error
372
372
  end
373
373
  end
374
374
 
@@ -460,12 +460,12 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
460
460
 
461
461
  # Modify agent_version
462
462
  modified_payload = original_result.audit_payload.dup
463
- modified_payload[:agent_version] = "99.0.0" # Different version
463
+ modified_payload[:agent_version] = "99.0.0" # Different version
464
464
 
465
465
  # Non-strict mode should log but not raise
466
- expect {
466
+ expect do
467
467
  DecisionAgent::Replay.run(modified_payload, strict: false)
468
- }.not_to raise_error
468
+ end.not_to raise_error
469
469
 
470
470
  # Should successfully replay despite version difference
471
471
  replayed_result = DecisionAgent::Replay.run(modified_payload, strict: false)
@@ -489,7 +489,7 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
489
489
  decision: "approve",
490
490
  confidence: 1.0,
491
491
  scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
492
- agent_version: "0.0.1", # Old version
492
+ agent_version: "0.0.1", # Old version
493
493
  deterministic_hash: "old_hash"
494
494
  }
495
495
 
@@ -514,9 +514,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
514
514
 
515
515
  # Strict mode should still work because version is not part of deterministic comparison
516
516
  # (only decision and confidence are compared in strict mode)
517
- expect {
517
+ expect do
518
518
  DecisionAgent::Replay.run(modified_payload, strict: true)
519
- }.not_to raise_error
519
+ end.not_to raise_error
520
520
  end
521
521
  end
522
522
 
@@ -543,9 +543,9 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
543
543
  }
544
544
 
545
545
  # Should not raise error, just creates new hash during replay
546
- expect {
546
+ expect do
547
547
  DecisionAgent::Replay.run(audit_payload, strict: false)
548
- }.not_to raise_error
548
+ end.not_to raise_error
549
549
 
550
550
  result = DecisionAgent::Replay.run(audit_payload, strict: false)
551
551
  expect(result.decision).to eq("approve")
@@ -582,24 +582,24 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
582
582
 
583
583
  it "validates required fields before replay" do
584
584
  # Missing context
585
- expect {
585
+ expect do
586
586
  DecisionAgent::Replay.run({ decision: "test", confidence: 0.5, evaluations: [] }, strict: true)
587
- }.to raise_error(DecisionAgent::InvalidRuleDslError, /context/)
587
+ end.to raise_error(DecisionAgent::InvalidRuleDslError, /context/)
588
588
 
589
589
  # Missing evaluations
590
- expect {
590
+ expect do
591
591
  DecisionAgent::Replay.run({ context: {}, decision: "test", confidence: 0.5 }, strict: true)
592
- }.to raise_error(DecisionAgent::InvalidRuleDslError, /evaluations/)
592
+ end.to raise_error(DecisionAgent::InvalidRuleDslError, /evaluations/)
593
593
 
594
594
  # Missing decision
595
- expect {
595
+ expect do
596
596
  DecisionAgent::Replay.run({ context: {}, evaluations: [], confidence: 0.5 }, strict: true)
597
- }.to raise_error(DecisionAgent::InvalidRuleDslError, /decision/)
597
+ end.to raise_error(DecisionAgent::InvalidRuleDslError, /decision/)
598
598
 
599
599
  # Missing confidence
600
- expect {
600
+ expect do
601
601
  DecisionAgent::Replay.run({ context: {}, evaluations: [], decision: "test" }, strict: true)
602
- }.to raise_error(DecisionAgent::InvalidRuleDslError, /confidence/)
602
+ end.to raise_error(DecisionAgent::InvalidRuleDslError, /confidence/)
603
603
  end
604
604
 
605
605
  it "handles evaluation with invalid weight" do
@@ -610,7 +610,7 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
610
610
  evaluations: [
611
611
  {
612
612
  decision: "approve",
613
- weight: 2.5, # Weight > 1.0, invalid
613
+ weight: 2.5, # Weight > 1.0, invalid
614
614
  reason: "Test",
615
615
  evaluator_name: "TestEvaluator",
616
616
  metadata: {}
@@ -622,21 +622,21 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
622
622
  }
623
623
 
624
624
  # Invalid weight (> 1.0) should raise error when creating Evaluation
625
- expect {
625
+ expect do
626
626
  DecisionAgent::Replay.run(audit_payload, strict: false)
627
- }.to raise_error(DecisionAgent::InvalidWeightError)
627
+ end.to raise_error(DecisionAgent::InvalidWeightError)
628
628
  end
629
629
 
630
630
  it "handles completely empty audit payload" do
631
- expect {
631
+ expect do
632
632
  DecisionAgent::Replay.run({}, strict: false)
633
- }.to raise_error(DecisionAgent::InvalidRuleDslError)
633
+ end.to raise_error(DecisionAgent::InvalidRuleDslError)
634
634
  end
635
635
 
636
636
  it "handles nil audit payload" do
637
- expect {
637
+ expect do
638
638
  DecisionAgent::Replay.run(nil, strict: false)
639
- }.to raise_error
639
+ end.to raise_error
640
640
  end
641
641
  end
642
642
 
@@ -657,14 +657,14 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
657
657
  ],
658
658
  decision: "approve",
659
659
  confidence: 0.9,
660
- scoring_strategy: "DecisionAgent::Scoring::OldStrategyName", # Renamed or deleted
660
+ scoring_strategy: "DecisionAgent::Scoring::OldStrategyName", # Renamed or deleted
661
661
  agent_version: "0.1.0"
662
662
  }
663
663
 
664
664
  # Should fall back to default strategy (WeightedAverage)
665
- expect {
665
+ expect do
666
666
  DecisionAgent::Replay.run(audit_payload, strict: false)
667
- }.not_to raise_error
667
+ end.not_to raise_error
668
668
 
669
669
  result = DecisionAgent::Replay.run(audit_payload, strict: false)
670
670
  expect(result.decision).to eq("approve")
@@ -686,7 +686,7 @@ RSpec.describe "DecisionAgent::Replay Edge Cases" do
686
686
  ],
687
687
  decision: "approve",
688
688
  confidence: 0.85,
689
- scoring_strategy: "MyCompany::CustomMLBasedScoringStrategy", # Custom strategy
689
+ scoring_strategy: "MyCompany::CustomMLBasedScoringStrategy", # Custom strategy
690
690
  agent_version: "0.1.0"
691
691
  }
692
692
 
data/spec/replay_spec.rb CHANGED
@@ -34,9 +34,9 @@ RSpec.describe DecisionAgent::Replay do
34
34
  modified_payload = original_result.audit_payload.dup
35
35
  modified_payload[:decision] = "reject"
36
36
 
37
- expect {
37
+ expect do
38
38
  DecisionAgent::Replay.run(modified_payload, strict: true)
39
- }.to raise_error(DecisionAgent::ReplayMismatchError) do |error|
39
+ end.to raise_error(DecisionAgent::ReplayMismatchError) do |error|
40
40
  expect(error.differences).to include(/decision mismatch/)
41
41
  expect(error.expected[:decision]).to eq("reject")
42
42
  expect(error.actual[:decision]).to eq("approve")
@@ -50,9 +50,9 @@ RSpec.describe DecisionAgent::Replay do
50
50
  modified_payload = original_result.audit_payload.dup
51
51
  modified_payload[:confidence] = 0.5
52
52
 
53
- expect {
53
+ expect do
54
54
  DecisionAgent::Replay.run(modified_payload, strict: true)
55
- }.to raise_error(DecisionAgent::ReplayMismatchError) do |error|
55
+ end.to raise_error(DecisionAgent::ReplayMismatchError) do |error|
56
56
  expect(error.differences).to include(/confidence mismatch/)
57
57
  end
58
58
  end
@@ -64,9 +64,9 @@ RSpec.describe DecisionAgent::Replay do
64
64
  modified_payload = original_result.audit_payload.dup
65
65
  modified_payload[:decision] = "reject"
66
66
 
67
- expect {
67
+ expect do
68
68
  DecisionAgent::Replay.run(modified_payload, strict: false)
69
- }.not_to raise_error
69
+ end.not_to raise_error
70
70
  end
71
71
 
72
72
  it "logs differences in non-strict mode" do
@@ -76,17 +76,17 @@ RSpec.describe DecisionAgent::Replay do
76
76
  modified_payload = original_result.audit_payload.dup
77
77
  modified_payload[:decision] = "reject"
78
78
 
79
- expect {
79
+ expect do
80
80
  DecisionAgent::Replay.run(modified_payload, strict: false)
81
- }.to output(/Decision changed/).to_stderr
81
+ end.to output(/Decision changed/).to_stderr
82
82
  end
83
83
 
84
84
  it "validates required fields in audit payload" do
85
85
  invalid_payload = { context: {} }
86
86
 
87
- expect {
87
+ expect do
88
88
  DecisionAgent::Replay.run(invalid_payload, strict: true)
89
- }.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key/)
89
+ end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key/)
90
90
  end
91
91
 
92
92
  it "reconstructs evaluations from audit payload" do
@@ -112,7 +112,7 @@ RSpec.describe DecisionAgent::Replay do
112
112
  )
113
113
 
114
114
  expect(replayed_result.evaluations.size).to eq(2)
115
- expect(replayed_result.evaluations.map(&:evaluator_name)).to match_array(["Evaluator1", "Evaluator2"])
115
+ expect(replayed_result.evaluations.map(&:evaluator_name)).to match_array(%w[Evaluator1 Evaluator2])
116
116
  end
117
117
 
118
118
  it "uses correct scoring strategy from audit payload" do