decision_agent 0.3.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115):
  1. checksums.yaml +4 -4
  2. data/README.md +272 -7
  3. data/lib/decision_agent/agent.rb +72 -1
  4. data/lib/decision_agent/context.rb +1 -0
  5. data/lib/decision_agent/data_enrichment/cache/memory_adapter.rb +86 -0
  6. data/lib/decision_agent/data_enrichment/cache_adapter.rb +49 -0
  7. data/lib/decision_agent/data_enrichment/circuit_breaker.rb +135 -0
  8. data/lib/decision_agent/data_enrichment/client.rb +220 -0
  9. data/lib/decision_agent/data_enrichment/config.rb +78 -0
  10. data/lib/decision_agent/data_enrichment/errors.rb +36 -0
  11. data/lib/decision_agent/decision.rb +102 -2
  12. data/lib/decision_agent/dmn/feel/evaluator.rb +28 -6
  13. data/lib/decision_agent/dsl/condition_evaluator.rb +982 -839
  14. data/lib/decision_agent/dsl/schema_validator.rb +51 -13
  15. data/lib/decision_agent/evaluators/dmn_evaluator.rb +106 -19
  16. data/lib/decision_agent/evaluators/json_rule_evaluator.rb +69 -9
  17. data/lib/decision_agent/explainability/condition_trace.rb +83 -0
  18. data/lib/decision_agent/explainability/explainability_result.rb +52 -0
  19. data/lib/decision_agent/explainability/rule_trace.rb +39 -0
  20. data/lib/decision_agent/explainability/trace_collector.rb +24 -0
  21. data/lib/decision_agent/monitoring/alert_manager.rb +5 -1
  22. data/lib/decision_agent/simulation/errors.rb +18 -0
  23. data/lib/decision_agent/simulation/impact_analyzer.rb +498 -0
  24. data/lib/decision_agent/simulation/monte_carlo_simulator.rb +635 -0
  25. data/lib/decision_agent/simulation/replay_engine.rb +486 -0
  26. data/lib/decision_agent/simulation/scenario_engine.rb +318 -0
  27. data/lib/decision_agent/simulation/scenario_library.rb +163 -0
  28. data/lib/decision_agent/simulation/shadow_test_engine.rb +287 -0
  29. data/lib/decision_agent/simulation/what_if_analyzer.rb +1002 -0
  30. data/lib/decision_agent/simulation.rb +17 -0
  31. data/lib/decision_agent/version.rb +1 -1
  32. data/lib/decision_agent/versioning/activerecord_adapter.rb +23 -8
  33. data/lib/decision_agent/web/public/app.js +119 -0
  34. data/lib/decision_agent/web/public/index.html +49 -0
  35. data/lib/decision_agent/web/public/simulation.html +130 -0
  36. data/lib/decision_agent/web/public/simulation_impact.html +478 -0
  37. data/lib/decision_agent/web/public/simulation_replay.html +551 -0
  38. data/lib/decision_agent/web/public/simulation_shadow.html +546 -0
  39. data/lib/decision_agent/web/public/simulation_whatif.html +532 -0
  40. data/lib/decision_agent/web/public/styles.css +65 -0
  41. data/lib/decision_agent/web/server.rb +594 -23
  42. data/lib/decision_agent.rb +60 -2
  43. metadata +53 -73
  44. data/spec/ab_testing/ab_test_assignment_spec.rb +0 -253
  45. data/spec/ab_testing/ab_test_manager_spec.rb +0 -612
  46. data/spec/ab_testing/ab_test_spec.rb +0 -270
  47. data/spec/ab_testing/ab_testing_agent_spec.rb +0 -655
  48. data/spec/ab_testing/storage/adapter_spec.rb +0 -64
  49. data/spec/ab_testing/storage/memory_adapter_spec.rb +0 -485
  50. data/spec/activerecord_thread_safety_spec.rb +0 -553
  51. data/spec/advanced_operators_spec.rb +0 -3150
  52. data/spec/agent_spec.rb +0 -289
  53. data/spec/api_contract_spec.rb +0 -430
  54. data/spec/audit_adapters_spec.rb +0 -92
  55. data/spec/auth/access_audit_logger_spec.rb +0 -394
  56. data/spec/auth/authenticator_spec.rb +0 -112
  57. data/spec/auth/password_reset_spec.rb +0 -294
  58. data/spec/auth/permission_checker_spec.rb +0 -207
  59. data/spec/auth/permission_spec.rb +0 -73
  60. data/spec/auth/rbac_adapter_spec.rb +0 -778
  61. data/spec/auth/rbac_config_spec.rb +0 -82
  62. data/spec/auth/role_spec.rb +0 -51
  63. data/spec/auth/session_manager_spec.rb +0 -172
  64. data/spec/auth/session_spec.rb +0 -112
  65. data/spec/auth/user_spec.rb +0 -130
  66. data/spec/comprehensive_edge_cases_spec.rb +0 -1777
  67. data/spec/context_spec.rb +0 -127
  68. data/spec/decision_agent_spec.rb +0 -96
  69. data/spec/decision_spec.rb +0 -423
  70. data/spec/dmn/decision_graph_spec.rb +0 -282
  71. data/spec/dmn/decision_tree_spec.rb +0 -203
  72. data/spec/dmn/feel/errors_spec.rb +0 -18
  73. data/spec/dmn/feel/functions_spec.rb +0 -400
  74. data/spec/dmn/feel/simple_parser_spec.rb +0 -274
  75. data/spec/dmn/feel/types_spec.rb +0 -176
  76. data/spec/dmn/feel_parser_spec.rb +0 -489
  77. data/spec/dmn/hit_policy_spec.rb +0 -202
  78. data/spec/dmn/integration_spec.rb +0 -226
  79. data/spec/dsl/condition_evaluator_spec.rb +0 -774
  80. data/spec/dsl_validation_spec.rb +0 -648
  81. data/spec/edge_cases_spec.rb +0 -353
  82. data/spec/evaluation_spec.rb +0 -364
  83. data/spec/evaluation_validator_spec.rb +0 -165
  84. data/spec/examples/feedback_aware_evaluator_spec.rb +0 -460
  85. data/spec/examples.txt +0 -1909
  86. data/spec/fixtures/dmn/complex_decision.dmn +0 -81
  87. data/spec/fixtures/dmn/invalid_structure.dmn +0 -31
  88. data/spec/fixtures/dmn/simple_decision.dmn +0 -40
  89. data/spec/issue_verification_spec.rb +0 -759
  90. data/spec/json_rule_evaluator_spec.rb +0 -587
  91. data/spec/monitoring/alert_manager_spec.rb +0 -378
  92. data/spec/monitoring/metrics_collector_spec.rb +0 -501
  93. data/spec/monitoring/monitored_agent_spec.rb +0 -225
  94. data/spec/monitoring/prometheus_exporter_spec.rb +0 -242
  95. data/spec/monitoring/storage/activerecord_adapter_spec.rb +0 -498
  96. data/spec/monitoring/storage/base_adapter_spec.rb +0 -61
  97. data/spec/monitoring/storage/memory_adapter_spec.rb +0 -247
  98. data/spec/performance_optimizations_spec.rb +0 -493
  99. data/spec/replay_edge_cases_spec.rb +0 -699
  100. data/spec/replay_spec.rb +0 -210
  101. data/spec/rfc8785_canonicalization_spec.rb +0 -215
  102. data/spec/scoring_spec.rb +0 -225
  103. data/spec/spec_helper.rb +0 -60
  104. data/spec/testing/batch_test_importer_spec.rb +0 -693
  105. data/spec/testing/batch_test_runner_spec.rb +0 -307
  106. data/spec/testing/test_coverage_analyzer_spec.rb +0 -292
  107. data/spec/testing/test_result_comparator_spec.rb +0 -392
  108. data/spec/testing/test_scenario_spec.rb +0 -113
  109. data/spec/thread_safety_spec.rb +0 -490
  110. data/spec/thread_safety_spec.rb.broken +0 -878
  111. data/spec/versioning/adapter_spec.rb +0 -156
  112. data/spec/versioning_spec.rb +0 -1030
  113. data/spec/web/middleware/auth_middleware_spec.rb +0 -133
  114. data/spec/web/middleware/permission_middleware_spec.rb +0 -247
  115. data/spec/web_ui_rack_spec.rb +0 -2134
@@ -1,699 +0,0 @@
1
- require "spec_helper"
2
-
3
- RSpec.describe "DecisionAgent::Replay Edge Cases" do
4
- describe "handling rule changes" do
5
- let(:original_rules) do
6
- {
7
- version: "1.0",
8
- ruleset: "approval",
9
- rules: [
10
- {
11
- id: "auto_approve",
12
- if: { field: "score", op: "gte", value: 80 },
13
- then: { decision: "approve", weight: 0.9, reason: "High score" }
14
- }
15
- ]
16
- }
17
- end
18
-
19
- let(:modified_rules) do
20
- {
21
- version: "2.0",
22
- ruleset: "approval",
23
- rules: [
24
- {
25
- id: "auto_approve",
26
- if: { field: "score", op: "gte", value: 90 }, # Changed threshold
27
- then: { decision: "approve", weight: 0.9, reason: "Very high score" }
28
- }
29
- ]
30
- }
31
- end
32
-
33
- it "successfully replays with strict mode when rules haven't changed" do
34
- evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: original_rules)
35
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
36
-
37
- original_result = agent.decide(context: { score: 85 })
38
-
39
- expect do
40
- DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
41
- end.not_to raise_error
42
- end
43
-
44
- it "detects differences in strict mode when rules have changed" do
45
- # Original decision with old rules
46
- evaluator_v1 = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: original_rules)
47
- agent_v1 = DecisionAgent::Agent.new(evaluators: [evaluator_v1])
48
- original_result = agent_v1.decide(context: { score: 85 })
49
-
50
- # Now the rules have changed (threshold increased from 80 to 90)
51
- # Score of 85 no longer matches, so replay should detect a difference
52
-
53
- # Replay uses the stored evaluations (not re-evaluating rules)
54
- # So it should succeed because replay uses static evaluators from the audit payload
55
- expect do
56
- DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
57
- end.not_to raise_error
58
-
59
- # The replayed result should match the original
60
- replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
61
- expect(replayed_result.decision).to eq(original_result.decision)
62
- expect(replayed_result.confidence).to eq(original_result.confidence)
63
- end
64
-
65
- it "allows evolution in non-strict mode" do
66
- evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: original_rules)
67
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
68
-
69
- original_result = agent.decide(context: { score: 85 })
70
-
71
- # In non-strict mode, differences are logged but don't raise errors
72
- expect do
73
- DecisionAgent::Replay.run(original_result.audit_payload, strict: false)
74
- end.not_to raise_error
75
- end
76
- end
77
-
78
- describe "metadata comparison" do
79
- it "preserves and replays metadata correctly" do
80
- rules = {
81
- version: "1.0",
82
- ruleset: "test",
83
- rules: [
84
- {
85
- id: "metadata_test_rule",
86
- if: { field: "user", op: "eq", value: "alice" },
87
- then: {
88
- decision: "approve",
89
- weight: 0.8,
90
- reason: "Trusted user"
91
- }
92
- }
93
- ]
94
- }
95
-
96
- evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
97
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
98
-
99
- original_result = agent.decide(context: { user: "alice" })
100
-
101
- # Verify metadata is in the audit payload
102
- expect(original_result.audit_payload[:evaluations].first[:metadata]).to include(
103
- rule_id: "metadata_test_rule"
104
- )
105
-
106
- # Replay should preserve metadata
107
- replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
108
-
109
- expect(replayed_result.evaluations.first.metadata).to eq(
110
- original_result.evaluations.first.metadata
111
- )
112
- end
113
-
114
- it "handles metadata from static evaluators" do
115
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
116
- decision: "approve",
117
- weight: 0.7,
118
- reason: "No custom metadata"
119
- )
120
-
121
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
122
- original_result = agent.decide(context: { user: "bob" })
123
-
124
- # StaticEvaluator adds type: "static" by default
125
- expect(original_result.evaluations.first.metadata).to eq({ type: "static" })
126
-
127
- expect do
128
- DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
129
- end.not_to raise_error
130
-
131
- replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
132
- expect(replayed_result.evaluations.first.metadata).to eq({ type: "static" })
133
- end
134
-
135
- it "handles complex nested metadata" do
136
- evaluation = DecisionAgent::Evaluation.new(
137
- decision: "escalate",
138
- weight: 0.85,
139
- reason: "Complex case",
140
- evaluator_name: "CustomEvaluator",
141
- metadata: {
142
- user: { id: 123, role: "admin" },
143
- tags: %w[urgent important],
144
- history: [
145
- { action: "created", timestamp: "2025-01-01" },
146
- { action: "updated", timestamp: "2025-01-02" }
147
- ]
148
- }
149
- )
150
-
151
- static_eval = DecisionAgent::Evaluators::StaticEvaluator.new(
152
- decision: evaluation.decision,
153
- weight: evaluation.weight,
154
- reason: evaluation.reason
155
- )
156
-
157
- agent = DecisionAgent::Agent.new(evaluators: [static_eval])
158
- original_result = agent.decide(context: { test: true })
159
-
160
- # Manually construct audit payload with complex metadata
161
- payload = original_result.audit_payload.dup
162
- payload[:evaluations] = [evaluation.to_h]
163
-
164
- replayed_result = DecisionAgent::Replay.run(payload, strict: false)
165
-
166
- expect(replayed_result.evaluations.first.metadata).to be_a(Hash)
167
- end
168
- end
169
-
170
- describe "handling missing evaluators in replay" do
171
- it "replays successfully even if original evaluator class doesn't exist" do
172
- # This simulates a scenario where we had a CustomEvaluator that no longer exists
173
- # but we can still replay the decision from the audit log
174
-
175
- # WeightedAverage normalizes confidence: with one eval of weight 0.9, confidence = 0.9/0.9 = 1.0
176
- # So we need to use the correct confidence value that WeightedAverage would produce
177
- audit_payload = {
178
- timestamp: "2025-01-15T10:00:00.123456Z",
179
- context: { user: "charlie", action: "login" },
180
- feedback: {},
181
- evaluations: [
182
- {
183
- decision: "allow",
184
- weight: 0.9,
185
- reason: "User authenticated successfully",
186
- evaluator_name: "DeletedCustomAuthEvaluator", # This evaluator no longer exists
187
- metadata: { auth_method: "oauth", provider: "google" }
188
- }
189
- ],
190
- decision: "allow",
191
- confidence: 1.0, # WeightedAverage normalizes single eval to 1.0
192
- scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
193
- agent_version: "0.1.0",
194
- deterministic_hash: "abc123"
195
- }
196
-
197
- # Replay should work because it uses StaticEvaluator, not the original evaluator
198
- expect do
199
- DecisionAgent::Replay.run(audit_payload, strict: true)
200
- end.not_to raise_error
201
-
202
- replayed_result = DecisionAgent::Replay.run(audit_payload, strict: true)
203
-
204
- expect(replayed_result.decision).to eq("allow")
205
- expect(replayed_result.confidence).to eq(1.0)
206
- expect(replayed_result.evaluations.first.evaluator_name).to eq("DeletedCustomAuthEvaluator")
207
- end
208
-
209
- it "handles multiple evaluators where some are missing" do
210
- # WeightedAverage with two evals agreeing: confidence = (0.8 + 0.7) / (0.8 + 0.7) = 1.0
211
- audit_payload = {
212
- timestamp: "2025-01-15T10:00:00.123456Z",
213
- context: { user: "dave" },
214
- feedback: {},
215
- evaluations: [
216
- {
217
- decision: "approve",
218
- weight: 0.8,
219
- reason: "Rule matched",
220
- evaluator_name: "RuleEngine",
221
- metadata: { rule_id: "rule_123" }
222
- },
223
- {
224
- decision: "approve",
225
- weight: 0.7,
226
- reason: "ML model prediction",
227
- evaluator_name: "NonExistentMLEvaluator", # Missing evaluator
228
- metadata: { model_version: "v2.1" }
229
- }
230
- ],
231
- decision: "approve",
232
- confidence: 1.0, # Both agree, so 100% confidence
233
- scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
234
- agent_version: "0.1.0",
235
- deterministic_hash: "def456"
236
- }
237
-
238
- replayed_result = DecisionAgent::Replay.run(audit_payload, strict: true)
239
-
240
- expect(replayed_result.decision).to eq("approve")
241
- expect(replayed_result.evaluations.size).to eq(2)
242
- expect(replayed_result.evaluations.map(&:evaluator_name)).to include("NonExistentMLEvaluator")
243
- end
244
- end
245
-
246
- describe "scoring strategy evolution" do
247
- it "handles unknown scoring strategies gracefully" do
248
- audit_payload = {
249
- timestamp: "2025-01-15T10:00:00.123456Z",
250
- context: { test: true },
251
- feedback: {},
252
- evaluations: [
253
- {
254
- decision: "approve",
255
- weight: 0.9,
256
- reason: "Test",
257
- evaluator_name: "TestEvaluator",
258
- metadata: {}
259
- }
260
- ],
261
- decision: "approve",
262
- confidence: 0.9,
263
- scoring_strategy: "DecisionAgent::Scoring::DeprecatedBayesianStrategy", # Doesn't exist
264
- agent_version: "0.1.0",
265
- deterministic_hash: "ghi789"
266
- }
267
-
268
- # Should fall back to WeightedAverage
269
- expect do
270
- DecisionAgent::Replay.run(audit_payload, strict: false)
271
- end.not_to raise_error
272
-
273
- replayed_result = DecisionAgent::Replay.run(audit_payload, strict: false)
274
- expect(replayed_result.decision).to eq("approve")
275
- end
276
-
277
- it "detects scoring strategy mismatch in strict mode" do
278
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
279
- decision: "approve",
280
- weight: 0.6,
281
- reason: "Test"
282
- )
283
-
284
- # Create decision with WeightedAverage
285
- agent_weighted = DecisionAgent::Agent.new(
286
- evaluators: [evaluator],
287
- scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
288
- )
289
-
290
- original_result = agent_weighted.decide(context: { test: true })
291
-
292
- # Replay uses the stored scoring strategy from the audit payload
293
- # So it should replay successfully
294
- expect do
295
- DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
296
- end.not_to raise_error
297
- end
298
- end
299
-
300
- describe "audit payload validation" do
301
- it "requires context field" do
302
- incomplete_payload = {
303
- evaluations: [],
304
- decision: "test",
305
- confidence: 0.5
306
- }
307
-
308
- expect do
309
- DecisionAgent::Replay.run(incomplete_payload, strict: false)
310
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: context/)
311
- end
312
-
313
- it "requires evaluations field" do
314
- incomplete_payload = {
315
- context: { test: true },
316
- decision: "test",
317
- confidence: 0.5
318
- }
319
-
320
- expect do
321
- DecisionAgent::Replay.run(incomplete_payload, strict: false)
322
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: evaluations/)
323
- end
324
-
325
- it "requires decision field" do
326
- incomplete_payload = {
327
- context: { test: true },
328
- evaluations: [],
329
- confidence: 0.5
330
- }
331
-
332
- expect do
333
- DecisionAgent::Replay.run(incomplete_payload, strict: false)
334
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: decision/)
335
- end
336
-
337
- it "requires confidence field" do
338
- incomplete_payload = {
339
- context: { test: true },
340
- evaluations: [],
341
- decision: "test"
342
- }
343
-
344
- expect do
345
- DecisionAgent::Replay.run(incomplete_payload, strict: false)
346
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: confidence/)
347
- end
348
-
349
- it "accepts both symbol and string keys" do
350
- # Use MaxWeight strategy which preserves the original weight as confidence
351
- payload_with_strings = {
352
- "timestamp" => "2025-01-15T10:00:00.123456Z",
353
- "context" => { "test" => true },
354
- "feedback" => {},
355
- "evaluations" => [
356
- {
357
- "decision" => "approve",
358
- "weight" => 0.9,
359
- "reason" => "Test",
360
- "evaluator_name" => "TestEvaluator",
361
- "metadata" => {}
362
- }
363
- ],
364
- "decision" => "approve",
365
- "confidence" => 0.9,
366
- "scoring_strategy" => "DecisionAgent::Scoring::MaxWeight"
367
- }
368
-
369
- expect do
370
- DecisionAgent::Replay.run(payload_with_strings, strict: true)
371
- end.not_to raise_error
372
- end
373
- end
374
-
375
- describe "deterministic hash verification" do
376
- it "can verify replay produced the same deterministic hash" do
377
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
378
- decision: "approve",
379
- weight: 0.8,
380
- reason: "Test"
381
- )
382
-
383
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
384
- original_result = agent.decide(context: { user: "test" })
385
-
386
- original_hash = original_result.audit_payload[:deterministic_hash]
387
-
388
- replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
389
- replayed_hash = replayed_result.audit_payload[:deterministic_hash]
390
-
391
- # Hashes should match because same context, evaluations, decision, confidence, and strategy
392
- expect(replayed_hash).to eq(original_hash)
393
- end
394
-
395
- it "hash changes when context changes" do
396
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
397
- decision: "approve",
398
- weight: 0.8,
399
- reason: "Test"
400
- )
401
-
402
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
403
-
404
- result1 = agent.decide(context: { user: "alice" })
405
- result2 = agent.decide(context: { user: "bob" })
406
-
407
- expect(result1.audit_payload[:deterministic_hash]).not_to eq(
408
- result2.audit_payload[:deterministic_hash]
409
- )
410
- end
411
- end
412
-
413
- describe "feedback preservation in replay" do
414
- it "preserves original feedback in replay" do
415
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
416
- decision: "approve",
417
- weight: 0.8,
418
- reason: "Test"
419
- )
420
-
421
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
422
-
423
- original_feedback = { user_id: "manager_123", source: "manual_review" }
424
- original_result = agent.decide(context: { test: true }, feedback: original_feedback)
425
-
426
- expect(original_result.audit_payload[:feedback]).to eq(original_feedback)
427
-
428
- replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
429
-
430
- expect(replayed_result.audit_payload[:feedback]).to eq(original_feedback)
431
- end
432
-
433
- it "handles empty feedback" do
434
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
435
- decision: "approve",
436
- weight: 0.8,
437
- reason: "Test"
438
- )
439
-
440
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
441
- original_result = agent.decide(context: { test: true })
442
-
443
- expect(original_result.audit_payload[:feedback]).to eq({})
444
-
445
- replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
446
- expect(replayed_result.audit_payload[:feedback]).to eq({})
447
- end
448
- end
449
-
450
- describe "version mismatch scenarios" do
451
- it "logs warning when agent_version differs in non-strict mode" do
452
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
453
- decision: "approve",
454
- weight: 0.8,
455
- reason: "Test"
456
- )
457
-
458
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
459
- original_result = agent.decide(context: { test: true })
460
-
461
- # Modify agent_version
462
- modified_payload = original_result.audit_payload.dup
463
- modified_payload[:agent_version] = "99.0.0" # Different version
464
-
465
- # Non-strict mode should log but not raise
466
- expect do
467
- DecisionAgent::Replay.run(modified_payload, strict: false)
468
- end.not_to raise_error
469
-
470
- # Should successfully replay despite version difference
471
- replayed_result = DecisionAgent::Replay.run(modified_payload, strict: false)
472
- expect(replayed_result.decision).to eq("approve")
473
- end
474
-
475
- it "accepts different agent_version in non-strict mode" do
476
- audit_payload = {
477
- timestamp: "2025-01-15T10:00:00.123456Z",
478
- context: { test: true },
479
- feedback: {},
480
- evaluations: [
481
- {
482
- decision: "approve",
483
- weight: 0.9,
484
- reason: "Test",
485
- evaluator_name: "TestEvaluator",
486
- metadata: {}
487
- }
488
- ],
489
- decision: "approve",
490
- confidence: 1.0,
491
- scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
492
- agent_version: "0.0.1", # Old version
493
- deterministic_hash: "old_hash"
494
- }
495
-
496
- # Should accept and replay successfully
497
- result = DecisionAgent::Replay.run(audit_payload, strict: false)
498
- expect(result.decision).to eq("approve")
499
- end
500
-
501
- it "replays successfully in strict mode regardless of version" do
502
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
503
- decision: "approve",
504
- weight: 0.8,
505
- reason: "Test"
506
- )
507
-
508
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
509
- original_result = agent.decide(context: { test: true })
510
-
511
- # Modify agent_version
512
- modified_payload = original_result.audit_payload.dup
513
- modified_payload[:agent_version] = "2.0.0"
514
-
515
- # Strict mode should still work because version is not part of deterministic comparison
516
- # (only decision and confidence are compared in strict mode)
517
- expect do
518
- DecisionAgent::Replay.run(modified_payload, strict: true)
519
- end.not_to raise_error
520
- end
521
- end
522
-
523
- describe "corrupted audit payload scenarios" do
524
- it "handles missing deterministic_hash gracefully" do
525
- audit_payload = {
526
- timestamp: "2025-01-15T10:00:00.123456Z",
527
- context: { test: true },
528
- feedback: {},
529
- evaluations: [
530
- {
531
- decision: "approve",
532
- weight: 0.9,
533
- reason: "Test",
534
- evaluator_name: "TestEvaluator",
535
- metadata: {}
536
- }
537
- ],
538
- decision: "approve",
539
- confidence: 1.0,
540
- scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
541
- agent_version: "0.1.0"
542
- # deterministic_hash is missing
543
- }
544
-
545
- # Should not raise error, just creates new hash during replay
546
- expect do
547
- DecisionAgent::Replay.run(audit_payload, strict: false)
548
- end.not_to raise_error
549
-
550
- result = DecisionAgent::Replay.run(audit_payload, strict: false)
551
- expect(result.decision).to eq("approve")
552
- expect(result.audit_payload[:deterministic_hash]).to be_a(String)
553
- end
554
-
555
- it "handles invalid deterministic_hash gracefully" do
556
- audit_payload = {
557
- timestamp: "2025-01-15T10:00:00.123456Z",
558
- context: { test: true },
559
- feedback: {},
560
- evaluations: [
561
- {
562
- decision: "approve",
563
- weight: 0.9,
564
- reason: "Test",
565
- evaluator_name: "TestEvaluator",
566
- metadata: {}
567
- }
568
- ],
569
- decision: "approve",
570
- confidence: 1.0,
571
- scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
572
- agent_version: "0.1.0",
573
- deterministic_hash: "corrupted_invalid_hash_12345"
574
- }
575
-
576
- # Should replay successfully, generating new hash
577
- result = DecisionAgent::Replay.run(audit_payload, strict: false)
578
- expect(result.decision).to eq("approve")
579
- # New hash should be different from corrupted one
580
- expect(result.audit_payload[:deterministic_hash]).not_to eq("corrupted_invalid_hash_12345")
581
- end
582
-
583
- it "validates required fields before replay" do
584
- # Missing context
585
- expect do
586
- DecisionAgent::Replay.run({ decision: "test", confidence: 0.5, evaluations: [] }, strict: true)
587
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /context/)
588
-
589
- # Missing evaluations
590
- expect do
591
- DecisionAgent::Replay.run({ context: {}, decision: "test", confidence: 0.5 }, strict: true)
592
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /evaluations/)
593
-
594
- # Missing decision
595
- expect do
596
- DecisionAgent::Replay.run({ context: {}, evaluations: [], confidence: 0.5 }, strict: true)
597
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /decision/)
598
-
599
- # Missing confidence
600
- expect do
601
- DecisionAgent::Replay.run({ context: {}, evaluations: [], decision: "test" }, strict: true)
602
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /confidence/)
603
- end
604
-
605
- it "handles evaluation with invalid weight" do
606
- audit_payload = {
607
- timestamp: "2025-01-15T10:00:00.123456Z",
608
- context: { test: true },
609
- feedback: {},
610
- evaluations: [
611
- {
612
- decision: "approve",
613
- weight: 2.5, # Weight > 1.0, invalid
614
- reason: "Test",
615
- evaluator_name: "TestEvaluator",
616
- metadata: {}
617
- }
618
- ],
619
- decision: "approve",
620
- confidence: 1.0,
621
- scoring_strategy: "DecisionAgent::Scoring::WeightedAverage"
622
- }
623
-
624
- # Invalid weight (> 1.0) should raise error when creating Evaluation
625
- expect do
626
- DecisionAgent::Replay.run(audit_payload, strict: false)
627
- end.to raise_error(DecisionAgent::InvalidWeightError)
628
- end
629
-
630
- it "handles completely empty audit payload" do
631
- expect do
632
- DecisionAgent::Replay.run({}, strict: false)
633
- end.to raise_error(DecisionAgent::InvalidRuleDslError)
634
- end
635
-
636
- it "handles nil audit payload" do
637
- expect do
638
- DecisionAgent::Replay.run(nil, strict: false)
639
- end.to raise_error
640
- end
641
- end
642
-
643
- describe "scoring strategy class rename scenarios" do
644
- it "handles renamed scoring strategy class in non-strict mode" do
645
- audit_payload = {
646
- timestamp: "2025-01-15T10:00:00.123456Z",
647
- context: { test: true },
648
- feedback: {},
649
- evaluations: [
650
- {
651
- decision: "approve",
652
- weight: 0.9,
653
- reason: "Test",
654
- evaluator_name: "TestEvaluator",
655
- metadata: {}
656
- }
657
- ],
658
- decision: "approve",
659
- confidence: 0.9,
660
- scoring_strategy: "DecisionAgent::Scoring::OldStrategyName", # Renamed or deleted
661
- agent_version: "0.1.0"
662
- }
663
-
664
- # Should fall back to default strategy (WeightedAverage)
665
- expect do
666
- DecisionAgent::Replay.run(audit_payload, strict: false)
667
- end.not_to raise_error
668
-
669
- result = DecisionAgent::Replay.run(audit_payload, strict: false)
670
- expect(result.decision).to eq("approve")
671
- end
672
-
673
- it "handles custom scoring strategy not in current codebase" do
674
- audit_payload = {
675
- timestamp: "2025-01-15T10:00:00.123456Z",
676
- context: { test: true },
677
- feedback: {},
678
- evaluations: [
679
- {
680
- decision: "approve",
681
- weight: 0.85,
682
- reason: "Test",
683
- evaluator_name: "TestEvaluator",
684
- metadata: {}
685
- }
686
- ],
687
- decision: "approve",
688
- confidence: 0.85,
689
- scoring_strategy: "MyCompany::CustomMLBasedScoringStrategy", # Custom strategy
690
- agent_version: "0.1.0"
691
- }
692
-
693
- # Should use fallback strategy
694
- result = DecisionAgent::Replay.run(audit_payload, strict: false)
695
- expect(result).not_to be_nil
696
- expect(result.decision).to eq("approve")
697
- end
698
- end
699
- end