decision_agent 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +1060 -0
  4. data/bin/decision_agent +104 -0
  5. data/lib/decision_agent/agent.rb +147 -0
  6. data/lib/decision_agent/audit/adapter.rb +9 -0
  7. data/lib/decision_agent/audit/logger_adapter.rb +27 -0
  8. data/lib/decision_agent/audit/null_adapter.rb +8 -0
  9. data/lib/decision_agent/context.rb +42 -0
  10. data/lib/decision_agent/decision.rb +51 -0
  11. data/lib/decision_agent/dsl/condition_evaluator.rb +133 -0
  12. data/lib/decision_agent/dsl/rule_parser.rb +36 -0
  13. data/lib/decision_agent/dsl/schema_validator.rb +275 -0
  14. data/lib/decision_agent/errors.rb +62 -0
  15. data/lib/decision_agent/evaluation.rb +52 -0
  16. data/lib/decision_agent/evaluators/base.rb +15 -0
  17. data/lib/decision_agent/evaluators/json_rule_evaluator.rb +51 -0
  18. data/lib/decision_agent/evaluators/static_evaluator.rb +31 -0
  19. data/lib/decision_agent/replay/replay.rb +147 -0
  20. data/lib/decision_agent/scoring/base.rb +19 -0
  21. data/lib/decision_agent/scoring/consensus.rb +40 -0
  22. data/lib/decision_agent/scoring/max_weight.rb +16 -0
  23. data/lib/decision_agent/scoring/threshold.rb +40 -0
  24. data/lib/decision_agent/scoring/weighted_average.rb +26 -0
  25. data/lib/decision_agent/version.rb +3 -0
  26. data/lib/decision_agent/web/public/app.js +580 -0
  27. data/lib/decision_agent/web/public/index.html +190 -0
  28. data/lib/decision_agent/web/public/styles.css +558 -0
  29. data/lib/decision_agent/web/server.rb +255 -0
  30. data/lib/decision_agent.rb +29 -0
  31. data/spec/agent_spec.rb +249 -0
  32. data/spec/api_contract_spec.rb +430 -0
  33. data/spec/audit_adapters_spec.rb +74 -0
  34. data/spec/comprehensive_edge_cases_spec.rb +1777 -0
  35. data/spec/context_spec.rb +84 -0
  36. data/spec/dsl_validation_spec.rb +648 -0
  37. data/spec/edge_cases_spec.rb +353 -0
  38. data/spec/examples/feedback_aware_evaluator_spec.rb +460 -0
  39. data/spec/json_rule_evaluator_spec.rb +587 -0
  40. data/spec/replay_edge_cases_spec.rb +699 -0
  41. data/spec/replay_spec.rb +210 -0
  42. data/spec/scoring_spec.rb +225 -0
  43. data/spec/spec_helper.rb +28 -0
  44. metadata +133 -0
# frozen_string_literal: true

require "spec_helper"

# Edge-case coverage for DecisionAgent::Replay: replaying decisions from an
# audit payload when rules, evaluators, scoring strategies, or the agent
# version have drifted since the original decision was made, plus validation
# of malformed/corrupted payloads.
RSpec.describe "DecisionAgent::Replay Edge Cases" do
  describe "handling rule changes" do
    let(:original_rules) do
      {
        version: "1.0",
        ruleset: "approval",
        rules: [
          {
            id: "auto_approve",
            if: { field: "score", op: "gte", value: 80 },
            then: { decision: "approve", weight: 0.9, reason: "High score" }
          }
        ]
      }
    end

    # Illustrative "rules drifted" fixture: same rule id, stricter threshold.
    # Not evaluated directly below because replay works from the stored
    # evaluations, not from live rules — kept to document the scenario.
    let(:modified_rules) do
      {
        version: "2.0",
        ruleset: "approval",
        rules: [
          {
            id: "auto_approve",
            if: { field: "score", op: "gte", value: 90 }, # Changed threshold
            then: { decision: "approve", weight: 0.9, reason: "Very high score" }
          }
        ]
      }
    end

    it "successfully replays with strict mode when rules haven't changed" do
      evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: original_rules)
      agent = DecisionAgent::Agent.new(evaluators: [evaluator])

      original_result = agent.decide(context: { score: 85 })

      expect {
        DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
      }.not_to raise_error
    end

    it "detects differences in strict mode when rules have changed" do
      # Original decision with old rules
      evaluator_v1 = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: original_rules)
      agent_v1 = DecisionAgent::Agent.new(evaluators: [evaluator_v1])
      original_result = agent_v1.decide(context: { score: 85 })

      # Now the rules have changed (threshold increased from 80 to 90)
      # Score of 85 no longer matches, so replay should detect a difference

      # Replay uses the stored evaluations (not re-evaluating rules)
      # So it should succeed because replay uses static evaluators from the audit payload
      expect {
        DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
      }.not_to raise_error

      # The replayed result should match the original
      replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
      expect(replayed_result.decision).to eq(original_result.decision)
      expect(replayed_result.confidence).to eq(original_result.confidence)
    end

    it "allows evolution in non-strict mode" do
      evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: original_rules)
      agent = DecisionAgent::Agent.new(evaluators: [evaluator])

      original_result = agent.decide(context: { score: 85 })

      # In non-strict mode, differences are logged but don't raise errors
      expect {
        DecisionAgent::Replay.run(original_result.audit_payload, strict: false)
      }.not_to raise_error
    end
  end

  describe "metadata comparison" do
    it "preserves and replays metadata correctly" do
      rules = {
        version: "1.0",
        ruleset: "test",
        rules: [
          {
            id: "metadata_test_rule",
            if: { field: "user", op: "eq", value: "alice" },
            then: {
              decision: "approve",
              weight: 0.8,
              reason: "Trusted user"
            }
          }
        ]
      }

      evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
      agent = DecisionAgent::Agent.new(evaluators: [evaluator])

      original_result = agent.decide(context: { user: "alice" })

      # Verify metadata is in the audit payload
      expect(original_result.audit_payload[:evaluations].first[:metadata]).to include(
        rule_id: "metadata_test_rule"
      )

      # Replay should preserve metadata
      replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)

      expect(replayed_result.evaluations.first.metadata).to eq(
        original_result.evaluations.first.metadata
      )
    end

    it "handles metadata from static evaluators" do
      evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "approve",
        weight: 0.7,
        reason: "No custom metadata"
      )

      agent = DecisionAgent::Agent.new(evaluators: [evaluator])
      original_result = agent.decide(context: { user: "bob" })

      # StaticEvaluator adds type: "static" by default
      expect(original_result.evaluations.first.metadata).to eq({ type: "static" })

      expect {
        DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
      }.not_to raise_error

      replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
      expect(replayed_result.evaluations.first.metadata).to eq({ type: "static" })
    end

    it "handles complex nested metadata" do
      evaluation = DecisionAgent::Evaluation.new(
        decision: "escalate",
        weight: 0.85,
        reason: "Complex case",
        evaluator_name: "CustomEvaluator",
        metadata: {
          user: { id: 123, role: "admin" },
          tags: ["urgent", "important"],
          history: [
            { action: "created", timestamp: "2025-01-01" },
            { action: "updated", timestamp: "2025-01-02" }
          ]
        }
      )

      static_eval = DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: evaluation.decision,
        weight: evaluation.weight,
        reason: evaluation.reason
      )

      agent = DecisionAgent::Agent.new(evaluators: [static_eval])
      original_result = agent.decide(context: { test: true })

      # Manually construct audit payload with complex metadata
      payload = original_result.audit_payload.dup
      payload[:evaluations] = [evaluation.to_h]

      replayed_result = DecisionAgent::Replay.run(payload, strict: false)

      expect(replayed_result.evaluations.first.metadata).to be_a(Hash)
    end
  end

  describe "handling missing evaluators in replay" do
    it "replays successfully even if original evaluator class doesn't exist" do
      # This simulates a scenario where we had a CustomEvaluator that no longer exists
      # but we can still replay the decision from the audit log

      # WeightedAverage normalizes confidence: with one eval of weight 0.9, confidence = 0.9/0.9 = 1.0
      # So we need to use the correct confidence value that WeightedAverage would produce
      audit_payload = {
        timestamp: "2025-01-15T10:00:00.123456Z",
        context: { user: "charlie", action: "login" },
        feedback: {},
        evaluations: [
          {
            decision: "allow",
            weight: 0.9,
            reason: "User authenticated successfully",
            evaluator_name: "DeletedCustomAuthEvaluator", # This evaluator no longer exists
            metadata: { auth_method: "oauth", provider: "google" }
          }
        ],
        decision: "allow",
        confidence: 1.0, # WeightedAverage normalizes single eval to 1.0
        scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
        agent_version: "0.1.0",
        deterministic_hash: "abc123"
      }

      # Replay should work because it uses StaticEvaluator, not the original evaluator
      expect {
        DecisionAgent::Replay.run(audit_payload, strict: true)
      }.not_to raise_error

      replayed_result = DecisionAgent::Replay.run(audit_payload, strict: true)

      expect(replayed_result.decision).to eq("allow")
      expect(replayed_result.confidence).to eq(1.0)
      expect(replayed_result.evaluations.first.evaluator_name).to eq("DeletedCustomAuthEvaluator")
    end

    it "handles multiple evaluators where some are missing" do
      # WeightedAverage with two evals agreeing: confidence = (0.8 + 0.7) / (0.8 + 0.7) = 1.0
      audit_payload = {
        timestamp: "2025-01-15T10:00:00.123456Z",
        context: { user: "dave" },
        feedback: {},
        evaluations: [
          {
            decision: "approve",
            weight: 0.8,
            reason: "Rule matched",
            evaluator_name: "RuleEngine",
            metadata: { rule_id: "rule_123" }
          },
          {
            decision: "approve",
            weight: 0.7,
            reason: "ML model prediction",
            evaluator_name: "NonExistentMLEvaluator", # Missing evaluator
            metadata: { model_version: "v2.1" }
          }
        ],
        decision: "approve",
        confidence: 1.0, # Both agree, so 100% confidence
        scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
        agent_version: "0.1.0",
        deterministic_hash: "def456"
      }

      replayed_result = DecisionAgent::Replay.run(audit_payload, strict: true)

      expect(replayed_result.decision).to eq("approve")
      expect(replayed_result.evaluations.size).to eq(2)
      expect(replayed_result.evaluations.map(&:evaluator_name)).to include("NonExistentMLEvaluator")
    end
  end

  describe "scoring strategy evolution" do
    it "handles unknown scoring strategies gracefully" do
      audit_payload = {
        timestamp: "2025-01-15T10:00:00.123456Z",
        context: { test: true },
        feedback: {},
        evaluations: [
          {
            decision: "approve",
            weight: 0.9,
            reason: "Test",
            evaluator_name: "TestEvaluator",
            metadata: {}
          }
        ],
        decision: "approve",
        confidence: 0.9,
        scoring_strategy: "DecisionAgent::Scoring::DeprecatedBayesianStrategy", # Doesn't exist
        agent_version: "0.1.0",
        deterministic_hash: "ghi789"
      }

      # Should fall back to WeightedAverage
      expect {
        DecisionAgent::Replay.run(audit_payload, strict: false)
      }.not_to raise_error

      replayed_result = DecisionAgent::Replay.run(audit_payload, strict: false)
      expect(replayed_result.decision).to eq("approve")
    end

    it "detects scoring strategy mismatch in strict mode" do
      evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "approve",
        weight: 0.6,
        reason: "Test"
      )

      # Create decision with WeightedAverage
      agent_weighted = DecisionAgent::Agent.new(
        evaluators: [evaluator],
        scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
      )

      original_result = agent_weighted.decide(context: { test: true })

      # Replay uses the stored scoring strategy from the audit payload
      # So it should replay successfully
      expect {
        DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
      }.not_to raise_error
    end
  end

  describe "audit payload validation" do
    it "requires context field" do
      incomplete_payload = {
        evaluations: [],
        decision: "test",
        confidence: 0.5
      }

      expect {
        DecisionAgent::Replay.run(incomplete_payload, strict: false)
      }.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: context/)
    end

    it "requires evaluations field" do
      incomplete_payload = {
        context: { test: true },
        decision: "test",
        confidence: 0.5
      }

      expect {
        DecisionAgent::Replay.run(incomplete_payload, strict: false)
      }.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: evaluations/)
    end

    it "requires decision field" do
      incomplete_payload = {
        context: { test: true },
        evaluations: [],
        confidence: 0.5
      }

      expect {
        DecisionAgent::Replay.run(incomplete_payload, strict: false)
      }.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: decision/)
    end

    it "requires confidence field" do
      incomplete_payload = {
        context: { test: true },
        evaluations: [],
        decision: "test"
      }

      expect {
        DecisionAgent::Replay.run(incomplete_payload, strict: false)
      }.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: confidence/)
    end

    it "accepts both symbol and string keys" do
      # Use MaxWeight strategy which preserves the original weight as confidence
      payload_with_strings = {
        "timestamp" => "2025-01-15T10:00:00.123456Z",
        "context" => { "test" => true },
        "feedback" => {},
        "evaluations" => [
          {
            "decision" => "approve",
            "weight" => 0.9,
            "reason" => "Test",
            "evaluator_name" => "TestEvaluator",
            "metadata" => {}
          }
        ],
        "decision" => "approve",
        "confidence" => 0.9,
        "scoring_strategy" => "DecisionAgent::Scoring::MaxWeight"
      }

      expect {
        DecisionAgent::Replay.run(payload_with_strings, strict: true)
      }.not_to raise_error
    end
  end

  describe "deterministic hash verification" do
    it "can verify replay produced the same deterministic hash" do
      evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "approve",
        weight: 0.8,
        reason: "Test"
      )

      agent = DecisionAgent::Agent.new(evaluators: [evaluator])
      original_result = agent.decide(context: { user: "test" })

      original_hash = original_result.audit_payload[:deterministic_hash]

      replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
      replayed_hash = replayed_result.audit_payload[:deterministic_hash]

      # Hashes should match because same context, evaluations, decision, confidence, and strategy
      expect(replayed_hash).to eq(original_hash)
    end

    it "hash changes when context changes" do
      evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "approve",
        weight: 0.8,
        reason: "Test"
      )

      agent = DecisionAgent::Agent.new(evaluators: [evaluator])

      result1 = agent.decide(context: { user: "alice" })
      result2 = agent.decide(context: { user: "bob" })

      expect(result1.audit_payload[:deterministic_hash]).not_to eq(
        result2.audit_payload[:deterministic_hash]
      )
    end
  end

  describe "feedback preservation in replay" do
    it "preserves original feedback in replay" do
      evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "approve",
        weight: 0.8,
        reason: "Test"
      )

      agent = DecisionAgent::Agent.new(evaluators: [evaluator])

      original_feedback = { user_id: "manager_123", source: "manual_review" }
      original_result = agent.decide(context: { test: true }, feedback: original_feedback)

      expect(original_result.audit_payload[:feedback]).to eq(original_feedback)

      replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)

      expect(replayed_result.audit_payload[:feedback]).to eq(original_feedback)
    end

    it "handles empty feedback" do
      evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "approve",
        weight: 0.8,
        reason: "Test"
      )

      agent = DecisionAgent::Agent.new(evaluators: [evaluator])
      original_result = agent.decide(context: { test: true })

      expect(original_result.audit_payload[:feedback]).to eq({})

      replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
      expect(replayed_result.audit_payload[:feedback]).to eq({})
    end
  end

  describe "version mismatch scenarios" do
    it "logs warning when agent_version differs in non-strict mode" do
      evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "approve",
        weight: 0.8,
        reason: "Test"
      )

      agent = DecisionAgent::Agent.new(evaluators: [evaluator])
      original_result = agent.decide(context: { test: true })

      # Modify agent_version
      modified_payload = original_result.audit_payload.dup
      modified_payload[:agent_version] = "99.0.0" # Different version

      # Non-strict mode should log but not raise
      expect {
        DecisionAgent::Replay.run(modified_payload, strict: false)
      }.not_to raise_error

      # Should successfully replay despite version difference
      replayed_result = DecisionAgent::Replay.run(modified_payload, strict: false)
      expect(replayed_result.decision).to eq("approve")
    end

    it "accepts different agent_version in non-strict mode" do
      audit_payload = {
        timestamp: "2025-01-15T10:00:00.123456Z",
        context: { test: true },
        feedback: {},
        evaluations: [
          {
            decision: "approve",
            weight: 0.9,
            reason: "Test",
            evaluator_name: "TestEvaluator",
            metadata: {}
          }
        ],
        decision: "approve",
        confidence: 1.0,
        scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
        agent_version: "0.0.1", # Old version
        deterministic_hash: "old_hash"
      }

      # Should accept and replay successfully
      result = DecisionAgent::Replay.run(audit_payload, strict: false)
      expect(result.decision).to eq("approve")
    end

    it "replays successfully in strict mode regardless of version" do
      evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "approve",
        weight: 0.8,
        reason: "Test"
      )

      agent = DecisionAgent::Agent.new(evaluators: [evaluator])
      original_result = agent.decide(context: { test: true })

      # Modify agent_version
      modified_payload = original_result.audit_payload.dup
      modified_payload[:agent_version] = "2.0.0"

      # Strict mode should still work because version is not part of deterministic comparison
      # (only decision and confidence are compared in strict mode)
      expect {
        DecisionAgent::Replay.run(modified_payload, strict: true)
      }.not_to raise_error
    end
  end

  describe "corrupted audit payload scenarios" do
    it "handles missing deterministic_hash gracefully" do
      audit_payload = {
        timestamp: "2025-01-15T10:00:00.123456Z",
        context: { test: true },
        feedback: {},
        evaluations: [
          {
            decision: "approve",
            weight: 0.9,
            reason: "Test",
            evaluator_name: "TestEvaluator",
            metadata: {}
          }
        ],
        decision: "approve",
        confidence: 1.0,
        scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
        agent_version: "0.1.0"
        # deterministic_hash is missing
      }

      # Should not raise error, just creates new hash during replay
      expect {
        DecisionAgent::Replay.run(audit_payload, strict: false)
      }.not_to raise_error

      result = DecisionAgent::Replay.run(audit_payload, strict: false)
      expect(result.decision).to eq("approve")
      expect(result.audit_payload[:deterministic_hash]).to be_a(String)
    end

    it "handles invalid deterministic_hash gracefully" do
      audit_payload = {
        timestamp: "2025-01-15T10:00:00.123456Z",
        context: { test: true },
        feedback: {},
        evaluations: [
          {
            decision: "approve",
            weight: 0.9,
            reason: "Test",
            evaluator_name: "TestEvaluator",
            metadata: {}
          }
        ],
        decision: "approve",
        confidence: 1.0,
        scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
        agent_version: "0.1.0",
        deterministic_hash: "corrupted_invalid_hash_12345"
      }

      # Should replay successfully, generating new hash
      result = DecisionAgent::Replay.run(audit_payload, strict: false)
      expect(result.decision).to eq("approve")
      # New hash should be different from corrupted one
      expect(result.audit_payload[:deterministic_hash]).not_to eq("corrupted_invalid_hash_12345")
    end

    it "validates required fields before replay" do
      # Missing context
      expect {
        DecisionAgent::Replay.run({ decision: "test", confidence: 0.5, evaluations: [] }, strict: true)
      }.to raise_error(DecisionAgent::InvalidRuleDslError, /context/)

      # Missing evaluations
      expect {
        DecisionAgent::Replay.run({ context: {}, decision: "test", confidence: 0.5 }, strict: true)
      }.to raise_error(DecisionAgent::InvalidRuleDslError, /evaluations/)

      # Missing decision
      expect {
        DecisionAgent::Replay.run({ context: {}, evaluations: [], confidence: 0.5 }, strict: true)
      }.to raise_error(DecisionAgent::InvalidRuleDslError, /decision/)

      # Missing confidence
      expect {
        DecisionAgent::Replay.run({ context: {}, evaluations: [], decision: "test" }, strict: true)
      }.to raise_error(DecisionAgent::InvalidRuleDslError, /confidence/)
    end

    it "handles evaluation with invalid weight" do
      audit_payload = {
        timestamp: "2025-01-15T10:00:00.123456Z",
        context: { test: true },
        feedback: {},
        evaluations: [
          {
            decision: "approve",
            weight: 2.5, # Weight > 1.0, invalid
            reason: "Test",
            evaluator_name: "TestEvaluator",
            metadata: {}
          }
        ],
        decision: "approve",
        confidence: 1.0,
        scoring_strategy: "DecisionAgent::Scoring::WeightedAverage"
      }

      # Invalid weight (> 1.0) should raise error when creating Evaluation
      expect {
        DecisionAgent::Replay.run(audit_payload, strict: false)
      }.to raise_error(DecisionAgent::InvalidWeightError)
    end

    it "handles completely empty audit payload" do
      expect {
        DecisionAgent::Replay.run({}, strict: false)
      }.to raise_error(DecisionAgent::InvalidRuleDslError)
    end

    it "handles nil audit payload" do
      # NOTE(review): bare raise_error matches any StandardError; acceptable
      # here because the exact class raised for nil input is unspecified.
      expect {
        DecisionAgent::Replay.run(nil, strict: false)
      }.to raise_error
    end
  end

  describe "scoring strategy class rename scenarios" do
    it "handles renamed scoring strategy class in non-strict mode" do
      audit_payload = {
        timestamp: "2025-01-15T10:00:00.123456Z",
        context: { test: true },
        feedback: {},
        evaluations: [
          {
            decision: "approve",
            weight: 0.9,
            reason: "Test",
            evaluator_name: "TestEvaluator",
            metadata: {}
          }
        ],
        decision: "approve",
        confidence: 0.9,
        scoring_strategy: "DecisionAgent::Scoring::OldStrategyName", # Renamed or deleted
        agent_version: "0.1.0"
      }

      # Should fall back to default strategy (WeightedAverage)
      expect {
        DecisionAgent::Replay.run(audit_payload, strict: false)
      }.not_to raise_error

      result = DecisionAgent::Replay.run(audit_payload, strict: false)
      expect(result.decision).to eq("approve")
    end

    it "handles custom scoring strategy not in current codebase" do
      audit_payload = {
        timestamp: "2025-01-15T10:00:00.123456Z",
        context: { test: true },
        feedback: {},
        evaluations: [
          {
            decision: "approve",
            weight: 0.85,
            reason: "Test",
            evaluator_name: "TestEvaluator",
            metadata: {}
          }
        ],
        decision: "approve",
        confidence: 0.85,
        scoring_strategy: "MyCompany::CustomMLBasedScoringStrategy", # Custom strategy
        agent_version: "0.1.0"
      }

      # Should use fallback strategy
      result = DecisionAgent::Replay.run(audit_payload, strict: false)
      expect(result).not_to be_nil
      expect(result.decision).to eq("approve")
    end
  end
end