decision_agent 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +1060 -0
  4. data/bin/decision_agent +104 -0
  5. data/lib/decision_agent/agent.rb +147 -0
  6. data/lib/decision_agent/audit/adapter.rb +9 -0
  7. data/lib/decision_agent/audit/logger_adapter.rb +27 -0
  8. data/lib/decision_agent/audit/null_adapter.rb +8 -0
  9. data/lib/decision_agent/context.rb +42 -0
  10. data/lib/decision_agent/decision.rb +51 -0
  11. data/lib/decision_agent/dsl/condition_evaluator.rb +133 -0
  12. data/lib/decision_agent/dsl/rule_parser.rb +36 -0
  13. data/lib/decision_agent/dsl/schema_validator.rb +275 -0
  14. data/lib/decision_agent/errors.rb +62 -0
  15. data/lib/decision_agent/evaluation.rb +52 -0
  16. data/lib/decision_agent/evaluators/base.rb +15 -0
  17. data/lib/decision_agent/evaluators/json_rule_evaluator.rb +51 -0
  18. data/lib/decision_agent/evaluators/static_evaluator.rb +31 -0
  19. data/lib/decision_agent/replay/replay.rb +147 -0
  20. data/lib/decision_agent/scoring/base.rb +19 -0
  21. data/lib/decision_agent/scoring/consensus.rb +40 -0
  22. data/lib/decision_agent/scoring/max_weight.rb +16 -0
  23. data/lib/decision_agent/scoring/threshold.rb +40 -0
  24. data/lib/decision_agent/scoring/weighted_average.rb +26 -0
  25. data/lib/decision_agent/version.rb +3 -0
  26. data/lib/decision_agent/web/public/app.js +580 -0
  27. data/lib/decision_agent/web/public/index.html +190 -0
  28. data/lib/decision_agent/web/public/styles.css +558 -0
  29. data/lib/decision_agent/web/server.rb +255 -0
  30. data/lib/decision_agent.rb +29 -0
  31. data/spec/agent_spec.rb +249 -0
  32. data/spec/api_contract_spec.rb +430 -0
  33. data/spec/audit_adapters_spec.rb +74 -0
  34. data/spec/comprehensive_edge_cases_spec.rb +1777 -0
  35. data/spec/context_spec.rb +84 -0
  36. data/spec/dsl_validation_spec.rb +648 -0
  37. data/spec/edge_cases_spec.rb +353 -0
  38. data/spec/examples/feedback_aware_evaluator_spec.rb +460 -0
  39. data/spec/json_rule_evaluator_spec.rb +587 -0
  40. data/spec/replay_edge_cases_spec.rb +699 -0
  41. data/spec/replay_spec.rb +210 -0
  42. data/spec/scoring_spec.rb +225 -0
  43. data/spec/spec_helper.rb +28 -0
  44. metadata +133 -0
@@ -0,0 +1,1777 @@
1
+ require "spec_helper"
2
+
3
+ RSpec.describe "Comprehensive Edge Cases" do
4
+ # ============================================================
5
+ # JSON Rule DSL Edge Cases
6
+ # ============================================================
7
+
8
+ describe "JSON Rule DSL edge cases" do
9
+ describe "invalid operators" do
10
+ it "raises error when operator is unknown" do
11
+ rules = {
12
+ version: "1.0",
13
+ ruleset: "test",
14
+ rules: [
15
+ {
16
+ id: "rule_1",
17
+ if: { field: "status", op: "unknown_op", value: "active" },
18
+ then: { decision: "approve" }
19
+ }
20
+ ]
21
+ }
22
+
23
+ expect {
24
+ DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
25
+ }.to raise_error(DecisionAgent::InvalidRuleDslError, /Unsupported operator 'unknown_op'/)
26
+ end
27
+
28
+ it "raises error when operator is misspelled" do
29
+ rules = {
30
+ version: "1.0",
31
+ ruleset: "test",
32
+ rules: [
33
+ {
34
+ id: "rule_1",
35
+ if: { field: "score", op: "greather_than", value: 10 },
36
+ then: { decision: "pass" }
37
+ }
38
+ ]
39
+ }
40
+
41
+ expect {
42
+ DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
43
+ }.to raise_error(DecisionAgent::InvalidRuleDslError, /Unsupported operator 'greather_than'/)
44
+ end
45
+
46
+ it "raises error when operator is nil" do
47
+ rules = {
48
+ version: "1.0",
49
+ ruleset: "test",
50
+ rules: [
51
+ {
52
+ id: "rule_1",
53
+ if: { field: "status", op: nil, value: "active" },
54
+ then: { decision: "approve" }
55
+ }
56
+ ]
57
+ }
58
+
59
+ expect {
60
+ DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
61
+ }.to raise_error(DecisionAgent::InvalidRuleDslError, /missing 'op'/)
62
+ end
63
+ end
64
+
65
+ describe "deeply nested conditions" do
66
+ it "handles deeply nested all/any combinations" do
67
+ rules = {
68
+ version: "1.0",
69
+ ruleset: "test",
70
+ rules: [
71
+ {
72
+ id: "rule_1",
73
+ if: {
74
+ all: [
75
+ {
76
+ any: [
77
+ { field: "a", op: "eq", value: 1 },
78
+ {
79
+ all: [
80
+ { field: "b", op: "eq", value: 2 },
81
+ { field: "c", op: "eq", value: 3 }
82
+ ]
83
+ }
84
+ ]
85
+ },
86
+ { field: "d", op: "eq", value: 4 }
87
+ ]
88
+ },
89
+ then: { decision: "complex_match" }
90
+ }
91
+ ]
92
+ }
93
+
94
+ evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
95
+
96
+ # Should match via first branch of 'any'
97
+ context1 = DecisionAgent::Context.new({ a: 1, d: 4 })
98
+ expect(evaluator.evaluate(context1)).not_to be_nil
99
+
100
+ # Should match via second branch of 'any'
101
+ context2 = DecisionAgent::Context.new({ b: 2, c: 3, d: 4 })
102
+ expect(evaluator.evaluate(context2)).not_to be_nil
103
+
104
+ # Should not match (missing 'd')
105
+ context3 = DecisionAgent::Context.new({ a: 1 })
106
+ expect(evaluator.evaluate(context3)).to be_nil
107
+ end
108
+
109
+ it "handles empty nested conditions gracefully" do
110
+ rules = {
111
+ version: "1.0",
112
+ ruleset: "test",
113
+ rules: [
114
+ {
115
+ id: "rule_1",
116
+ if: {
117
+ all: [
118
+ { field: "status", op: "eq", value: "active" },
119
+ { any: [] }
120
+ ]
121
+ },
122
+ then: { decision: "approve" }
123
+ }
124
+ ]
125
+ }
126
+
127
+ evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
128
+ context = DecisionAgent::Context.new({ status: "active" })
129
+
130
+ # any: [] should return false, making the all condition false
131
+ evaluation = evaluator.evaluate(context)
132
+ expect(evaluation).to be_nil
133
+ end
134
+ end
135
+
136
+ describe "missing fields in nested structures" do
137
+ it "handles missing intermediate nested fields" do
138
+ rules = {
139
+ version: "1.0",
140
+ ruleset: "test",
141
+ rules: [
142
+ {
143
+ id: "rule_1",
144
+ if: { field: "user.profile.role", op: "eq", value: "admin" },
145
+ then: { decision: "allow" }
146
+ }
147
+ ]
148
+ }
149
+
150
+ evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
151
+
152
+ # Missing profile
153
+ context1 = DecisionAgent::Context.new({ user: {} })
154
+ expect(evaluator.evaluate(context1)).to be_nil
155
+
156
+ # user is nil
157
+ context2 = DecisionAgent::Context.new({ user: nil })
158
+ expect(evaluator.evaluate(context2)).to be_nil
159
+
160
+ # profile is nil
161
+ context3 = DecisionAgent::Context.new({ user: { profile: nil } })
162
+ expect(evaluator.evaluate(context3)).to be_nil
163
+ end
164
+
165
+ it "handles array-like nested access attempts" do
166
+ rules = {
167
+ version: "1.0",
168
+ ruleset: "test",
169
+ rules: [
170
+ {
171
+ id: "rule_1",
172
+ if: { field: "items.0.name", op: "eq", value: "first" },
173
+ then: { decision: "match" }
174
+ }
175
+ ]
176
+ }
177
+
178
+ evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
179
+
180
+ # This should gracefully fail since array access isn't supported in current implementation
181
+ context = DecisionAgent::Context.new({ items: [{ name: "first" }] })
182
+ evaluation = evaluator.evaluate(context)
183
+
184
+ # Current implementation doesn't support array indexing
185
+ expect(evaluation).to be_nil
186
+ end
187
+
188
+ it "returns nil for very deeply missing nested fields" do
189
+ rules = {
190
+ version: "1.0",
191
+ ruleset: "test",
192
+ rules: [
193
+ {
194
+ id: "rule_1",
195
+ if: { field: "a.b.c.d.e.f.g.h", op: "eq", value: "deep" },
196
+ then: { decision: "found" }
197
+ }
198
+ ]
199
+ }
200
+
201
+ evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
202
+
203
+ # Missing at various levels
204
+ expect(evaluator.evaluate(DecisionAgent::Context.new({}))).to be_nil
205
+ expect(evaluator.evaluate(DecisionAgent::Context.new({ a: {} }))).to be_nil
206
+ expect(evaluator.evaluate(DecisionAgent::Context.new({ a: { b: { c: {} } } }))).to be_nil
207
+ end
208
+
209
+ it "handles partial path matches gracefully" do
210
+ rules = {
211
+ version: "1.0",
212
+ ruleset: "test",
213
+ rules: [
214
+ {
215
+ id: "rule_1",
216
+ if: { field: "user.settings.theme", op: "eq", value: "dark" },
217
+ then: { decision: "dark_mode" }
218
+ }
219
+ ]
220
+ }
221
+
222
+ evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
223
+
224
+ # Path exists partially but value is wrong type
225
+ context = DecisionAgent::Context.new({ user: { settings: "not_a_hash" } })
226
+ expect(evaluator.evaluate(context)).to be_nil
227
+ end
228
+ end
229
+
230
+ describe "very deep nesting (5+ levels)" do
231
+ it "evaluates 5-level nested all/any combinations" do
232
+ rules = {
233
+ version: "1.0",
234
+ ruleset: "test",
235
+ rules: [
236
+ {
237
+ id: "rule_1",
238
+ if: {
239
+ all: [
240
+ {
241
+ any: [
242
+ {
243
+ all: [
244
+ {
245
+ any: [
246
+ {
247
+ all: [
248
+ { field: "a", op: "eq", value: 1 },
249
+ { field: "b", op: "eq", value: 2 }
250
+ ]
251
+ }
252
+ ]
253
+ }
254
+ ]
255
+ }
256
+ ]
257
+ }
258
+ ]
259
+ },
260
+ then: { decision: "very_nested_match" }
261
+ }
262
+ ]
263
+ }
264
+
265
+ evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
266
+
267
+ # Should match
268
+ context1 = DecisionAgent::Context.new({ a: 1, b: 2 })
269
+ result = evaluator.evaluate(context1)
270
+ expect(result).not_to be_nil
271
+ expect(result.decision).to eq("very_nested_match")
272
+
273
+ # Should not match (missing b)
274
+ context2 = DecisionAgent::Context.new({ a: 1 })
275
+ expect(evaluator.evaluate(context2)).to be_nil
276
+ end
277
+
278
+ it "evaluates 7-level nested structures" do
279
+ rules = {
280
+ version: "1.0",
281
+ ruleset: "test",
282
+ rules: [
283
+ {
284
+ id: "rule_1",
285
+ if: {
286
+ all: [
287
+ {
288
+ any: [
289
+ {
290
+ all: [
291
+ {
292
+ any: [
293
+ {
294
+ all: [
295
+ {
296
+ any: [
297
+ {
298
+ all: [
299
+ { field: "x", op: "eq", value: true }
300
+ ]
301
+ }
302
+ ]
303
+ }
304
+ ]
305
+ }
306
+ ]
307
+ }
308
+ ]
309
+ }
310
+ ]
311
+ }
312
+ ]
313
+ },
314
+ then: { decision: "extremely_nested" }
315
+ }
316
+ ]
317
+ }
318
+
319
+ evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
320
+
321
+ context = DecisionAgent::Context.new({ x: true })
322
+ result = evaluator.evaluate(context)
323
+ expect(result).not_to be_nil
324
+ expect(result.decision).to eq("extremely_nested")
325
+ end
326
+
327
+ it "handles mixed all/any at each level" do
328
+ rules = {
329
+ version: "1.0",
330
+ ruleset: "test",
331
+ rules: [
332
+ {
333
+ id: "rule_1",
334
+ if: {
335
+ all: [
336
+ { field: "level1", op: "eq", value: 1 },
337
+ {
338
+ any: [
339
+ { field: "level2a", op: "eq", value: 2 },
340
+ {
341
+ all: [
342
+ { field: "level3a", op: "eq", value: 3 },
343
+ {
344
+ any: [
345
+ { field: "level4a", op: "eq", value: 4 },
346
+ {
347
+ all: [
348
+ { field: "level5a", op: "eq", value: 5 },
349
+ { field: "level5b", op: "eq", value: 6 }
350
+ ]
351
+ }
352
+ ]
353
+ }
354
+ ]
355
+ }
356
+ ]
357
+ }
358
+ ]
359
+ },
360
+ then: { decision: "mixed_deep_match" }
361
+ }
362
+ ]
363
+ }
364
+
365
+ evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
366
+
367
+ # Match via first branch (level2a)
368
+ context1 = DecisionAgent::Context.new({ level1: 1, level2a: 2 })
369
+ expect(evaluator.evaluate(context1)&.decision).to eq("mixed_deep_match")
370
+
371
+ # Match via nested path (level4a)
372
+ context2 = DecisionAgent::Context.new({ level1: 1, level3a: 3, level4a: 4 })
373
+ expect(evaluator.evaluate(context2)&.decision).to eq("mixed_deep_match")
374
+
375
+ # Match via deepest path
376
+ context3 = DecisionAgent::Context.new({ level1: 1, level3a: 3, level5a: 5, level5b: 6 })
377
+ expect(evaluator.evaluate(context3)&.decision).to eq("mixed_deep_match")
378
+
379
+ # No match (missing level1)
380
+ context4 = DecisionAgent::Context.new({ level2a: 2 })
381
+ expect(evaluator.evaluate(context4)).to be_nil
382
+ end
383
+ end
384
+
385
+ describe "large rule sets" do
386
+ it "evaluates 100 rules efficiently (first-match semantics)" do
387
+ rules_array = 100.times.map do |i|
388
+ {
389
+ id: "rule_#{i}",
390
+ if: { field: "number", op: "eq", value: i },
391
+ then: { decision: "matched_#{i}", weight: 0.5 + (i / 200.0) }
392
+ }
393
+ end
394
+
395
+ rules = {
396
+ version: "1.0",
397
+ ruleset: "large_set",
398
+ rules: rules_array
399
+ }
400
+
401
+ evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
402
+
403
+ # Should match first rule
404
+ context1 = DecisionAgent::Context.new({ number: 0 })
405
+ result1 = evaluator.evaluate(context1)
406
+ expect(result1.decision).to eq("matched_0")
407
+ expect(result1.metadata[:rule_id]).to eq("rule_0")
408
+
409
+ # Should match middle rule
410
+ context2 = DecisionAgent::Context.new({ number: 50 })
411
+ result2 = evaluator.evaluate(context2)
412
+ expect(result2.decision).to eq("matched_50")
413
+
414
+ # Should match last rule
415
+ context3 = DecisionAgent::Context.new({ number: 99 })
416
+ result3 = evaluator.evaluate(context3)
417
+ expect(result3.decision).to eq("matched_99")
418
+
419
+ # Should not match any rule
420
+ context4 = DecisionAgent::Context.new({ number: 100 })
421
+ expect(evaluator.evaluate(context4)).to be_nil
422
+ end
423
+
424
+ it "handles 500 rules without stack overflow" do
425
+ rules_array = 500.times.map do |i|
426
+ {
427
+ id: "rule_#{i}",
428
+ if: {
429
+ all: [
430
+ { field: "category", op: "eq", value: "test" },
431
+ { field: "id", op: "eq", value: i }
432
+ ]
433
+ },
434
+ then: { decision: "rule_#{i}" }
435
+ }
436
+ end
437
+
438
+ rules = {
439
+ version: "1.0",
440
+ ruleset: "very_large_set",
441
+ rules: rules_array
442
+ }
443
+
444
+ evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
445
+
446
+ # Should evaluate without error
447
+ context = DecisionAgent::Context.new({ category: "test", id: 250 })
448
+ result = evaluator.evaluate(context)
449
+ expect(result.decision).to eq("rule_250")
450
+ end
451
+ end
452
+
453
+ describe "unicode support" do
454
+ it "handles unicode field names" do
455
+ rules = {
456
+ version: "1.0",
457
+ ruleset: "test",
458
+ rules: [
459
+ {
460
+ id: "rule_1",
461
+ if: { field: "user.名前", op: "eq", value: "太郎" },
462
+ then: { decision: "japanese_match" }
463
+ }
464
+ ]
465
+ }
466
+
467
+ evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
468
+
469
+ context = DecisionAgent::Context.new({ user: { "名前": "太郎" } })
470
+ result = evaluator.evaluate(context)
471
+ expect(result).not_to be_nil
472
+ expect(result.decision).to eq("japanese_match")
473
+ end
474
+
475
+ it "compares unicode values correctly" do
476
+ rules = {
477
+ version: "1.0",
478
+ ruleset: "test",
479
+ rules: [
480
+ {
481
+ id: "rule_1",
482
+ if: { field: "message", op: "eq", value: "Héllo Wörld 🌍" },
483
+ then: { decision: "unicode_match" }
484
+ }
485
+ ]
486
+ }
487
+
488
+ evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
489
+
490
+ context = DecisionAgent::Context.new({ message: "Héllo Wörld 🌍" })
491
+ result = evaluator.evaluate(context)
492
+ expect(result).not_to be_nil
493
+ expect(result.decision).to eq("unicode_match")
494
+
495
+ # Should not match with different unicode
496
+ context2 = DecisionAgent::Context.new({ message: "Hello World 🌍" })
497
+ expect(evaluator.evaluate(context2)).to be_nil
498
+ end
499
+
500
+ it "handles emoji in decision values" do
501
+ rules = {
502
+ version: "1.0",
503
+ ruleset: "test",
504
+ rules: [
505
+ {
506
+ id: "rule_1",
507
+ if: { field: "status", op: "eq", value: "happy" },
508
+ then: { decision: "😊_approved", reason: "User is happy 🎉" }
509
+ }
510
+ ]
511
+ }
512
+
513
+ evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
514
+
515
+ context = DecisionAgent::Context.new({ status: "happy" })
516
+ result = evaluator.evaluate(context)
517
+ expect(result.decision).to eq("😊_approved")
518
+ expect(result.reason).to eq("User is happy 🎉")
519
+ end
520
+
521
+ it "handles mixed unicode in nested field paths" do
522
+ rules = {
523
+ version: "1.0",
524
+ ruleset: "test",
525
+ rules: [
526
+ {
527
+ id: "rule_1",
528
+ if: { field: "用户.配置.语言", op: "eq", value: "中文" },
529
+ then: { decision: "chinese_locale" }
530
+ }
531
+ ]
532
+ }
533
+
534
+ evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
535
+
536
+ context = DecisionAgent::Context.new({
537
+ "用户": {
538
+ "配置": {
539
+ "语言": "中文"
540
+ }
541
+ }
542
+ })
543
+ result = evaluator.evaluate(context)
544
+ expect(result).not_to be_nil
545
+ expect(result.decision).to eq("chinese_locale")
546
+ end
547
+ end
548
+
549
+ describe "malformed dot notation edge cases" do
550
+ it "rejects leading dots in field paths" do
551
+ rules = {
552
+ version: "1.0",
553
+ ruleset: "test",
554
+ rules: [
555
+ {
556
+ id: "rule_1",
557
+ if: { field: ".field", op: "eq", value: "test" },
558
+ then: { decision: "match" }
559
+ }
560
+ ]
561
+ }
562
+
563
+ # Validator catches empty segments and raises error
564
+ expect {
565
+ DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
566
+ }.to raise_error(DecisionAgent::InvalidRuleDslError, /empty segments/)
567
+ end
568
+
569
+ it "handles trailing dots in field paths" do
570
+ rules = {
571
+ version: "1.0",
572
+ ruleset: "test",
573
+ rules: [
574
+ {
575
+ id: "rule_1",
576
+ if: { field: "field.nested.", op: "eq", value: "test" },
577
+ then: { decision: "match" }
578
+ }
579
+ ]
580
+ }
581
+
582
+ # Trailing dots might be accepted but won't match in practice
583
+ # Or they might be rejected - test actual behavior
584
+ evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
585
+ context = DecisionAgent::Context.new({ field: { nested: { "": "test" } } })
586
+
587
+ # Evaluation behavior depends on implementation
588
+ # Just verify it doesn't crash
589
+ result = evaluator.evaluate(context)
590
+ # Result may be nil or match depending on how empty string keys are handled
591
+ expect(result).to be_a(DecisionAgent::Evaluation).or be_nil
592
+ end
593
+
594
+ it "rejects consecutive dots in field paths" do
595
+ rules = {
596
+ version: "1.0",
597
+ ruleset: "test",
598
+ rules: [
599
+ {
600
+ id: "rule_1",
601
+ if: { field: "field..nested", op: "eq", value: "test" },
602
+ then: { decision: "match" }
603
+ }
604
+ ]
605
+ }
606
+
607
+ # Validator catches empty segments and raises error
608
+ expect {
609
+ DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
610
+ }.to raise_error(DecisionAgent::InvalidRuleDslError, /empty segments/)
611
+ end
612
+
613
+ it "rejects multiple consecutive dots in field paths" do
614
+ rules = {
615
+ version: "1.0",
616
+ ruleset: "test",
617
+ rules: [
618
+ {
619
+ id: "rule_1",
620
+ if: { field: "a..b..c", op: "eq", value: "test" },
621
+ then: { decision: "match" }
622
+ }
623
+ ]
624
+ }
625
+
626
+ # Validator catches empty segments and raises error
627
+ expect {
628
+ DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
629
+ }.to raise_error(DecisionAgent::InvalidRuleDslError, /empty segments/)
630
+ end
631
+ end
632
+ end
633
+
634
+ # ============================================================
635
+ # Evaluator Behavior Edge Cases
636
+ # ============================================================
637
+
638
+ describe "evaluator behavior edge cases" do
639
+ describe "nil returns and empty evaluations" do
640
+ it "handles all evaluators returning nil" do
641
+ nil_evaluator = Class.new(DecisionAgent::Evaluators::Base) do
642
+ def evaluate(context, feedback: {})
643
+ nil
644
+ end
645
+ end
646
+
647
+ agent = DecisionAgent::Agent.new(evaluators: [nil_evaluator.new])
648
+
649
+ expect {
650
+ agent.decide(context: {})
651
+ }.to raise_error(DecisionAgent::NoEvaluationsError)
652
+ end
653
+
654
+ it "handles mix of nil and valid evaluations" do
655
+ nil_evaluator = Class.new(DecisionAgent::Evaluators::Base) do
656
+ def evaluate(context, feedback: {})
657
+ nil
658
+ end
659
+ end
660
+
661
+ valid_evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
662
+ decision: "approve",
663
+ weight: 0.8
664
+ )
665
+
666
+ agent = DecisionAgent::Agent.new(evaluators: [nil_evaluator.new, valid_evaluator])
667
+
668
+ result = agent.decide(context: {})
669
+
670
+ expect(result.decision).to eq("approve")
671
+ expect(result.evaluations.size).to eq(1)
672
+ end
673
+ end
674
+
675
+ describe "zero weight handling" do
676
+ it "handles evaluator with zero weight" do
677
+ zero_weight_evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
678
+ decision: "approve",
679
+ weight: 0.0,
680
+ reason: "Zero weight"
681
+ )
682
+
683
+ agent = DecisionAgent::Agent.new(evaluators: [zero_weight_evaluator])
684
+
685
+ result = agent.decide(context: {})
686
+
687
+ expect(result.decision).to eq("approve")
688
+ expect(result.evaluations.first.weight).to eq(0.0)
689
+ end
690
+
691
+ it "handles mix of zero and non-zero weights" do
692
+ zero_weight = DecisionAgent::Evaluators::StaticEvaluator.new(
693
+ decision: "reject",
694
+ weight: 0.0,
695
+ name: "ZeroWeight"
696
+ )
697
+
698
+ normal_weight = DecisionAgent::Evaluators::StaticEvaluator.new(
699
+ decision: "approve",
700
+ weight: 0.8,
701
+ name: "NormalWeight"
702
+ )
703
+
704
+ agent = DecisionAgent::Agent.new(
705
+ evaluators: [zero_weight, normal_weight],
706
+ scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
707
+ )
708
+
709
+ result = agent.decide(context: {})
710
+
711
+ # With weighted average, the non-zero weight should dominate
712
+ expect(result.decision).to eq("approve")
713
+ end
714
+
715
+ it "handles all evaluators with zero weight" do
716
+ zero_weight1 = DecisionAgent::Evaluators::StaticEvaluator.new(
717
+ decision: "approve",
718
+ weight: 0.0
719
+ )
720
+
721
+ zero_weight2 = DecisionAgent::Evaluators::StaticEvaluator.new(
722
+ decision: "reject",
723
+ weight: 0.0
724
+ )
725
+
726
+ agent = DecisionAgent::Agent.new(evaluators: [zero_weight1, zero_weight2])
727
+
728
+ result = agent.decide(context: {})
729
+
730
+ # Should still make a decision even with all zero weights
731
+ expect(result.decision).to be_a(String)
732
+ expect(result.confidence).to be_between(0.0, 1.0)
733
+ end
734
+ end
735
+ end
736
+
737
+ # ============================================================
738
+ # Conflict Resolution Edge Cases
739
+ # ============================================================
740
+
741
+ describe "conflict resolution edge cases" do
742
+ describe "equal weights" do
743
+ it "handles equal weights in WeightedAverage" do
744
+ eval1 = DecisionAgent::Evaluators::StaticEvaluator.new(
745
+ decision: "approve",
746
+ weight: 0.5,
747
+ name: "Eval1"
748
+ )
749
+
750
+ eval2 = DecisionAgent::Evaluators::StaticEvaluator.new(
751
+ decision: "reject",
752
+ weight: 0.5,
753
+ name: "Eval2"
754
+ )
755
+
756
+ agent = DecisionAgent::Agent.new(
757
+ evaluators: [eval1, eval2],
758
+ scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
759
+ )
760
+
761
+ result = agent.decide(context: {})
762
+
763
+ # Should choose one decision
764
+ expect(["approve", "reject"]).to include(result.decision)
765
+ # Confidence should reflect the tie
766
+ expect(result.confidence).to eq(0.5)
767
+ end
768
+
769
+ it "handles equal weights in MaxWeight" do
770
+ eval1 = DecisionAgent::Evaluators::StaticEvaluator.new(
771
+ decision: "option_a",
772
+ weight: 0.7,
773
+ name: "Eval1"
774
+ )
775
+
776
+ eval2 = DecisionAgent::Evaluators::StaticEvaluator.new(
777
+ decision: "option_b",
778
+ weight: 0.7,
779
+ name: "Eval2"
780
+ )
781
+
782
+ eval3 = DecisionAgent::Evaluators::StaticEvaluator.new(
783
+ decision: "option_c",
784
+ weight: 0.7,
785
+ name: "Eval3"
786
+ )
787
+
788
+ agent = DecisionAgent::Agent.new(
789
+ evaluators: [eval1, eval2, eval3],
790
+ scoring_strategy: DecisionAgent::Scoring::MaxWeight.new
791
+ )
792
+
793
+ result = agent.decide(context: {})
794
+
795
+ # Should choose one of the options
796
+ expect(["option_a", "option_b", "option_c"]).to include(result.decision)
797
+ expect(result.confidence).to eq(0.7)
798
+ end
799
+
800
+ it "handles equal weights in Consensus" do
801
+ eval1 = DecisionAgent::Evaluators::StaticEvaluator.new(
802
+ decision: "approve",
803
+ weight: 0.6,
804
+ name: "Eval1"
805
+ )
806
+
807
+ eval2 = DecisionAgent::Evaluators::StaticEvaluator.new(
808
+ decision: "reject",
809
+ weight: 0.6,
810
+ name: "Eval2"
811
+ )
812
+
813
+ agent = DecisionAgent::Agent.new(
814
+ evaluators: [eval1, eval2],
815
+ scoring_strategy: DecisionAgent::Scoring::Consensus.new
816
+ )
817
+
818
+ result = agent.decide(context: {})
819
+
820
+ # Should choose one decision, likely with reduced confidence
821
+ expect(["approve", "reject"]).to include(result.decision)
822
+ end
823
+ end
824
+
825
+ describe "mixed decisions" do
826
+ it "handles three-way split in decisions" do
827
+ eval1 = DecisionAgent::Evaluators::StaticEvaluator.new(
828
+ decision: "approve",
829
+ weight: 0.5,
830
+ name: "Eval1"
831
+ )
832
+
833
+ eval2 = DecisionAgent::Evaluators::StaticEvaluator.new(
834
+ decision: "reject",
835
+ weight: 0.5,
836
+ name: "Eval2"
837
+ )
838
+
839
+ eval3 = DecisionAgent::Evaluators::StaticEvaluator.new(
840
+ decision: "manual_review",
841
+ weight: 0.5,
842
+ name: "Eval3"
843
+ )
844
+
845
+ agent = DecisionAgent::Agent.new(
846
+ evaluators: [eval1, eval2, eval3],
847
+ scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
848
+ )
849
+
850
+ result = agent.decide(context: {})
851
+
852
+ expect(["approve", "reject", "manual_review"]).to include(result.decision)
853
+ end
854
+
855
+ it "handles many evaluators with diverse decisions" do
856
+ evaluators = 10.times.map do |i|
857
+ DecisionAgent::Evaluators::StaticEvaluator.new(
858
+ decision: "decision_#{i % 5}", # 5 different decisions
859
+ weight: 0.1 * (i + 1), # Varying weights
860
+ name: "Eval#{i}"
861
+ )
862
+ end
863
+
864
+ agent = DecisionAgent::Agent.new(
865
+ evaluators: evaluators,
866
+ scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
867
+ )
868
+
869
+ result = agent.decide(context: {})
870
+
871
+ expect(result.decision).to be_a(String)
872
+ expect(result.confidence).to be_between(0.0, 1.0)
873
+ expect(result.evaluations.size).to eq(10)
874
+ end
875
+
876
+ it "handles 50 evaluators with diverse decisions" do
877
+ evaluators = 50.times.map do |i|
878
+ DecisionAgent::Evaluators::StaticEvaluator.new(
879
+ decision: "decision_#{i % 10}", # 10 different decisions
880
+ weight: 0.02 * (i + 1), # Varying weights 0.02 to 1.0
881
+ name: "Eval#{i}"
882
+ )
883
+ end
884
+
885
+ agent = DecisionAgent::Agent.new(
886
+ evaluators: evaluators,
887
+ scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
888
+ )
889
+
890
+ result = agent.decide(context: {})
891
+
892
+ expect(result.decision).to be_a(String)
893
+ expect(result.decision).to match(/decision_\d/)
894
+ expect(result.confidence).to be_between(0.0, 1.0)
895
+ expect(result.evaluations.size).to eq(50)
896
+ end
897
+
898
+ it "handles all evaluators with same decision but different weights" do
899
+ evaluators = 20.times.map do |i|
900
+ DecisionAgent::Evaluators::StaticEvaluator.new(
901
+ decision: "unanimous",
902
+ weight: 0.05 * (i + 1), # Weights from 0.05 to 1.0
903
+ name: "Eval#{i}"
904
+ )
905
+ end
906
+
907
+ agent = DecisionAgent::Agent.new(
908
+ evaluators: evaluators,
909
+ scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
910
+ )
911
+
912
+ result = agent.decide(context: {})
913
+
914
+ expect(result.decision).to eq("unanimous")
915
+ # All weights point to same decision, confidence should be high
916
+ expect(result.confidence).to eq(1.0)
917
+ expect(result.evaluations.size).to eq(20)
918
+ end
919
+ end
920
+ end
921
+
922
+ # ============================================================
923
+ # Scoring Strategy Edge Cases
924
+ # ============================================================
925
+
926
+ describe "scoring strategy edge cases" do
927
+ describe "MaxWeight edge cases" do
928
+ it "handles single evaluation" do
929
+ eval1 = DecisionAgent::Evaluators::StaticEvaluator.new(
930
+ decision: "approve",
931
+ weight: 0.6
932
+ )
933
+
934
+ agent = DecisionAgent::Agent.new(
935
+ evaluators: [eval1],
936
+ scoring_strategy: DecisionAgent::Scoring::MaxWeight.new
937
+ )
938
+
939
+ result = agent.decide(context: {})
940
+
941
+ expect(result.decision).to eq("approve")
942
+ expect(result.confidence).to eq(0.6)
943
+ end
944
+
945
+ it "ignores lower weights completely" do
946
+ high = DecisionAgent::Evaluators::StaticEvaluator.new(
947
+ decision: "approve",
948
+ weight: 0.9,
949
+ name: "High"
950
+ )
951
+
952
+ low1 = DecisionAgent::Evaluators::StaticEvaluator.new(
953
+ decision: "reject",
954
+ weight: 0.2,
955
+ name: "Low1"
956
+ )
957
+
958
+ low2 = DecisionAgent::Evaluators::StaticEvaluator.new(
959
+ decision: "reject",
960
+ weight: 0.3,
961
+ name: "Low2"
962
+ )
963
+
964
+ agent = DecisionAgent::Agent.new(
965
+ evaluators: [low1, high, low2],
966
+ scoring_strategy: DecisionAgent::Scoring::MaxWeight.new
967
+ )
968
+
969
+ result = agent.decide(context: {})
970
+
971
+ expect(result.decision).to eq("approve")
972
+ expect(result.confidence).to eq(0.9)
973
+ end
974
+ end
975
+
976
+ describe "Consensus edge cases" do
977
+ it "reduces confidence when no clear consensus" do
978
+ eval1 = DecisionAgent::Evaluators::StaticEvaluator.new(
979
+ decision: "option_a",
980
+ weight: 0.4,
981
+ name: "Eval1"
982
+ )
983
+
984
+ eval2 = DecisionAgent::Evaluators::StaticEvaluator.new(
985
+ decision: "option_b",
986
+ weight: 0.4,
987
+ name: "Eval2"
988
+ )
989
+
990
+ eval3 = DecisionAgent::Evaluators::StaticEvaluator.new(
991
+ decision: "option_c",
992
+ weight: 0.4,
993
+ name: "Eval3"
994
+ )
995
+
996
+ agent = DecisionAgent::Agent.new(
997
+ evaluators: [eval1, eval2, eval3],
998
+ scoring_strategy: DecisionAgent::Scoring::Consensus.new(minimum_agreement: 0.5)
999
+ )
1000
+
1001
+ result = agent.decide(context: {})
1002
+
1003
+ # No option has 50% agreement, confidence should be reduced
1004
+ expect(result.confidence).to be < 0.5
1005
+ end
1006
+
1007
+ it "handles unanimous decision" do
1008
+ evaluators = 5.times.map do |i|
1009
+ DecisionAgent::Evaluators::StaticEvaluator.new(
1010
+ decision: "approve",
1011
+ weight: 0.6 + (i * 0.05),
1012
+ name: "Eval#{i}"
1013
+ )
1014
+ end
1015
+
1016
+ agent = DecisionAgent::Agent.new(
1017
+ evaluators: evaluators,
1018
+ scoring_strategy: DecisionAgent::Scoring::Consensus.new
1019
+ )
1020
+
1021
+ result = agent.decide(context: {})
1022
+
1023
+ expect(result.decision).to eq("approve")
1024
+ # Should have high confidence due to unanimous agreement
1025
+ expect(result.confidence).to be > 0.5
1026
+ end
1027
+
1028
it "handles varying minimum agreement thresholds" do
  # Two approve votes against one reject vote.
  votes = [
    ["approve", 0.8, "Eval1"],
    ["approve", 0.7, "Eval2"],
    ["reject",  0.6, "Eval3"]
  ].map do |choice, vote_weight, label|
    DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: choice, weight: vote_weight, name: label
    )
  end

  # A permissive 30% threshold is easily cleared by the approve camp.
  lenient_agent = DecisionAgent::Agent.new(
    evaluators: votes,
    scoring_strategy: DecisionAgent::Scoring::Consensus.new(minimum_agreement: 0.3)
  )
  expect(lenient_agent.decide(context: {}).decision).to eq("approve")

  # A strict 90% threshold still picks approve, just with lower confidence.
  strict_agent = DecisionAgent::Agent.new(
    evaluators: votes,
    scoring_strategy: DecisionAgent::Scoring::Consensus.new(minimum_agreement: 0.9)
  )
  expect(strict_agent.decide(context: {}).decision).to eq("approve")
end
1066
+ end
1067
+
1068
describe "Threshold edge cases" do
  it "returns fallback when all evaluations below threshold" do
    weak_vote = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve", weight: 0.5
    )

    agent = DecisionAgent::Agent.new(
      evaluators: [weak_vote],
      scoring_strategy: DecisionAgent::Scoring::Threshold.new(
        threshold: 0.8, fallback_decision: "needs_review"
      )
    )

    outcome = agent.decide(context: {})

    # 0.5 never reaches the 0.8 bar, so the fallback should be returned.
    expect(outcome.decision).to eq("needs_review")
    expect(outcome.confidence).to be < 0.8
  end

  it "returns decision when exactly at threshold" do
    borderline = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve", weight: 0.75
    )

    agent = DecisionAgent::Agent.new(
      evaluators: [borderline],
      scoring_strategy: DecisionAgent::Scoring::Threshold.new(threshold: 0.75)
    )

    outcome = agent.decide(context: {})

    # The threshold comparison is inclusive: exactly 0.75 passes.
    expect(outcome.decision).to eq("approve")
    expect(outcome.confidence).to eq(0.75)
  end

  it "handles threshold with conflicting evaluations" do
    votes = [
      ["approve", 0.9, "High"],
      ["approve", 0.7, "Medium"],
      ["reject",  0.4, "Low"]
    ].map do |choice, vote_weight, label|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: choice, weight: vote_weight, name: label
      )
    end

    agent = DecisionAgent::Agent.new(
      evaluators: votes,
      scoring_strategy: DecisionAgent::Scoring::Threshold.new(threshold: 0.75)
    )

    # The approve votes average 0.8, which clears the 0.75 threshold.
    expect(agent.decide(context: {}).decision).to eq("approve")
  end

  it "handles very high threshold" do
    near_certain = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve", weight: 0.99
    )

    agent = DecisionAgent::Agent.new(
      evaluators: [near_certain],
      scoring_strategy: DecisionAgent::Scoring::Threshold.new(
        threshold: 0.999, fallback_decision: "uncertain"
      )
    )

    # Even 0.99 falls short of 0.999, so the fallback wins.
    expect(agent.decide(context: {}).decision).to eq("uncertain")
  end
end
1155
+
1156
describe "stress tests with large evaluator counts" do
  it "handles 100 evaluators efficiently with WeightedAverage" do
    crowd = Array.new(100) do |idx|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "decision_#{idx % 10}",
        weight: (idx + 1) / 200.0, # weights span 0.005..0.5
        name: "Eval#{idx}"
      )
    end

    agent = DecisionAgent::Agent.new(
      evaluators: crowd,
      scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
    )

    outcome = agent.decide(context: {})

    expect(outcome.decision).to be_a(String)
    expect(outcome.confidence).to be_between(0.0, 1.0)
    expect(outcome.evaluations.size).to eq(100)
  end

  it "handles 100 evaluators with all same decision" do
    crowd = Array.new(100) do |idx|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "consensus",
        weight: 0.5 + (idx / 200.0), # weights span 0.5..0.995
        name: "Eval#{idx}"
      )
    end

    agent = DecisionAgent::Agent.new(
      evaluators: crowd,
      scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
    )

    outcome = agent.decide(context: {})

    expect(outcome.decision).to eq("consensus")
    expect(outcome.confidence).to eq(1.0)
  end

  it "handles 100 evaluators with MaxWeight strategy" do
    crowd = Array.new(100) do |idx|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "decision_#{idx}",
        weight: idx / 100.0, # weights span 0.0..0.99
        name: "Eval#{idx}"
      )
    end

    agent = DecisionAgent::Agent.new(
      evaluators: crowd,
      scoring_strategy: DecisionAgent::Scoring::MaxWeight.new
    )

    outcome = agent.decide(context: {})

    # The final evaluator carries the top weight (0.99) and should win.
    expect(outcome.decision).to eq("decision_99")
    expect(outcome.confidence).to be_within(0.001).of(0.99)
  end

  it "handles 100 evaluators with Consensus strategy" do
    # 60 approve votes versus 40 reject votes.
    approvals = Array.new(60) do |idx|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "approve", weight: 0.6, name: "ApproveEval#{idx}"
      )
    end
    rejections = Array.new(40) do |idx|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "reject", weight: 0.7, name: "RejectEval#{idx}"
      )
    end

    agent = DecisionAgent::Agent.new(
      evaluators: approvals + rejections,
      scoring_strategy: DecisionAgent::Scoring::Consensus.new(minimum_agreement: 0.5)
    )

    outcome = agent.decide(context: {})

    # 60% of the panel approves, which exceeds the 50% agreement floor.
    expect(outcome.decision).to eq("approve")
    expect(outcome.evaluations.size).to eq(100)
  end
end
1249
+
1250
describe "floating point precision edge cases" do
  it "handles repeating decimals (0.333333...)" do
    thirds = (1..3).map do |n|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "approve",
        weight: 1.0 / 3.0, # repeating decimal 0.333...
        name: "Eval#{n}"
      )
    end

    agent = DecisionAgent::Agent.new(
      evaluators: thirds,
      scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
    )

    outcome = agent.decide(context: {})

    expect(outcome.decision).to eq("approve")
    # Three thirds should sum to (almost exactly) 1.0.
    expect(outcome.confidence).to be_within(0.0001).of(1.0)
  end

  it "normalizes confidence to 4 decimal places" do
    precise_vote = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.123456789, # deliberately more precision than is reported
      name: "Eval1"
    )

    agent = DecisionAgent::Agent.new(
      evaluators: [precise_vote],
      scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
    )

    outcome = agent.decide(context: {})

    # The reported confidence should carry at most 4 decimal places.
    expect(outcome.confidence.to_s.split('.').last.length).to be <= 4
  end

  it "handles very small weights (0.0001)" do
    tiny_votes = [
      ["approve", "Eval1"],
      ["reject", "Eval2"]
    ].map do |choice, label|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: choice, weight: 0.0001, name: label
      )
    end

    agent = DecisionAgent::Agent.new(
      evaluators: tiny_votes,
      scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
    )

    outcome = agent.decide(context: {})

    # Minuscule weights must not trigger precision errors.
    expect(outcome.decision).to be_a(String)
    expect(outcome.confidence).to be_between(0.0, 1.0)
  end

  it "handles weights that sum to slightly above 1.0 due to precision" do
    major = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve", weight: 0.7, name: "Eval1"
    )
    minor = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.3 + 0.0000001, # nudges the total just past 1.0
      name: "Eval2"
    )

    agent = DecisionAgent::Agent.new(
      evaluators: [major, minor],
      scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
    )

    # Confidence must be normalized back down to at most 1.0.
    expect(agent.decide(context: {}).confidence).to be <= 1.0
  end

  it "handles Consensus with floating point agreement rates" do
    # 4-of-7 agreement is not a clean fraction (4/7 = 0.571428...).
    mixed_panel = Array.new(7) do |idx|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: idx < 4 ? "approve" : "reject",
        weight: 0.6,
        name: "Eval#{idx}"
      )
    end

    agent = DecisionAgent::Agent.new(
      evaluators: mixed_panel,
      scoring_strategy: DecisionAgent::Scoring::Consensus.new(minimum_agreement: 0.57)
    )

    outcome = agent.decide(context: {})

    # Fractional agreement should still resolve to approve.
    expect(outcome.decision).to eq("approve")
    expect(outcome.confidence).to be_a(Float)
  end
end
1371
+ end
1372
+
1373
+ # ============================================================
1374
+ # Decision Replay Edge Cases
1375
+ # ============================================================
1376
+
1377
describe "decision replay edge cases" do
  # Builds the single static-approver agent shared by the strict and
  # non-strict examples below.
  def single_approver_agent(weight: 0.8)
    DecisionAgent::Agent.new(
      evaluators: [
        DecisionAgent::Evaluators::StaticEvaluator.new(decision: "approve", weight: weight)
      ]
    )
  end

  describe "strict mode behavior" do
    it "detects confidence differences above tolerance in strict mode" do
      recorded = single_approver_agent.decide(context: { user: "alice" })

      tampered = recorded.audit_payload.dup
      # Tolerance is 0.0001, so a 0.001 drift must be rejected.
      tampered[:confidence] = recorded.confidence + 0.001

      expect {
        DecisionAgent::Replay.run(tampered, strict: true)
      }.to raise_error(DecisionAgent::ReplayMismatchError)
    end

    it "passes when confidence is identical in strict mode" do
      recorded = single_approver_agent.decide(context: { user: "alice" })

      expect {
        DecisionAgent::Replay.run(recorded.audit_payload, strict: true)
      }.not_to raise_error
    end

    it "allows confidence within tolerance in strict mode" do
      recorded = single_approver_agent.decide(context: { user: "alice" })

      tampered = recorded.audit_payload.dup
      # A 0.00005 drift sits inside the 0.0001 tolerance window.
      tampered[:confidence] = recorded.confidence + 0.00005

      expect {
        DecisionAgent::Replay.run(tampered, strict: true)
      }.not_to raise_error
    end
  end

  describe "non-strict mode behavior" do
    it "logs but doesn't raise on decision mismatch" do
      recorded = single_approver_agent.decide(context: { user: "alice" })

      tampered = recorded.audit_payload.dup
      tampered[:decision] = "reject"

      replayed = nil
      expect {
        replayed = DecisionAgent::Replay.run(tampered, strict: false)
      }.to output(/Decision changed/).to_stderr

      expect(replayed).not_to be_nil
    end

    it "logs but doesn't raise on confidence mismatch" do
      recorded = single_approver_agent.decide(context: { user: "alice" })

      tampered = recorded.audit_payload.dup
      tampered[:confidence] = 0.5

      replayed = nil
      expect {
        replayed = DecisionAgent::Replay.run(tampered, strict: false)
      }.to output(/Confidence changed/).to_stderr

      expect(replayed).not_to be_nil
    end

    it "continues with multiple mismatches in non-strict mode" do
      recorded = single_approver_agent.decide(context: { user: "alice" })

      tampered = recorded.audit_payload.dup
      tampered[:decision] = "reject"
      tampered[:confidence] = 0.3

      replayed = nil
      expect {
        replayed = DecisionAgent::Replay.run(tampered, strict: false)
      }.to output(/Decision changed.*Confidence changed/m).to_stderr

      # The replay re-evaluates, so the recomputed decision wins
      # over the tampered payload value.
      expect(replayed.decision).to eq("approve")
    end
  end

  describe "replay validation errors" do
    it "raises error when context is missing" do
      payload = { decision: "approve", confidence: 0.8, evaluations: [] }

      expect {
        DecisionAgent::Replay.run(payload, strict: true)
      }.to raise_error(DecisionAgent::InvalidRuleDslError, /context/)
    end

    it "raises error when decision is missing" do
      payload = { context: {}, confidence: 0.8, evaluations: [] }

      expect {
        DecisionAgent::Replay.run(payload, strict: true)
      }.to raise_error(DecisionAgent::InvalidRuleDslError, /decision/)
    end

    it "raises error when confidence is missing" do
      payload = { context: {}, decision: "approve", evaluations: [] }

      expect {
        DecisionAgent::Replay.run(payload, strict: true)
      }.to raise_error(DecisionAgent::InvalidRuleDslError, /confidence/)
    end

    it "raises error when evaluations is missing" do
      payload = { context: {}, decision: "approve", confidence: 0.8 }

      expect {
        DecisionAgent::Replay.run(payload, strict: true)
      }.to raise_error(DecisionAgent::InvalidRuleDslError, /evaluations/)
    end

    it "handles empty audit payload" do
      expect {
        DecisionAgent::Replay.run({}, strict: true)
      }.to raise_error(DecisionAgent::InvalidRuleDslError)
    end
  end

  describe "replay with different scoring strategies" do
    it "correctly replays with WeightedAverage strategy" do
      agent = DecisionAgent::Agent.new(
        evaluators: [
          DecisionAgent::Evaluators::StaticEvaluator.new(decision: "approve", weight: 0.8)
        ],
        scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
      )

      recorded = agent.decide(context: { test: true })
      replayed = DecisionAgent::Replay.run(recorded.audit_payload, strict: true)

      expect(replayed.decision).to eq(recorded.decision)
      expect(replayed.confidence).to be_within(0.0001).of(recorded.confidence)
    end

    it "correctly replays with Threshold strategy" do
      agent = DecisionAgent::Agent.new(
        evaluators: [
          DecisionAgent::Evaluators::StaticEvaluator.new(decision: "approve", weight: 0.9)
        ],
        scoring_strategy: DecisionAgent::Scoring::Threshold.new(
          threshold: 0.8, fallback_decision: "review"
        )
      )

      recorded = agent.decide(context: { test: true })
      replayed = DecisionAgent::Replay.run(recorded.audit_payload, strict: true)

      expect(replayed.decision).to eq(recorded.decision)
      expect(replayed.confidence).to be_within(0.0001).of(recorded.confidence)
    end
  end
end
1610
+
1611
+ # ============================================================
1612
+ # Error Handling Edge Cases
1613
+ # ============================================================
1614
+
1615
describe "error handling edge cases" do
  describe "invalid JSON rule formats" do
    it "raises error for non-hash JSON" do
      expect {
        DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: [1, 2, 3])
      }.to raise_error(DecisionAgent::InvalidRuleDslError)
    end

    it "raises error for string input instead of hash" do
      expect {
        DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: "not a hash")
      }.to raise_error(DecisionAgent::InvalidRuleDslError)
    end

    it "raises error when rules is not an array" do
      malformed = { version: "1.0", rules: "not an array" }

      expect {
        DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: malformed)
      }.to raise_error(DecisionAgent::InvalidRuleDslError)
    end

    it "raises error when rule is not a hash" do
      malformed = { version: "1.0", rules: ["not a hash", "also not a hash"] }

      expect {
        DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: malformed)
      }.to raise_error(DecisionAgent::InvalidRuleDslError)
    end
  end

  describe "no evaluations scenarios" do
    # Builds an anonymous evaluator class whose #evaluate always returns nil.
    def nil_evaluator_class
      Class.new(DecisionAgent::Evaluators::Base) do
        def evaluate(context, feedback: {})
          nil
        end
      end
    end

    # Builds an anonymous evaluator class whose #evaluate raises with the
    # supplied message (captured by the define_method closure).
    def failing_evaluator_class(message)
      Class.new(DecisionAgent::Evaluators::Base) do
        define_method(:evaluate) do |context, feedback: {}|
          raise StandardError, message
        end
      end
    end

    it "raises NoEvaluationsError when single evaluator returns nil" do
      agent = DecisionAgent::Agent.new(evaluators: [nil_evaluator_class.new])

      expect {
        agent.decide(context: {})
      }.to raise_error(DecisionAgent::NoEvaluationsError)
    end

    it "raises NoEvaluationsError when all evaluators return nil" do
      agent = DecisionAgent::Agent.new(
        evaluators: [nil_evaluator_class.new, nil_evaluator_class.new]
      )

      expect {
        agent.decide(context: {})
      }.to raise_error(DecisionAgent::NoEvaluationsError) do |error|
        expect(error.message).to include("No evaluators returned a decision")
      end
    end

    it "raises NoEvaluationsError when all evaluators raise exceptions" do
      agent = DecisionAgent::Agent.new(
        evaluators: [
          failing_evaluator_class("Error 1").new,
          failing_evaluator_class("Error 2").new
        ]
      )

      expect {
        agent.decide(context: {})
      }.to raise_error(DecisionAgent::NoEvaluationsError)
    end

    it "succeeds when at least one evaluator succeeds despite others failing" do
      healthy = DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "approve", weight: 0.8
      )

      agent = DecisionAgent::Agent.new(
        evaluators: [failing_evaluator_class("Intentional failure").new, healthy]
      )

      # One working evaluator is enough to produce a decision.
      expect(agent.decide(context: {}).decision).to eq("approve")
    end
  end

  describe "boundary condition validation" do
    it "validates weight is not above 1.0" do
      expect {
        DecisionAgent::Evaluation.new(
          decision: "test", weight: 1.1, reason: "test", evaluator_name: "test"
        )
      }.to raise_error(DecisionAgent::InvalidWeightError)
    end

    it "validates weight is not negative" do
      expect {
        DecisionAgent::Evaluation.new(
          decision: "test", weight: -0.5, reason: "test", evaluator_name: "test"
        )
      }.to raise_error(DecisionAgent::InvalidWeightError)
    end

    it "validates confidence is not above 1.0" do
      expect {
        DecisionAgent::Decision.new(
          decision: "test", confidence: 1.001,
          explanations: [], evaluations: [], audit_payload: {}
        )
      }.to raise_error(DecisionAgent::InvalidConfidenceError)
    end

    it "validates confidence is not negative" do
      expect {
        DecisionAgent::Decision.new(
          decision: "test", confidence: -0.001,
          explanations: [], evaluations: [], audit_payload: {}
        )
      }.to raise_error(DecisionAgent::InvalidConfidenceError)
    end
  end
end
1777
+ end