decision_agent 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +234 -919
- data/bin/decision_agent +5 -5
- data/lib/decision_agent/agent.rb +19 -26
- data/lib/decision_agent/audit/null_adapter.rb +1 -2
- data/lib/decision_agent/decision.rb +3 -1
- data/lib/decision_agent/dsl/condition_evaluator.rb +4 -3
- data/lib/decision_agent/dsl/rule_parser.rb +4 -6
- data/lib/decision_agent/dsl/schema_validator.rb +27 -31
- data/lib/decision_agent/errors.rb +21 -6
- data/lib/decision_agent/evaluation.rb +3 -1
- data/lib/decision_agent/evaluation_validator.rb +78 -0
- data/lib/decision_agent/evaluators/json_rule_evaluator.rb +26 -0
- data/lib/decision_agent/evaluators/static_evaluator.rb +2 -6
- data/lib/decision_agent/monitoring/alert_manager.rb +282 -0
- data/lib/decision_agent/monitoring/dashboard/public/dashboard.css +381 -0
- data/lib/decision_agent/monitoring/dashboard/public/dashboard.js +471 -0
- data/lib/decision_agent/monitoring/dashboard/public/index.html +161 -0
- data/lib/decision_agent/monitoring/dashboard_server.rb +340 -0
- data/lib/decision_agent/monitoring/metrics_collector.rb +278 -0
- data/lib/decision_agent/monitoring/monitored_agent.rb +71 -0
- data/lib/decision_agent/monitoring/prometheus_exporter.rb +247 -0
- data/lib/decision_agent/replay/replay.rb +12 -22
- data/lib/decision_agent/scoring/base.rb +1 -1
- data/lib/decision_agent/scoring/consensus.rb +5 -5
- data/lib/decision_agent/scoring/weighted_average.rb +1 -1
- data/lib/decision_agent/version.rb +1 -1
- data/lib/decision_agent/versioning/activerecord_adapter.rb +141 -0
- data/lib/decision_agent/versioning/adapter.rb +100 -0
- data/lib/decision_agent/versioning/file_storage_adapter.rb +290 -0
- data/lib/decision_agent/versioning/version_manager.rb +127 -0
- data/lib/decision_agent/web/public/app.js +318 -0
- data/lib/decision_agent/web/public/index.html +56 -1
- data/lib/decision_agent/web/public/styles.css +219 -0
- data/lib/decision_agent/web/server.rb +169 -9
- data/lib/decision_agent.rb +11 -0
- data/lib/generators/decision_agent/install/install_generator.rb +40 -0
- data/lib/generators/decision_agent/install/templates/README +47 -0
- data/lib/generators/decision_agent/install/templates/migration.rb +37 -0
- data/lib/generators/decision_agent/install/templates/rule.rb +30 -0
- data/lib/generators/decision_agent/install/templates/rule_version.rb +66 -0
- data/spec/activerecord_thread_safety_spec.rb +553 -0
- data/spec/agent_spec.rb +13 -13
- data/spec/api_contract_spec.rb +16 -16
- data/spec/audit_adapters_spec.rb +3 -3
- data/spec/comprehensive_edge_cases_spec.rb +86 -86
- data/spec/dsl_validation_spec.rb +83 -83
- data/spec/edge_cases_spec.rb +23 -23
- data/spec/examples/feedback_aware_evaluator_spec.rb +7 -7
- data/spec/examples.txt +548 -0
- data/spec/issue_verification_spec.rb +685 -0
- data/spec/json_rule_evaluator_spec.rb +15 -15
- data/spec/monitoring/alert_manager_spec.rb +378 -0
- data/spec/monitoring/metrics_collector_spec.rb +281 -0
- data/spec/monitoring/monitored_agent_spec.rb +222 -0
- data/spec/monitoring/prometheus_exporter_spec.rb +242 -0
- data/spec/replay_edge_cases_spec.rb +58 -58
- data/spec/replay_spec.rb +11 -11
- data/spec/rfc8785_canonicalization_spec.rb +215 -0
- data/spec/scoring_spec.rb +1 -1
- data/spec/spec_helper.rb +9 -0
- data/spec/thread_safety_spec.rb +482 -0
- data/spec/thread_safety_spec.rb.broken +878 -0
- data/spec/versioning_spec.rb +777 -0
- data/spec/web_ui_rack_spec.rb +135 -0
- metadata +84 -11
data/spec/api_contract_spec.rb
CHANGED
|
@@ -106,47 +106,47 @@ RSpec.describe "DecisionAgent API Contracts" do
|
|
|
106
106
|
|
|
107
107
|
describe "weight bounds validation" do
|
|
108
108
|
it "rejects weight < 0.0" do
|
|
109
|
-
expect
|
|
109
|
+
expect do
|
|
110
110
|
DecisionAgent::Evaluation.new(
|
|
111
111
|
decision: "test",
|
|
112
112
|
weight: -0.1,
|
|
113
113
|
reason: "test",
|
|
114
114
|
evaluator_name: "Test"
|
|
115
115
|
)
|
|
116
|
-
|
|
116
|
+
end.to raise_error(DecisionAgent::InvalidWeightError, /must be between 0.0 and 1.0/)
|
|
117
117
|
end
|
|
118
118
|
|
|
119
119
|
it "rejects weight > 1.0" do
|
|
120
|
-
expect
|
|
120
|
+
expect do
|
|
121
121
|
DecisionAgent::Evaluation.new(
|
|
122
122
|
decision: "test",
|
|
123
123
|
weight: 1.1,
|
|
124
124
|
reason: "test",
|
|
125
125
|
evaluator_name: "Test"
|
|
126
126
|
)
|
|
127
|
-
|
|
127
|
+
end.to raise_error(DecisionAgent::InvalidWeightError, /must be between 0.0 and 1.0/)
|
|
128
128
|
end
|
|
129
129
|
|
|
130
130
|
it "accepts weight = 0.0" do
|
|
131
|
-
expect
|
|
131
|
+
expect do
|
|
132
132
|
DecisionAgent::Evaluation.new(
|
|
133
133
|
decision: "test",
|
|
134
134
|
weight: 0.0,
|
|
135
135
|
reason: "test",
|
|
136
136
|
evaluator_name: "Test"
|
|
137
137
|
)
|
|
138
|
-
|
|
138
|
+
end.not_to raise_error
|
|
139
139
|
end
|
|
140
140
|
|
|
141
141
|
it "accepts weight = 1.0" do
|
|
142
|
-
expect
|
|
142
|
+
expect do
|
|
143
143
|
DecisionAgent::Evaluation.new(
|
|
144
144
|
decision: "test",
|
|
145
145
|
weight: 1.0,
|
|
146
146
|
reason: "test",
|
|
147
147
|
evaluator_name: "Test"
|
|
148
148
|
)
|
|
149
|
-
|
|
149
|
+
end.not_to raise_error
|
|
150
150
|
end
|
|
151
151
|
end
|
|
152
152
|
|
|
@@ -174,13 +174,13 @@ RSpec.describe "DecisionAgent API Contracts" do
|
|
|
174
174
|
end
|
|
175
175
|
|
|
176
176
|
it "requires reason parameter to be provided" do
|
|
177
|
-
expect
|
|
177
|
+
expect do
|
|
178
178
|
DecisionAgent::Evaluation.new(
|
|
179
179
|
decision: "test",
|
|
180
180
|
weight: 0.5,
|
|
181
181
|
evaluator_name: "Test"
|
|
182
182
|
)
|
|
183
|
-
|
|
183
|
+
end.to raise_error(ArgumentError, /missing keyword.*reason/)
|
|
184
184
|
end
|
|
185
185
|
end
|
|
186
186
|
end
|
|
@@ -324,7 +324,7 @@ RSpec.describe "DecisionAgent API Contracts" do
|
|
|
324
324
|
|
|
325
325
|
describe "confidence bounds validation" do
|
|
326
326
|
it "validates confidence is between 0.0 and 1.0" do
|
|
327
|
-
expect
|
|
327
|
+
expect do
|
|
328
328
|
DecisionAgent::Decision.new(
|
|
329
329
|
decision: "test",
|
|
330
330
|
confidence: -0.1,
|
|
@@ -332,9 +332,9 @@ RSpec.describe "DecisionAgent API Contracts" do
|
|
|
332
332
|
evaluations: [],
|
|
333
333
|
audit_payload: {}
|
|
334
334
|
)
|
|
335
|
-
|
|
335
|
+
end.to raise_error(DecisionAgent::InvalidConfidenceError)
|
|
336
336
|
|
|
337
|
-
expect
|
|
337
|
+
expect do
|
|
338
338
|
DecisionAgent::Decision.new(
|
|
339
339
|
decision: "test",
|
|
340
340
|
confidence: 1.1,
|
|
@@ -342,7 +342,7 @@ RSpec.describe "DecisionAgent API Contracts" do
|
|
|
342
342
|
evaluations: [],
|
|
343
343
|
audit_payload: {}
|
|
344
344
|
)
|
|
345
|
-
|
|
345
|
+
end.to raise_error(DecisionAgent::InvalidConfidenceError)
|
|
346
346
|
end
|
|
347
347
|
end
|
|
348
348
|
end
|
|
@@ -351,7 +351,7 @@ RSpec.describe "DecisionAgent API Contracts" do
|
|
|
351
351
|
let(:evaluator) do
|
|
352
352
|
DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
353
353
|
decision: "approve",
|
|
354
|
-
weight: 0.5,
|
|
354
|
+
weight: 0.5, # Below threshold
|
|
355
355
|
reason: "Low confidence approval"
|
|
356
356
|
)
|
|
357
357
|
end
|
|
@@ -406,7 +406,7 @@ RSpec.describe "DecisionAgent API Contracts" do
|
|
|
406
406
|
let(:high_confidence_evaluator) do
|
|
407
407
|
DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
408
408
|
decision: "approve",
|
|
409
|
-
weight: 0.9,
|
|
409
|
+
weight: 0.9, # Above threshold
|
|
410
410
|
reason: "High confidence approval"
|
|
411
411
|
)
|
|
412
412
|
end
|
data/spec/audit_adapters_spec.rb
CHANGED
|
@@ -15,9 +15,9 @@ RSpec.describe "Audit Adapters" do
|
|
|
15
15
|
|
|
16
16
|
context = DecisionAgent::Context.new({ user: "alice" })
|
|
17
17
|
|
|
18
|
-
expect
|
|
18
|
+
expect do
|
|
19
19
|
adapter.record(decision, context)
|
|
20
|
-
|
|
20
|
+
end.not_to raise_error
|
|
21
21
|
end
|
|
22
22
|
end
|
|
23
23
|
|
|
@@ -50,7 +50,7 @@ RSpec.describe "Audit Adapters" do
|
|
|
50
50
|
it "logs JSON format" do
|
|
51
51
|
io = StringIO.new
|
|
52
52
|
logger = Logger.new(io)
|
|
53
|
-
logger.formatter = proc { |
|
|
53
|
+
logger.formatter = proc { |_severity, _datetime, _progname, msg| "#{msg}\n" }
|
|
54
54
|
|
|
55
55
|
adapter = DecisionAgent::Audit::LoggerAdapter.new(logger: logger)
|
|
56
56
|
|
|
data/spec/comprehensive_edge_cases_spec.rb
CHANGED
|
@@ -20,9 +20,9 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
20
20
|
]
|
|
21
21
|
}
|
|
22
22
|
|
|
23
|
-
expect
|
|
23
|
+
expect do
|
|
24
24
|
DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
25
|
-
|
|
25
|
+
end.to raise_error(DecisionAgent::InvalidRuleDslError, /Unsupported operator 'unknown_op'/)
|
|
26
26
|
end
|
|
27
27
|
|
|
28
28
|
it "raises error when operator is misspelled" do
|
|
@@ -38,9 +38,9 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
38
38
|
]
|
|
39
39
|
}
|
|
40
40
|
|
|
41
|
-
expect
|
|
41
|
+
expect do
|
|
42
42
|
DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
43
|
-
|
|
43
|
+
end.to raise_error(DecisionAgent::InvalidRuleDslError, /Unsupported operator 'greather_than'/)
|
|
44
44
|
end
|
|
45
45
|
|
|
46
46
|
it "raises error when operator is nil" do
|
|
@@ -56,9 +56,9 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
56
56
|
]
|
|
57
57
|
}
|
|
58
58
|
|
|
59
|
-
expect
|
|
59
|
+
expect do
|
|
60
60
|
DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
61
|
-
|
|
61
|
+
end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing 'op'/)
|
|
62
62
|
end
|
|
63
63
|
end
|
|
64
64
|
|
|
@@ -534,12 +534,12 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
534
534
|
evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
535
535
|
|
|
536
536
|
context = DecisionAgent::Context.new({
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
537
|
+
"用户": {
|
|
538
|
+
"配置": {
|
|
539
|
+
"语言": "中文"
|
|
540
|
+
}
|
|
541
|
+
}
|
|
542
|
+
})
|
|
543
543
|
result = evaluator.evaluate(context)
|
|
544
544
|
expect(result).not_to be_nil
|
|
545
545
|
expect(result.decision).to eq("chinese_locale")
|
|
@@ -561,9 +561,9 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
561
561
|
}
|
|
562
562
|
|
|
563
563
|
# Validator catches empty segments and raises error
|
|
564
|
-
expect
|
|
564
|
+
expect do
|
|
565
565
|
DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
566
|
-
|
|
566
|
+
end.to raise_error(DecisionAgent::InvalidRuleDslError, /empty segments/)
|
|
567
567
|
end
|
|
568
568
|
|
|
569
569
|
it "handles trailing dots in field paths" do
|
|
@@ -605,9 +605,9 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
605
605
|
}
|
|
606
606
|
|
|
607
607
|
# Validator catches empty segments and raises error
|
|
608
|
-
expect
|
|
608
|
+
expect do
|
|
609
609
|
DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
610
|
-
|
|
610
|
+
end.to raise_error(DecisionAgent::InvalidRuleDslError, /empty segments/)
|
|
611
611
|
end
|
|
612
612
|
|
|
613
613
|
it "rejects multiple consecutive dots in field paths" do
|
|
@@ -624,9 +624,9 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
624
624
|
}
|
|
625
625
|
|
|
626
626
|
# Validator catches empty segments and raises error
|
|
627
|
-
expect
|
|
627
|
+
expect do
|
|
628
628
|
DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
629
|
-
|
|
629
|
+
end.to raise_error(DecisionAgent::InvalidRuleDslError, /empty segments/)
|
|
630
630
|
end
|
|
631
631
|
end
|
|
632
632
|
end
|
|
@@ -639,21 +639,21 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
639
639
|
describe "nil returns and empty evaluations" do
|
|
640
640
|
it "handles all evaluators returning nil" do
|
|
641
641
|
nil_evaluator = Class.new(DecisionAgent::Evaluators::Base) do
|
|
642
|
-
def evaluate(
|
|
642
|
+
def evaluate(_context, feedback: {})
|
|
643
643
|
nil
|
|
644
644
|
end
|
|
645
645
|
end
|
|
646
646
|
|
|
647
647
|
agent = DecisionAgent::Agent.new(evaluators: [nil_evaluator.new])
|
|
648
648
|
|
|
649
|
-
expect
|
|
649
|
+
expect do
|
|
650
650
|
agent.decide(context: {})
|
|
651
|
-
|
|
651
|
+
end.to raise_error(DecisionAgent::NoEvaluationsError)
|
|
652
652
|
end
|
|
653
653
|
|
|
654
654
|
it "handles mix of nil and valid evaluations" do
|
|
655
655
|
nil_evaluator = Class.new(DecisionAgent::Evaluators::Base) do
|
|
656
|
-
def evaluate(
|
|
656
|
+
def evaluate(_context, feedback: {})
|
|
657
657
|
nil
|
|
658
658
|
end
|
|
659
659
|
end
|
|
@@ -761,7 +761,7 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
761
761
|
result = agent.decide(context: {})
|
|
762
762
|
|
|
763
763
|
# Should choose one decision
|
|
764
|
-
expect([
|
|
764
|
+
expect(%w[approve reject]).to include(result.decision)
|
|
765
765
|
# Confidence should reflect the tie
|
|
766
766
|
expect(result.confidence).to eq(0.5)
|
|
767
767
|
end
|
|
@@ -793,7 +793,7 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
793
793
|
result = agent.decide(context: {})
|
|
794
794
|
|
|
795
795
|
# Should choose one of the options
|
|
796
|
-
expect([
|
|
796
|
+
expect(%w[option_a option_b option_c]).to include(result.decision)
|
|
797
797
|
expect(result.confidence).to eq(0.7)
|
|
798
798
|
end
|
|
799
799
|
|
|
@@ -818,7 +818,7 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
818
818
|
result = agent.decide(context: {})
|
|
819
819
|
|
|
820
820
|
# Should choose one decision, likely with reduced confidence
|
|
821
|
-
expect([
|
|
821
|
+
expect(%w[approve reject]).to include(result.decision)
|
|
822
822
|
end
|
|
823
823
|
end
|
|
824
824
|
|
|
@@ -849,7 +849,7 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
849
849
|
|
|
850
850
|
result = agent.decide(context: {})
|
|
851
851
|
|
|
852
|
-
expect([
|
|
852
|
+
expect(%w[approve reject manual_review]).to include(result.decision)
|
|
853
853
|
end
|
|
854
854
|
|
|
855
855
|
it "handles many evaluators with diverse decisions" do
|
|
@@ -899,7 +899,7 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
899
899
|
evaluators = 20.times.map do |i|
|
|
900
900
|
DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
901
901
|
decision: "unanimous",
|
|
902
|
-
weight: 0.05 * (i + 1),
|
|
902
|
+
weight: 0.05 * (i + 1), # Weights from 0.05 to 1.0
|
|
903
903
|
name: "Eval#{i}"
|
|
904
904
|
)
|
|
905
905
|
end
|
|
@@ -1158,7 +1158,7 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1158
1158
|
evaluators = 100.times.map do |i|
|
|
1159
1159
|
DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
1160
1160
|
decision: "decision_#{i % 10}",
|
|
1161
|
-
weight: (i + 1) / 200.0,
|
|
1161
|
+
weight: (i + 1) / 200.0, # Weights from 0.005 to 0.505
|
|
1162
1162
|
name: "Eval#{i}"
|
|
1163
1163
|
)
|
|
1164
1164
|
end
|
|
@@ -1179,7 +1179,7 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1179
1179
|
evaluators = 100.times.map do |i|
|
|
1180
1180
|
DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
1181
1181
|
decision: "consensus",
|
|
1182
|
-
weight: 0.5 + (i / 200.0),
|
|
1182
|
+
weight: 0.5 + (i / 200.0), # Weights from 0.5 to 0.995
|
|
1183
1183
|
name: "Eval#{i}"
|
|
1184
1184
|
)
|
|
1185
1185
|
end
|
|
@@ -1199,7 +1199,7 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1199
1199
|
evaluators = 100.times.map do |i|
|
|
1200
1200
|
DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
1201
1201
|
decision: "decision_#{i}",
|
|
1202
|
-
weight: i / 100.0,
|
|
1202
|
+
weight: i / 100.0, # Weights from 0.0 to 0.99
|
|
1203
1203
|
name: "Eval#{i}"
|
|
1204
1204
|
)
|
|
1205
1205
|
end
|
|
@@ -1251,7 +1251,7 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1251
1251
|
it "handles repeating decimals (0.333333...)" do
|
|
1252
1252
|
eval1 = DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
1253
1253
|
decision: "approve",
|
|
1254
|
-
weight: 1.0 / 3.0,
|
|
1254
|
+
weight: 1.0 / 3.0, # 0.333333...
|
|
1255
1255
|
name: "Eval1"
|
|
1256
1256
|
)
|
|
1257
1257
|
|
|
@@ -1282,7 +1282,7 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1282
1282
|
it "normalizes confidence to 4 decimal places" do
|
|
1283
1283
|
eval1 = DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
1284
1284
|
decision: "approve",
|
|
1285
|
-
weight: 0.123456789,
|
|
1285
|
+
weight: 0.123456789, # Many decimal places
|
|
1286
1286
|
name: "Eval1"
|
|
1287
1287
|
)
|
|
1288
1288
|
|
|
@@ -1294,7 +1294,7 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1294
1294
|
result = agent.decide(context: {})
|
|
1295
1295
|
|
|
1296
1296
|
# Confidence should be rounded to 4 decimal places
|
|
1297
|
-
expect(result.confidence.to_s.split(
|
|
1297
|
+
expect(result.confidence.to_s.split(".").last.length).to be <= 4
|
|
1298
1298
|
end
|
|
1299
1299
|
|
|
1300
1300
|
it "handles very small weights (0.0001)" do
|
|
@@ -1331,7 +1331,7 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1331
1331
|
|
|
1332
1332
|
eval2 = DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
1333
1333
|
decision: "approve",
|
|
1334
|
-
weight: 0.3 + 0.0000001,
|
|
1334
|
+
weight: 0.3 + 0.0000001, # Slightly above to create >1.0 sum
|
|
1335
1335
|
name: "Eval2"
|
|
1336
1336
|
)
|
|
1337
1337
|
|
|
@@ -1350,7 +1350,7 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1350
1350
|
# Create evaluators where agreement is not a clean fraction
|
|
1351
1351
|
evaluators = 7.times.map do |i|
|
|
1352
1352
|
DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
1353
|
-
decision: i < 4 ? "approve" : "reject",
|
|
1353
|
+
decision: i < 4 ? "approve" : "reject", # 4/7 = 0.571428...
|
|
1354
1354
|
weight: 0.6,
|
|
1355
1355
|
name: "Eval#{i}"
|
|
1356
1356
|
)
|
|
@@ -1391,9 +1391,9 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1391
1391
|
# Tolerance is 0.0001, so this should trigger an error
|
|
1392
1392
|
modified_payload[:confidence] = original_result.confidence + 0.001
|
|
1393
1393
|
|
|
1394
|
-
expect
|
|
1394
|
+
expect do
|
|
1395
1395
|
DecisionAgent::Replay.run(modified_payload, strict: true)
|
|
1396
|
-
|
|
1396
|
+
end.to raise_error(DecisionAgent::ReplayMismatchError)
|
|
1397
1397
|
end
|
|
1398
1398
|
|
|
1399
1399
|
it "passes when confidence is identical in strict mode" do
|
|
@@ -1407,9 +1407,9 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1407
1407
|
context = { user: "alice" }
|
|
1408
1408
|
original_result = agent.decide(context: context)
|
|
1409
1409
|
|
|
1410
|
-
expect
|
|
1410
|
+
expect do
|
|
1411
1411
|
DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
|
|
1412
|
-
|
|
1412
|
+
end.not_to raise_error
|
|
1413
1413
|
end
|
|
1414
1414
|
|
|
1415
1415
|
it "allows confidence within tolerance in strict mode" do
|
|
@@ -1427,9 +1427,9 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1427
1427
|
# Tolerance is 0.0001, so this should NOT trigger an error
|
|
1428
1428
|
modified_payload[:confidence] = original_result.confidence + 0.00005
|
|
1429
1429
|
|
|
1430
|
-
expect
|
|
1430
|
+
expect do
|
|
1431
1431
|
DecisionAgent::Replay.run(modified_payload, strict: true)
|
|
1432
|
-
|
|
1432
|
+
end.not_to raise_error
|
|
1433
1433
|
end
|
|
1434
1434
|
end
|
|
1435
1435
|
|
|
@@ -1449,9 +1449,9 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1449
1449
|
modified_payload[:decision] = "reject"
|
|
1450
1450
|
|
|
1451
1451
|
result = nil
|
|
1452
|
-
expect
|
|
1452
|
+
expect do
|
|
1453
1453
|
result = DecisionAgent::Replay.run(modified_payload, strict: false)
|
|
1454
|
-
|
|
1454
|
+
end.to output(/Decision changed/).to_stderr
|
|
1455
1455
|
|
|
1456
1456
|
expect(result).not_to be_nil
|
|
1457
1457
|
end
|
|
@@ -1471,9 +1471,9 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1471
1471
|
modified_payload[:confidence] = 0.5
|
|
1472
1472
|
|
|
1473
1473
|
result = nil
|
|
1474
|
-
expect
|
|
1474
|
+
expect do
|
|
1475
1475
|
result = DecisionAgent::Replay.run(modified_payload, strict: false)
|
|
1476
|
-
|
|
1476
|
+
end.to output(/Confidence changed/).to_stderr
|
|
1477
1477
|
|
|
1478
1478
|
expect(result).not_to be_nil
|
|
1479
1479
|
end
|
|
@@ -1494,11 +1494,11 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1494
1494
|
modified_payload[:confidence] = 0.3
|
|
1495
1495
|
|
|
1496
1496
|
result = nil
|
|
1497
|
-
expect
|
|
1497
|
+
expect do
|
|
1498
1498
|
result = DecisionAgent::Replay.run(modified_payload, strict: false)
|
|
1499
|
-
|
|
1499
|
+
end.to output(/Decision changed.*Confidence changed/m).to_stderr
|
|
1500
1500
|
|
|
1501
|
-
expect(result.decision).to eq("approve")
|
|
1501
|
+
expect(result.decision).to eq("approve") # Should use replayed value
|
|
1502
1502
|
end
|
|
1503
1503
|
end
|
|
1504
1504
|
|
|
@@ -1510,9 +1510,9 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1510
1510
|
evaluations: []
|
|
1511
1511
|
}
|
|
1512
1512
|
|
|
1513
|
-
expect
|
|
1513
|
+
expect do
|
|
1514
1514
|
DecisionAgent::Replay.run(invalid_payload, strict: true)
|
|
1515
|
-
|
|
1515
|
+
end.to raise_error(DecisionAgent::InvalidRuleDslError, /context/)
|
|
1516
1516
|
end
|
|
1517
1517
|
|
|
1518
1518
|
it "raises error when decision is missing" do
|
|
@@ -1522,9 +1522,9 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1522
1522
|
evaluations: []
|
|
1523
1523
|
}
|
|
1524
1524
|
|
|
1525
|
-
expect
|
|
1525
|
+
expect do
|
|
1526
1526
|
DecisionAgent::Replay.run(invalid_payload, strict: true)
|
|
1527
|
-
|
|
1527
|
+
end.to raise_error(DecisionAgent::InvalidRuleDslError, /decision/)
|
|
1528
1528
|
end
|
|
1529
1529
|
|
|
1530
1530
|
it "raises error when confidence is missing" do
|
|
@@ -1534,9 +1534,9 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1534
1534
|
evaluations: []
|
|
1535
1535
|
}
|
|
1536
1536
|
|
|
1537
|
-
expect
|
|
1537
|
+
expect do
|
|
1538
1538
|
DecisionAgent::Replay.run(invalid_payload, strict: true)
|
|
1539
|
-
|
|
1539
|
+
end.to raise_error(DecisionAgent::InvalidRuleDslError, /confidence/)
|
|
1540
1540
|
end
|
|
1541
1541
|
|
|
1542
1542
|
it "raises error when evaluations is missing" do
|
|
@@ -1546,15 +1546,15 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1546
1546
|
confidence: 0.8
|
|
1547
1547
|
}
|
|
1548
1548
|
|
|
1549
|
-
expect
|
|
1549
|
+
expect do
|
|
1550
1550
|
DecisionAgent::Replay.run(invalid_payload, strict: true)
|
|
1551
|
-
|
|
1551
|
+
end.to raise_error(DecisionAgent::InvalidRuleDslError, /evaluations/)
|
|
1552
1552
|
end
|
|
1553
1553
|
|
|
1554
1554
|
it "handles empty audit payload" do
|
|
1555
|
-
expect
|
|
1555
|
+
expect do
|
|
1556
1556
|
DecisionAgent::Replay.run({}, strict: true)
|
|
1557
|
-
|
|
1557
|
+
end.to raise_error(DecisionAgent::InvalidRuleDslError)
|
|
1558
1558
|
end
|
|
1559
1559
|
end
|
|
1560
1560
|
|
|
@@ -1615,15 +1615,15 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1615
1615
|
describe "error handling edge cases" do
|
|
1616
1616
|
describe "invalid JSON rule formats" do
|
|
1617
1617
|
it "raises error for non-hash JSON" do
|
|
1618
|
-
expect
|
|
1618
|
+
expect do
|
|
1619
1619
|
DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: [1, 2, 3])
|
|
1620
|
-
|
|
1620
|
+
end.to raise_error(DecisionAgent::InvalidRuleDslError)
|
|
1621
1621
|
end
|
|
1622
1622
|
|
|
1623
1623
|
it "raises error for string input instead of hash" do
|
|
1624
|
-
expect
|
|
1624
|
+
expect do
|
|
1625
1625
|
DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: "not a hash")
|
|
1626
|
-
|
|
1626
|
+
end.to raise_error(DecisionAgent::InvalidRuleDslError)
|
|
1627
1627
|
end
|
|
1628
1628
|
|
|
1629
1629
|
it "raises error when rules is not an array" do
|
|
@@ -1632,9 +1632,9 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1632
1632
|
rules: "not an array"
|
|
1633
1633
|
}
|
|
1634
1634
|
|
|
1635
|
-
expect
|
|
1635
|
+
expect do
|
|
1636
1636
|
DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
1637
|
-
|
|
1637
|
+
end.to raise_error(DecisionAgent::InvalidRuleDslError)
|
|
1638
1638
|
end
|
|
1639
1639
|
|
|
1640
1640
|
it "raises error when rule is not a hash" do
|
|
@@ -1643,72 +1643,72 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1643
1643
|
rules: ["not a hash", "also not a hash"]
|
|
1644
1644
|
}
|
|
1645
1645
|
|
|
1646
|
-
expect
|
|
1646
|
+
expect do
|
|
1647
1647
|
DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
1648
|
-
|
|
1648
|
+
end.to raise_error(DecisionAgent::InvalidRuleDslError)
|
|
1649
1649
|
end
|
|
1650
1650
|
end
|
|
1651
1651
|
|
|
1652
1652
|
describe "no evaluations scenarios" do
|
|
1653
1653
|
it "raises NoEvaluationsError when single evaluator returns nil" do
|
|
1654
1654
|
nil_evaluator = Class.new(DecisionAgent::Evaluators::Base) do
|
|
1655
|
-
def evaluate(
|
|
1655
|
+
def evaluate(_context, feedback: {})
|
|
1656
1656
|
nil
|
|
1657
1657
|
end
|
|
1658
1658
|
end
|
|
1659
1659
|
|
|
1660
1660
|
agent = DecisionAgent::Agent.new(evaluators: [nil_evaluator.new])
|
|
1661
1661
|
|
|
1662
|
-
expect
|
|
1662
|
+
expect do
|
|
1663
1663
|
agent.decide(context: {})
|
|
1664
|
-
|
|
1664
|
+
end.to raise_error(DecisionAgent::NoEvaluationsError)
|
|
1665
1665
|
end
|
|
1666
1666
|
|
|
1667
1667
|
it "raises NoEvaluationsError when all evaluators return nil" do
|
|
1668
1668
|
nil_evaluator1 = Class.new(DecisionAgent::Evaluators::Base) do
|
|
1669
|
-
def evaluate(
|
|
1669
|
+
def evaluate(_context, feedback: {})
|
|
1670
1670
|
nil
|
|
1671
1671
|
end
|
|
1672
1672
|
end
|
|
1673
1673
|
|
|
1674
1674
|
nil_evaluator2 = Class.new(DecisionAgent::Evaluators::Base) do
|
|
1675
|
-
def evaluate(
|
|
1675
|
+
def evaluate(_context, feedback: {})
|
|
1676
1676
|
nil
|
|
1677
1677
|
end
|
|
1678
1678
|
end
|
|
1679
1679
|
|
|
1680
1680
|
agent = DecisionAgent::Agent.new(evaluators: [nil_evaluator1.new, nil_evaluator2.new])
|
|
1681
1681
|
|
|
1682
|
-
expect
|
|
1682
|
+
expect do
|
|
1683
1683
|
agent.decide(context: {})
|
|
1684
|
-
|
|
1684
|
+
end.to raise_error(DecisionAgent::NoEvaluationsError) do |error|
|
|
1685
1685
|
expect(error.message).to include("No evaluators returned a decision")
|
|
1686
1686
|
end
|
|
1687
1687
|
end
|
|
1688
1688
|
|
|
1689
1689
|
it "raises NoEvaluationsError when all evaluators raise exceptions" do
|
|
1690
1690
|
failing_evaluator1 = Class.new(DecisionAgent::Evaluators::Base) do
|
|
1691
|
-
def evaluate(
|
|
1691
|
+
def evaluate(_context, feedback: {})
|
|
1692
1692
|
raise StandardError, "Error 1"
|
|
1693
1693
|
end
|
|
1694
1694
|
end
|
|
1695
1695
|
|
|
1696
1696
|
failing_evaluator2 = Class.new(DecisionAgent::Evaluators::Base) do
|
|
1697
|
-
def evaluate(
|
|
1697
|
+
def evaluate(_context, feedback: {})
|
|
1698
1698
|
raise StandardError, "Error 2"
|
|
1699
1699
|
end
|
|
1700
1700
|
end
|
|
1701
1701
|
|
|
1702
1702
|
agent = DecisionAgent::Agent.new(evaluators: [failing_evaluator1.new, failing_evaluator2.new])
|
|
1703
1703
|
|
|
1704
|
-
expect
|
|
1704
|
+
expect do
|
|
1705
1705
|
agent.decide(context: {})
|
|
1706
|
-
|
|
1706
|
+
end.to raise_error(DecisionAgent::NoEvaluationsError)
|
|
1707
1707
|
end
|
|
1708
1708
|
|
|
1709
1709
|
it "succeeds when at least one evaluator succeeds despite others failing" do
|
|
1710
1710
|
failing_evaluator = Class.new(DecisionAgent::Evaluators::Base) do
|
|
1711
|
-
def evaluate(
|
|
1711
|
+
def evaluate(_context, feedback: {})
|
|
1712
1712
|
raise StandardError, "Intentional failure"
|
|
1713
1713
|
end
|
|
1714
1714
|
end
|
|
@@ -1728,29 +1728,29 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1728
1728
|
|
|
1729
1729
|
describe "boundary condition validation" do
|
|
1730
1730
|
it "validates weight is not above 1.0" do
|
|
1731
|
-
expect
|
|
1731
|
+
expect do
|
|
1732
1732
|
DecisionAgent::Evaluation.new(
|
|
1733
1733
|
decision: "test",
|
|
1734
1734
|
weight: 1.1,
|
|
1735
1735
|
reason: "test",
|
|
1736
1736
|
evaluator_name: "test"
|
|
1737
1737
|
)
|
|
1738
|
-
|
|
1738
|
+
end.to raise_error(DecisionAgent::InvalidWeightError)
|
|
1739
1739
|
end
|
|
1740
1740
|
|
|
1741
1741
|
it "validates weight is not negative" do
|
|
1742
|
-
expect
|
|
1742
|
+
expect do
|
|
1743
1743
|
DecisionAgent::Evaluation.new(
|
|
1744
1744
|
decision: "test",
|
|
1745
1745
|
weight: -0.5,
|
|
1746
1746
|
reason: "test",
|
|
1747
1747
|
evaluator_name: "test"
|
|
1748
1748
|
)
|
|
1749
|
-
|
|
1749
|
+
end.to raise_error(DecisionAgent::InvalidWeightError)
|
|
1750
1750
|
end
|
|
1751
1751
|
|
|
1752
1752
|
it "validates confidence is not above 1.0" do
|
|
1753
|
-
expect
|
|
1753
|
+
expect do
|
|
1754
1754
|
DecisionAgent::Decision.new(
|
|
1755
1755
|
decision: "test",
|
|
1756
1756
|
confidence: 1.001,
|
|
@@ -1758,11 +1758,11 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1758
1758
|
evaluations: [],
|
|
1759
1759
|
audit_payload: {}
|
|
1760
1760
|
)
|
|
1761
|
-
|
|
1761
|
+
end.to raise_error(DecisionAgent::InvalidConfidenceError)
|
|
1762
1762
|
end
|
|
1763
1763
|
|
|
1764
1764
|
it "validates confidence is not negative" do
|
|
1765
|
-
expect
|
|
1765
|
+
expect do
|
|
1766
1766
|
DecisionAgent::Decision.new(
|
|
1767
1767
|
decision: "test",
|
|
1768
1768
|
confidence: -0.001,
|
|
@@ -1770,7 +1770,7 @@ RSpec.describe "Comprehensive Edge Cases" do
|
|
|
1770
1770
|
evaluations: [],
|
|
1771
1771
|
audit_payload: {}
|
|
1772
1772
|
)
|
|
1773
|
-
|
|
1773
|
+
end.to raise_error(DecisionAgent::InvalidConfidenceError)
|
|
1774
1774
|
end
|
|
1775
1775
|
end
|
|
1776
1776
|
end
|