decision_agent 0.2.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +313 -8
- data/bin/decision_agent +104 -0
- data/lib/decision_agent/agent.rb +72 -1
- data/lib/decision_agent/context.rb +1 -0
- data/lib/decision_agent/data_enrichment/cache/memory_adapter.rb +86 -0
- data/lib/decision_agent/data_enrichment/cache_adapter.rb +49 -0
- data/lib/decision_agent/data_enrichment/circuit_breaker.rb +135 -0
- data/lib/decision_agent/data_enrichment/client.rb +220 -0
- data/lib/decision_agent/data_enrichment/config.rb +78 -0
- data/lib/decision_agent/data_enrichment/errors.rb +36 -0
- data/lib/decision_agent/decision.rb +102 -2
- data/lib/decision_agent/dmn/adapter.rb +135 -0
- data/lib/decision_agent/dmn/cache.rb +306 -0
- data/lib/decision_agent/dmn/decision_graph.rb +327 -0
- data/lib/decision_agent/dmn/decision_tree.rb +192 -0
- data/lib/decision_agent/dmn/errors.rb +30 -0
- data/lib/decision_agent/dmn/exporter.rb +217 -0
- data/lib/decision_agent/dmn/feel/evaluator.rb +819 -0
- data/lib/decision_agent/dmn/feel/functions.rb +420 -0
- data/lib/decision_agent/dmn/feel/parser.rb +349 -0
- data/lib/decision_agent/dmn/feel/simple_parser.rb +276 -0
- data/lib/decision_agent/dmn/feel/transformer.rb +372 -0
- data/lib/decision_agent/dmn/feel/types.rb +276 -0
- data/lib/decision_agent/dmn/importer.rb +77 -0
- data/lib/decision_agent/dmn/model.rb +197 -0
- data/lib/decision_agent/dmn/parser.rb +191 -0
- data/lib/decision_agent/dmn/testing.rb +333 -0
- data/lib/decision_agent/dmn/validator.rb +315 -0
- data/lib/decision_agent/dmn/versioning.rb +229 -0
- data/lib/decision_agent/dmn/visualizer.rb +513 -0
- data/lib/decision_agent/dsl/condition_evaluator.rb +984 -838
- data/lib/decision_agent/dsl/schema_validator.rb +53 -14
- data/lib/decision_agent/evaluators/dmn_evaluator.rb +308 -0
- data/lib/decision_agent/evaluators/json_rule_evaluator.rb +69 -9
- data/lib/decision_agent/explainability/condition_trace.rb +83 -0
- data/lib/decision_agent/explainability/explainability_result.rb +52 -0
- data/lib/decision_agent/explainability/rule_trace.rb +39 -0
- data/lib/decision_agent/explainability/trace_collector.rb +24 -0
- data/lib/decision_agent/monitoring/alert_manager.rb +5 -1
- data/lib/decision_agent/simulation/errors.rb +18 -0
- data/lib/decision_agent/simulation/impact_analyzer.rb +498 -0
- data/lib/decision_agent/simulation/monte_carlo_simulator.rb +635 -0
- data/lib/decision_agent/simulation/replay_engine.rb +486 -0
- data/lib/decision_agent/simulation/scenario_engine.rb +318 -0
- data/lib/decision_agent/simulation/scenario_library.rb +163 -0
- data/lib/decision_agent/simulation/shadow_test_engine.rb +287 -0
- data/lib/decision_agent/simulation/what_if_analyzer.rb +1002 -0
- data/lib/decision_agent/simulation.rb +17 -0
- data/lib/decision_agent/version.rb +1 -1
- data/lib/decision_agent/versioning/activerecord_adapter.rb +23 -8
- data/lib/decision_agent/web/dmn_editor.rb +426 -0
- data/lib/decision_agent/web/public/app.js +119 -0
- data/lib/decision_agent/web/public/dmn-editor.css +596 -0
- data/lib/decision_agent/web/public/dmn-editor.html +250 -0
- data/lib/decision_agent/web/public/dmn-editor.js +553 -0
- data/lib/decision_agent/web/public/index.html +52 -0
- data/lib/decision_agent/web/public/simulation.html +130 -0
- data/lib/decision_agent/web/public/simulation_impact.html +478 -0
- data/lib/decision_agent/web/public/simulation_replay.html +551 -0
- data/lib/decision_agent/web/public/simulation_shadow.html +546 -0
- data/lib/decision_agent/web/public/simulation_whatif.html +532 -0
- data/lib/decision_agent/web/public/styles.css +86 -0
- data/lib/decision_agent/web/server.rb +1059 -23
- data/lib/decision_agent.rb +60 -2
- metadata +105 -61
- data/spec/ab_testing/ab_test_assignment_spec.rb +0 -253
- data/spec/ab_testing/ab_test_manager_spec.rb +0 -612
- data/spec/ab_testing/ab_test_spec.rb +0 -270
- data/spec/ab_testing/ab_testing_agent_spec.rb +0 -481
- data/spec/ab_testing/storage/adapter_spec.rb +0 -64
- data/spec/ab_testing/storage/memory_adapter_spec.rb +0 -485
- data/spec/activerecord_thread_safety_spec.rb +0 -553
- data/spec/advanced_operators_spec.rb +0 -3150
- data/spec/agent_spec.rb +0 -289
- data/spec/api_contract_spec.rb +0 -430
- data/spec/audit_adapters_spec.rb +0 -92
- data/spec/auth/access_audit_logger_spec.rb +0 -394
- data/spec/auth/authenticator_spec.rb +0 -112
- data/spec/auth/password_reset_spec.rb +0 -294
- data/spec/auth/permission_checker_spec.rb +0 -207
- data/spec/auth/permission_spec.rb +0 -73
- data/spec/auth/rbac_adapter_spec.rb +0 -550
- data/spec/auth/rbac_config_spec.rb +0 -82
- data/spec/auth/role_spec.rb +0 -51
- data/spec/auth/session_manager_spec.rb +0 -172
- data/spec/auth/session_spec.rb +0 -112
- data/spec/auth/user_spec.rb +0 -130
- data/spec/comprehensive_edge_cases_spec.rb +0 -1777
- data/spec/context_spec.rb +0 -127
- data/spec/decision_agent_spec.rb +0 -96
- data/spec/decision_spec.rb +0 -423
- data/spec/dsl/condition_evaluator_spec.rb +0 -774
- data/spec/dsl_validation_spec.rb +0 -648
- data/spec/edge_cases_spec.rb +0 -353
- data/spec/evaluation_spec.rb +0 -364
- data/spec/evaluation_validator_spec.rb +0 -165
- data/spec/examples/feedback_aware_evaluator_spec.rb +0 -460
- data/spec/examples.txt +0 -1633
- data/spec/issue_verification_spec.rb +0 -759
- data/spec/json_rule_evaluator_spec.rb +0 -587
- data/spec/monitoring/alert_manager_spec.rb +0 -378
- data/spec/monitoring/metrics_collector_spec.rb +0 -499
- data/spec/monitoring/monitored_agent_spec.rb +0 -222
- data/spec/monitoring/prometheus_exporter_spec.rb +0 -242
- data/spec/monitoring/storage/activerecord_adapter_spec.rb +0 -498
- data/spec/monitoring/storage/base_adapter_spec.rb +0 -61
- data/spec/monitoring/storage/memory_adapter_spec.rb +0 -247
- data/spec/performance_optimizations_spec.rb +0 -486
- data/spec/replay_edge_cases_spec.rb +0 -699
- data/spec/replay_spec.rb +0 -210
- data/spec/rfc8785_canonicalization_spec.rb +0 -215
- data/spec/scoring_spec.rb +0 -225
- data/spec/spec_helper.rb +0 -60
- data/spec/testing/batch_test_importer_spec.rb +0 -693
- data/spec/testing/batch_test_runner_spec.rb +0 -307
- data/spec/testing/test_coverage_analyzer_spec.rb +0 -292
- data/spec/testing/test_result_comparator_spec.rb +0 -392
- data/spec/testing/test_scenario_spec.rb +0 -113
- data/spec/thread_safety_spec.rb +0 -482
- data/spec/thread_safety_spec.rb.broken +0 -878
- data/spec/versioning/adapter_spec.rb +0 -156
- data/spec/versioning_spec.rb +0 -1030
- data/spec/web/middleware/auth_middleware_spec.rb +0 -133
- data/spec/web/middleware/permission_middleware_spec.rb +0 -247
- data/spec/web_ui_rack_spec.rb +0 -1840
data/spec/replay_spec.rb
DELETED
|
@@ -1,210 +0,0 @@
|
|
|
1
|
-
require "spec_helper"
|
|
2
|
-
|
|
3
|
-
RSpec.describe DecisionAgent::Replay do
|
|
4
|
-
let(:evaluator) do
|
|
5
|
-
DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
6
|
-
decision: "approve",
|
|
7
|
-
weight: 0.8,
|
|
8
|
-
reason: "Static approval"
|
|
9
|
-
)
|
|
10
|
-
end
|
|
11
|
-
|
|
12
|
-
let(:agent) do
|
|
13
|
-
DecisionAgent::Agent.new(evaluators: [evaluator])
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
describe ".run" do
|
|
17
|
-
it "replays decision from audit payload in strict mode" do
|
|
18
|
-
context = { user: "alice", action: "login" }
|
|
19
|
-
original_result = agent.decide(context: context)
|
|
20
|
-
|
|
21
|
-
replayed_result = DecisionAgent::Replay.run(
|
|
22
|
-
original_result.audit_payload,
|
|
23
|
-
strict: true
|
|
24
|
-
)
|
|
25
|
-
|
|
26
|
-
expect(replayed_result.decision).to eq(original_result.decision)
|
|
27
|
-
expect(replayed_result.confidence).to be_within(0.0001).of(original_result.confidence)
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
it "raises ReplayMismatchError in strict mode when decision differs" do
|
|
31
|
-
context = { user: "alice" }
|
|
32
|
-
original_result = agent.decide(context: context)
|
|
33
|
-
|
|
34
|
-
modified_payload = original_result.audit_payload.dup
|
|
35
|
-
modified_payload[:decision] = "reject"
|
|
36
|
-
|
|
37
|
-
expect do
|
|
38
|
-
DecisionAgent::Replay.run(modified_payload, strict: true)
|
|
39
|
-
end.to raise_error(DecisionAgent::ReplayMismatchError) do |error|
|
|
40
|
-
expect(error.differences).to include(/decision mismatch/)
|
|
41
|
-
expect(error.expected[:decision]).to eq("reject")
|
|
42
|
-
expect(error.actual[:decision]).to eq("approve")
|
|
43
|
-
end
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
it "raises ReplayMismatchError in strict mode when confidence differs" do
|
|
47
|
-
context = { user: "alice" }
|
|
48
|
-
original_result = agent.decide(context: context)
|
|
49
|
-
|
|
50
|
-
modified_payload = original_result.audit_payload.dup
|
|
51
|
-
modified_payload[:confidence] = 0.5
|
|
52
|
-
|
|
53
|
-
expect do
|
|
54
|
-
DecisionAgent::Replay.run(modified_payload, strict: true)
|
|
55
|
-
end.to raise_error(DecisionAgent::ReplayMismatchError) do |error|
|
|
56
|
-
expect(error.differences).to include(/confidence mismatch/)
|
|
57
|
-
end
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
it "allows differences in non-strict mode" do
|
|
61
|
-
context = { user: "alice" }
|
|
62
|
-
original_result = agent.decide(context: context)
|
|
63
|
-
|
|
64
|
-
modified_payload = original_result.audit_payload.dup
|
|
65
|
-
modified_payload[:decision] = "reject"
|
|
66
|
-
|
|
67
|
-
expect do
|
|
68
|
-
DecisionAgent::Replay.run(modified_payload, strict: false)
|
|
69
|
-
end.not_to raise_error
|
|
70
|
-
end
|
|
71
|
-
|
|
72
|
-
it "logs differences in non-strict mode" do
|
|
73
|
-
context = { user: "alice" }
|
|
74
|
-
original_result = agent.decide(context: context)
|
|
75
|
-
|
|
76
|
-
modified_payload = original_result.audit_payload.dup
|
|
77
|
-
modified_payload[:decision] = "reject"
|
|
78
|
-
|
|
79
|
-
expect do
|
|
80
|
-
DecisionAgent::Replay.run(modified_payload, strict: false)
|
|
81
|
-
end.to output(/Decision changed/).to_stderr
|
|
82
|
-
end
|
|
83
|
-
|
|
84
|
-
it "validates required fields in audit payload" do
|
|
85
|
-
invalid_payload = { context: {} }
|
|
86
|
-
|
|
87
|
-
expect do
|
|
88
|
-
DecisionAgent::Replay.run(invalid_payload, strict: true)
|
|
89
|
-
end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key/)
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
it "reconstructs evaluations from audit payload" do
|
|
93
|
-
eval1 = DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
94
|
-
decision: "approve",
|
|
95
|
-
weight: 0.7,
|
|
96
|
-
reason: "Eval 1",
|
|
97
|
-
name: "Evaluator1"
|
|
98
|
-
)
|
|
99
|
-
eval2 = DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
100
|
-
decision: "approve",
|
|
101
|
-
weight: 0.9,
|
|
102
|
-
reason: "Eval 2",
|
|
103
|
-
name: "Evaluator2"
|
|
104
|
-
)
|
|
105
|
-
|
|
106
|
-
multi_agent = DecisionAgent::Agent.new(evaluators: [eval1, eval2])
|
|
107
|
-
original_result = multi_agent.decide(context: { user: "bob" })
|
|
108
|
-
|
|
109
|
-
replayed_result = DecisionAgent::Replay.run(
|
|
110
|
-
original_result.audit_payload,
|
|
111
|
-
strict: true
|
|
112
|
-
)
|
|
113
|
-
|
|
114
|
-
expect(replayed_result.evaluations.size).to eq(2)
|
|
115
|
-
expect(replayed_result.evaluations.map(&:evaluator_name)).to match_array(%w[Evaluator1 Evaluator2])
|
|
116
|
-
end
|
|
117
|
-
|
|
118
|
-
it "uses correct scoring strategy from audit payload" do
|
|
119
|
-
max_weight_agent = DecisionAgent::Agent.new(
|
|
120
|
-
evaluators: [evaluator],
|
|
121
|
-
scoring_strategy: DecisionAgent::Scoring::MaxWeight.new
|
|
122
|
-
)
|
|
123
|
-
|
|
124
|
-
original_result = max_weight_agent.decide(context: { user: "charlie" })
|
|
125
|
-
|
|
126
|
-
expect(original_result.audit_payload[:scoring_strategy]).to include("MaxWeight")
|
|
127
|
-
|
|
128
|
-
replayed_result = DecisionAgent::Replay.run(
|
|
129
|
-
original_result.audit_payload,
|
|
130
|
-
strict: true
|
|
131
|
-
)
|
|
132
|
-
|
|
133
|
-
expect(replayed_result.decision).to eq(original_result.decision)
|
|
134
|
-
end
|
|
135
|
-
|
|
136
|
-
it "handles symbol and string keys in audit payload" do
|
|
137
|
-
context = { user: "alice" }
|
|
138
|
-
original_result = agent.decide(context: context)
|
|
139
|
-
|
|
140
|
-
string_key_payload = JSON.parse(JSON.generate(original_result.audit_payload))
|
|
141
|
-
|
|
142
|
-
replayed_result = DecisionAgent::Replay.run(
|
|
143
|
-
string_key_payload,
|
|
144
|
-
strict: true
|
|
145
|
-
)
|
|
146
|
-
|
|
147
|
-
expect(replayed_result.decision).to eq(original_result.decision)
|
|
148
|
-
end
|
|
149
|
-
|
|
150
|
-
it "preserves feedback in replay" do
|
|
151
|
-
context = { user: "alice" }
|
|
152
|
-
feedback = { source: "manual_override" }
|
|
153
|
-
|
|
154
|
-
original_result = agent.decide(context: context, feedback: feedback)
|
|
155
|
-
|
|
156
|
-
replayed_result = DecisionAgent::Replay.run(
|
|
157
|
-
original_result.audit_payload,
|
|
158
|
-
strict: true
|
|
159
|
-
)
|
|
160
|
-
|
|
161
|
-
expect(replayed_result.audit_payload[:feedback]).to eq(feedback)
|
|
162
|
-
end
|
|
163
|
-
end
|
|
164
|
-
|
|
165
|
-
describe "deterministic replay" do
|
|
166
|
-
it "produces identical results for identical inputs across multiple replays" do
|
|
167
|
-
context = { user: "alice", priority: "high" }
|
|
168
|
-
original_result = agent.decide(context: context)
|
|
169
|
-
|
|
170
|
-
results = 5.times.map do
|
|
171
|
-
DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
|
|
172
|
-
end
|
|
173
|
-
|
|
174
|
-
results.each do |result|
|
|
175
|
-
expect(result.decision).to eq(original_result.decision)
|
|
176
|
-
expect(result.confidence).to be_within(0.0001).of(original_result.confidence)
|
|
177
|
-
end
|
|
178
|
-
end
|
|
179
|
-
end
|
|
180
|
-
|
|
181
|
-
describe "complex scenario replay" do
|
|
182
|
-
it "replays decisions from JSON rule evaluators" do
|
|
183
|
-
rules = {
|
|
184
|
-
version: "1.0",
|
|
185
|
-
ruleset: "test",
|
|
186
|
-
rules: [
|
|
187
|
-
{
|
|
188
|
-
id: "high_priority",
|
|
189
|
-
if: { field: "priority", op: "eq", value: "high" },
|
|
190
|
-
then: { decision: "escalate", weight: 0.9, reason: "High priority issue" }
|
|
191
|
-
}
|
|
192
|
-
]
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
json_evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
196
|
-
json_agent = DecisionAgent::Agent.new(evaluators: [json_evaluator])
|
|
197
|
-
|
|
198
|
-
context = { priority: "high", user: "alice" }
|
|
199
|
-
original_result = json_agent.decide(context: context)
|
|
200
|
-
|
|
201
|
-
replayed_result = DecisionAgent::Replay.run(
|
|
202
|
-
original_result.audit_payload,
|
|
203
|
-
strict: true
|
|
204
|
-
)
|
|
205
|
-
|
|
206
|
-
expect(replayed_result.decision).to eq("escalate")
|
|
207
|
-
expect(replayed_result.confidence).to be_within(0.0001).of(original_result.confidence)
|
|
208
|
-
end
|
|
209
|
-
end
|
|
210
|
-
end
|
data/spec/rfc8785_canonicalization_spec.rb
DELETED
|
@@ -1,215 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "spec_helper"
|
|
4
|
-
|
|
5
|
-
RSpec.describe "RFC 8785 JSON Canonicalization" do
|
|
6
|
-
let(:evaluator) do
|
|
7
|
-
DecisionAgent::Evaluators::JsonRuleEvaluator.new(
|
|
8
|
-
rules_json: {
|
|
9
|
-
version: "1.0",
|
|
10
|
-
ruleset: "test",
|
|
11
|
-
rules: [
|
|
12
|
-
{
|
|
13
|
-
id: "always_approve",
|
|
14
|
-
if: { field: "amount", op: "gte", value: 0 },
|
|
15
|
-
then: { decision: "approve", weight: 1.0, reason: "Test rule" }
|
|
16
|
-
}
|
|
17
|
-
]
|
|
18
|
-
}
|
|
19
|
-
)
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
let(:agent) { DecisionAgent::Agent.new(evaluators: [evaluator]) }
|
|
23
|
-
|
|
24
|
-
describe "canonical JSON serialization" do
|
|
25
|
-
it "produces deterministic hashes using RFC 8785" do
|
|
26
|
-
# Same context should produce same hash every time
|
|
27
|
-
context = { amount: 100, user: { id: 123, name: "Alice" } }
|
|
28
|
-
|
|
29
|
-
decision1 = agent.decide(context: context)
|
|
30
|
-
decision2 = agent.decide(context: context)
|
|
31
|
-
|
|
32
|
-
hash1 = decision1.audit_payload[:deterministic_hash]
|
|
33
|
-
hash2 = decision2.audit_payload[:deterministic_hash]
|
|
34
|
-
|
|
35
|
-
expect(hash1).to eq(hash2)
|
|
36
|
-
expect(hash1).to be_a(String)
|
|
37
|
-
expect(hash1.length).to eq(64) # SHA256 produces 64 hex characters
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
it "produces different hashes for different contexts" do
|
|
41
|
-
context1 = { amount: 100, user: { id: 123 } }
|
|
42
|
-
context2 = { amount: 200, user: { id: 456 } }
|
|
43
|
-
|
|
44
|
-
decision1 = agent.decide(context: context1)
|
|
45
|
-
decision2 = agent.decide(context: context2)
|
|
46
|
-
|
|
47
|
-
hash1 = decision1.audit_payload[:deterministic_hash]
|
|
48
|
-
hash2 = decision2.audit_payload[:deterministic_hash]
|
|
49
|
-
|
|
50
|
-
expect(hash1).not_to eq(hash2)
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
it "is insensitive to property order (canonicalization)" do
|
|
54
|
-
# Different property order should produce same hash
|
|
55
|
-
context1 = { amount: 100, user: { id: 123, name: "Alice" } }
|
|
56
|
-
context2 = { user: { name: "Alice", id: 123 }, amount: 100 }
|
|
57
|
-
|
|
58
|
-
decision1 = agent.decide(context: context1)
|
|
59
|
-
decision2 = agent.decide(context: context2)
|
|
60
|
-
|
|
61
|
-
hash1 = decision1.audit_payload[:deterministic_hash]
|
|
62
|
-
hash2 = decision2.audit_payload[:deterministic_hash]
|
|
63
|
-
|
|
64
|
-
expect(hash1).to eq(hash2), "RFC 8785 canonicalization should sort properties"
|
|
65
|
-
end
|
|
66
|
-
|
|
67
|
-
it "handles special characters correctly" do
|
|
68
|
-
# Test Unicode, quotes, and control characters
|
|
69
|
-
context = {
|
|
70
|
-
amount: 100,
|
|
71
|
-
note: "Test with \"quotes\", €uro, and \n newline"
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
decision = agent.decide(context: context)
|
|
75
|
-
hash = decision.audit_payload[:deterministic_hash]
|
|
76
|
-
|
|
77
|
-
expect(hash).to be_a(String)
|
|
78
|
-
expect(hash.length).to eq(64)
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
it "handles floating point numbers deterministically" do
|
|
82
|
-
# RFC 8785 specifies exact float serialization per IEEE 754
|
|
83
|
-
# Note: 99.99 cannot be exactly represented in binary floating point
|
|
84
|
-
context = { amount: 100, price: 99.99, tax: 0.075 }
|
|
85
|
-
|
|
86
|
-
decision1 = agent.decide(context: context)
|
|
87
|
-
decision2 = agent.decide(context: context)
|
|
88
|
-
|
|
89
|
-
hash1 = decision1.audit_payload[:deterministic_hash]
|
|
90
|
-
hash2 = decision2.audit_payload[:deterministic_hash]
|
|
91
|
-
|
|
92
|
-
# Same context should always produce same hash
|
|
93
|
-
expect(hash1).to eq(hash2), "RFC 8785 should produce consistent hashes for same values"
|
|
94
|
-
|
|
95
|
-
# Verify RFC 8785 uses ECMAScript number serialization
|
|
96
|
-
canonical = agent.send(:canonical_json, context)
|
|
97
|
-
# RFC 8785 may represent 99.99 as 99.98999999999999 due to IEEE 754
|
|
98
|
-
expect(canonical).to match(/99\.\d+/)
|
|
99
|
-
expect(canonical).to include("0.075")
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
it "handles nested structures correctly" do
|
|
103
|
-
context = {
|
|
104
|
-
amount: 100,
|
|
105
|
-
user: {
|
|
106
|
-
id: 123,
|
|
107
|
-
profile: {
|
|
108
|
-
name: "Alice",
|
|
109
|
-
tags: %w[premium verified]
|
|
110
|
-
}
|
|
111
|
-
}
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
decision = agent.decide(context: context)
|
|
115
|
-
hash = decision.audit_payload[:deterministic_hash]
|
|
116
|
-
|
|
117
|
-
expect(hash).to be_a(String)
|
|
118
|
-
expect(hash.length).to eq(64)
|
|
119
|
-
end
|
|
120
|
-
|
|
121
|
-
it "handles arrays consistently" do
|
|
122
|
-
# Array order should be preserved (not sorted)
|
|
123
|
-
context1 = { amount: 100, tags: %w[a b c] }
|
|
124
|
-
context2 = { amount: 100, tags: %w[c b a] }
|
|
125
|
-
|
|
126
|
-
decision1 = agent.decide(context: context1)
|
|
127
|
-
decision2 = agent.decide(context: context2)
|
|
128
|
-
|
|
129
|
-
hash1 = decision1.audit_payload[:deterministic_hash]
|
|
130
|
-
hash2 = decision2.audit_payload[:deterministic_hash]
|
|
131
|
-
|
|
132
|
-
expect(hash1).not_to eq(hash2), "RFC 8785 preserves array order"
|
|
133
|
-
end
|
|
134
|
-
|
|
135
|
-
it "handles nil values correctly" do
|
|
136
|
-
context = { amount: 100, optional_field: nil }
|
|
137
|
-
|
|
138
|
-
decision = agent.decide(context: context)
|
|
139
|
-
hash = decision.audit_payload[:deterministic_hash]
|
|
140
|
-
|
|
141
|
-
expect(hash).to be_a(String)
|
|
142
|
-
expect(hash.length).to eq(64)
|
|
143
|
-
end
|
|
144
|
-
|
|
145
|
-
it "handles boolean values correctly" do
|
|
146
|
-
context = { amount: 100, is_verified: true, is_blocked: false }
|
|
147
|
-
|
|
148
|
-
decision = agent.decide(context: context)
|
|
149
|
-
hash = decision.audit_payload[:deterministic_hash]
|
|
150
|
-
|
|
151
|
-
expect(hash).to be_a(String)
|
|
152
|
-
expect(hash.length).to eq(64)
|
|
153
|
-
end
|
|
154
|
-
|
|
155
|
-
it "is thread-safe with concurrent hash computations" do
|
|
156
|
-
contexts = 10.times.map { |i| { amount: i * 100, id: i } }
|
|
157
|
-
results = []
|
|
158
|
-
mutex = Mutex.new
|
|
159
|
-
|
|
160
|
-
threads = contexts.map do |ctx|
|
|
161
|
-
Thread.new do
|
|
162
|
-
decision = agent.decide(context: ctx)
|
|
163
|
-
hash = decision.audit_payload[:deterministic_hash]
|
|
164
|
-
mutex.synchronize { results << hash }
|
|
165
|
-
end
|
|
166
|
-
end
|
|
167
|
-
|
|
168
|
-
threads.each(&:join)
|
|
169
|
-
|
|
170
|
-
expect(results.size).to eq(10)
|
|
171
|
-
expect(results.uniq.size).to eq(10), "Each context should produce unique hash"
|
|
172
|
-
results.each do |hash|
|
|
173
|
-
expect(hash.length).to eq(64)
|
|
174
|
-
end
|
|
175
|
-
end
|
|
176
|
-
end
|
|
177
|
-
|
|
178
|
-
describe "RFC 8785 compliance" do
|
|
179
|
-
it "uses json-canonicalization gem for canonicalization" do
|
|
180
|
-
# Verify we're using the RFC 8785 implementation
|
|
181
|
-
test_data = { b: 2, a: 1 }
|
|
182
|
-
canonical = agent.send(:canonical_json, test_data)
|
|
183
|
-
|
|
184
|
-
# RFC 8785 should sort keys: {"a":1,"b":2}
|
|
185
|
-
expect(canonical).to include('"a":1')
|
|
186
|
-
expect(canonical).to include('"b":2')
|
|
187
|
-
expect(canonical.index('"a"')).to be < canonical.index('"b"')
|
|
188
|
-
end
|
|
189
|
-
|
|
190
|
-
it "produces compact JSON without whitespace" do
|
|
191
|
-
test_data = { amount: 100, user: { id: 123 } }
|
|
192
|
-
canonical = agent.send(:canonical_json, test_data)
|
|
193
|
-
|
|
194
|
-
# RFC 8785 produces compact JSON
|
|
195
|
-
expect(canonical).not_to include("\n")
|
|
196
|
-
expect(canonical).not_to include(" ")
|
|
197
|
-
end
|
|
198
|
-
end
|
|
199
|
-
|
|
200
|
-
describe "performance characteristics" do
|
|
201
|
-
it "computes hashes efficiently" do
|
|
202
|
-
context = {
|
|
203
|
-
amount: 100,
|
|
204
|
-
user: { id: 123, name: "Alice", tags: (1..100).to_a }
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
# Should complete quickly even with larger payloads
|
|
208
|
-
start_time = Time.now
|
|
209
|
-
100.times { agent.decide(context: context) }
|
|
210
|
-
elapsed = Time.now - start_time
|
|
211
|
-
|
|
212
|
-
expect(elapsed).to be < 1.0, "100 decisions should complete in under 1 second"
|
|
213
|
-
end
|
|
214
|
-
end
|
|
215
|
-
end
|
data/spec/scoring_spec.rb
DELETED
|
@@ -1,225 +0,0 @@
|
|
|
1
|
-
require "spec_helper"
|
|
2
|
-
|
|
3
|
-
RSpec.describe "Scoring Strategies" do
|
|
4
|
-
let(:eval1) do
|
|
5
|
-
DecisionAgent::Evaluation.new(
|
|
6
|
-
decision: "approve",
|
|
7
|
-
weight: 0.6,
|
|
8
|
-
reason: "Test 1",
|
|
9
|
-
evaluator_name: "Eval1"
|
|
10
|
-
)
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
let(:eval2) do
|
|
14
|
-
DecisionAgent::Evaluation.new(
|
|
15
|
-
decision: "approve",
|
|
16
|
-
weight: 0.8,
|
|
17
|
-
reason: "Test 2",
|
|
18
|
-
evaluator_name: "Eval2"
|
|
19
|
-
)
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
let(:eval3) do
|
|
23
|
-
DecisionAgent::Evaluation.new(
|
|
24
|
-
decision: "reject",
|
|
25
|
-
weight: 0.5,
|
|
26
|
-
reason: "Test 3",
|
|
27
|
-
evaluator_name: "Eval3"
|
|
28
|
-
)
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
describe DecisionAgent::Scoring::WeightedAverage do
|
|
32
|
-
it "calculates weighted average for single decision" do
|
|
33
|
-
strategy = DecisionAgent::Scoring::WeightedAverage.new
|
|
34
|
-
result = strategy.score([eval1, eval2])
|
|
35
|
-
|
|
36
|
-
expect(result[:decision]).to eq("approve")
|
|
37
|
-
expect(result[:confidence]).to eq(1.0)
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
it "calculates weighted average with conflicts" do
|
|
41
|
-
strategy = DecisionAgent::Scoring::WeightedAverage.new
|
|
42
|
-
result = strategy.score([eval1, eval2, eval3])
|
|
43
|
-
|
|
44
|
-
total_weight = 0.6 + 0.8 + 0.5
|
|
45
|
-
approve_weight = 0.6 + 0.8
|
|
46
|
-
expected_confidence = approve_weight / total_weight
|
|
47
|
-
|
|
48
|
-
expect(result[:decision]).to eq("approve")
|
|
49
|
-
expect(result[:confidence]).to be_within(0.0001).of(expected_confidence)
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
it "returns 0 confidence for empty evaluations" do
|
|
53
|
-
strategy = DecisionAgent::Scoring::WeightedAverage.new
|
|
54
|
-
result = strategy.score([])
|
|
55
|
-
|
|
56
|
-
expect(result[:decision]).to be_nil
|
|
57
|
-
expect(result[:confidence]).to eq(0.0)
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
it "normalizes confidence to [0, 1]" do
|
|
61
|
-
strategy = DecisionAgent::Scoring::WeightedAverage.new
|
|
62
|
-
result = strategy.score([eval1])
|
|
63
|
-
|
|
64
|
-
expect(result[:confidence]).to be_between(0.0, 1.0)
|
|
65
|
-
end
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
describe DecisionAgent::Scoring::MaxWeight do
|
|
69
|
-
it "selects decision with maximum weight" do
|
|
70
|
-
strategy = DecisionAgent::Scoring::MaxWeight.new
|
|
71
|
-
result = strategy.score([eval1, eval2, eval3])
|
|
72
|
-
|
|
73
|
-
expect(result[:decision]).to eq("approve")
|
|
74
|
-
expect(result[:confidence]).to eq(0.8)
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
it "uses first evaluation when weights are equal" do
|
|
78
|
-
eval_a = DecisionAgent::Evaluation.new(
|
|
79
|
-
decision: "option_a",
|
|
80
|
-
weight: 0.7,
|
|
81
|
-
reason: "Test A",
|
|
82
|
-
evaluator_name: "EvalA"
|
|
83
|
-
)
|
|
84
|
-
eval_b = DecisionAgent::Evaluation.new(
|
|
85
|
-
decision: "option_b",
|
|
86
|
-
weight: 0.7,
|
|
87
|
-
reason: "Test B",
|
|
88
|
-
evaluator_name: "EvalB"
|
|
89
|
-
)
|
|
90
|
-
|
|
91
|
-
strategy = DecisionAgent::Scoring::MaxWeight.new
|
|
92
|
-
result = strategy.score([eval_a, eval_b])
|
|
93
|
-
|
|
94
|
-
expect(%w[option_a option_b]).to include(result[:decision])
|
|
95
|
-
expect(result[:confidence]).to eq(0.7)
|
|
96
|
-
end
|
|
97
|
-
|
|
98
|
-
it "returns 0 confidence for empty evaluations" do
|
|
99
|
-
strategy = DecisionAgent::Scoring::MaxWeight.new
|
|
100
|
-
result = strategy.score([])
|
|
101
|
-
|
|
102
|
-
expect(result[:decision]).to be_nil
|
|
103
|
-
expect(result[:confidence]).to eq(0.0)
|
|
104
|
-
end
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
describe DecisionAgent::Scoring::Consensus do
|
|
108
|
-
it "selects decision with highest agreement" do
|
|
109
|
-
eval4 = DecisionAgent::Evaluation.new(
|
|
110
|
-
decision: "approve",
|
|
111
|
-
weight: 0.7,
|
|
112
|
-
reason: "Test 4",
|
|
113
|
-
evaluator_name: "Eval4"
|
|
114
|
-
)
|
|
115
|
-
|
|
116
|
-
strategy = DecisionAgent::Scoring::Consensus.new
|
|
117
|
-
result = strategy.score([eval1, eval2, eval3, eval4])
|
|
118
|
-
|
|
119
|
-
expect(result[:decision]).to eq("approve")
|
|
120
|
-
end
|
|
121
|
-
|
|
122
|
-
it "considers both agreement and weight" do
|
|
123
|
-
low_weight_majority = [
|
|
124
|
-
DecisionAgent::Evaluation.new(decision: "approve", weight: 0.3, reason: "A", evaluator_name: "E1"),
|
|
125
|
-
DecisionAgent::Evaluation.new(decision: "approve", weight: 0.3, reason: "B", evaluator_name: "E2"),
|
|
126
|
-
DecisionAgent::Evaluation.new(decision: "approve", weight: 0.3, reason: "C", evaluator_name: "E3")
|
|
127
|
-
]
|
|
128
|
-
|
|
129
|
-
high_weight_minority = [
|
|
130
|
-
DecisionAgent::Evaluation.new(decision: "reject", weight: 0.9, reason: "D", evaluator_name: "E4")
|
|
131
|
-
]
|
|
132
|
-
|
|
133
|
-
strategy = DecisionAgent::Scoring::Consensus.new
|
|
134
|
-
result = strategy.score(low_weight_majority + high_weight_minority)
|
|
135
|
-
|
|
136
|
-
expect(result[:decision]).to eq("approve")
|
|
137
|
-
end
|
|
138
|
-
|
|
139
|
-
it "reduces confidence when minimum agreement not met" do
|
|
140
|
-
eval_spread = [
|
|
141
|
-
DecisionAgent::Evaluation.new(decision: "option_a", weight: 0.8, reason: "A", evaluator_name: "E1"),
|
|
142
|
-
DecisionAgent::Evaluation.new(decision: "option_b", weight: 0.7, reason: "B", evaluator_name: "E2"),
|
|
143
|
-
DecisionAgent::Evaluation.new(decision: "option_c", weight: 0.6, reason: "C", evaluator_name: "E3")
|
|
144
|
-
]
|
|
145
|
-
|
|
146
|
-
strategy = DecisionAgent::Scoring::Consensus.new(minimum_agreement: 0.5)
|
|
147
|
-
result = strategy.score(eval_spread)
|
|
148
|
-
|
|
149
|
-
expect(result[:confidence]).to be < 0.5
|
|
150
|
-
end
|
|
151
|
-
|
|
152
|
-
it "allows custom minimum agreement threshold" do
|
|
153
|
-
strategy = DecisionAgent::Scoring::Consensus.new(minimum_agreement: 0.7)
|
|
154
|
-
result = strategy.score([eval1, eval2, eval3])
|
|
155
|
-
|
|
156
|
-
expect(result[:decision]).to eq("approve")
|
|
157
|
-
end
|
|
158
|
-
|
|
159
|
-
it "returns 0 confidence for empty evaluations" do
|
|
160
|
-
strategy = DecisionAgent::Scoring::Consensus.new
|
|
161
|
-
result = strategy.score([])
|
|
162
|
-
|
|
163
|
-
expect(result[:decision]).to be_nil
|
|
164
|
-
expect(result[:confidence]).to eq(0.0)
|
|
165
|
-
end
|
|
166
|
-
end
|
|
167
|
-
|
|
168
|
-
describe DecisionAgent::Scoring::Threshold do
|
|
169
|
-
it "accepts decision when weight meets threshold" do
|
|
170
|
-
strategy = DecisionAgent::Scoring::Threshold.new(threshold: 0.7)
|
|
171
|
-
result = strategy.score([eval2])
|
|
172
|
-
|
|
173
|
-
expect(result[:decision]).to eq("approve")
|
|
174
|
-
expect(result[:confidence]).to eq(0.8)
|
|
175
|
-
end
|
|
176
|
-
|
|
177
|
-
it "returns fallback decision when weight below threshold" do
|
|
178
|
-
strategy = DecisionAgent::Scoring::Threshold.new(threshold: 0.9, fallback_decision: "manual_review")
|
|
179
|
-
result = strategy.score([eval2])
|
|
180
|
-
|
|
181
|
-
expect(result[:decision]).to eq("manual_review")
|
|
182
|
-
expect(result[:confidence]).to be < 0.9
|
|
183
|
-
end
|
|
184
|
-
|
|
185
|
-
it "uses average weight across evaluations with same decision" do
|
|
186
|
-
strategy = DecisionAgent::Scoring::Threshold.new(threshold: 0.7)
|
|
187
|
-
result = strategy.score([eval1, eval2])
|
|
188
|
-
|
|
189
|
-
avg_weight = (0.6 + 0.8) / 2
|
|
190
|
-
expect(result[:decision]).to eq("approve")
|
|
191
|
-
expect(result[:confidence]).to eq(avg_weight)
|
|
192
|
-
end
|
|
193
|
-
|
|
194
|
-
it "uses default fallback decision" do
|
|
195
|
-
strategy = DecisionAgent::Scoring::Threshold.new(threshold: 0.9)
|
|
196
|
-
result = strategy.score([eval1])
|
|
197
|
-
|
|
198
|
-
expect(result[:decision]).to eq("no_decision")
|
|
199
|
-
end
|
|
200
|
-
|
|
201
|
-
it "returns fallback for empty evaluations" do
|
|
202
|
-
strategy = DecisionAgent::Scoring::Threshold.new(fallback_decision: "default")
|
|
203
|
-
result = strategy.score([])
|
|
204
|
-
|
|
205
|
-
expect(result[:decision]).to eq("default")
|
|
206
|
-
expect(result[:confidence]).to eq(0.0)
|
|
207
|
-
end
|
|
208
|
-
end
|
|
209
|
-
|
|
210
|
-
describe "confidence bounds" do
|
|
211
|
-
it "ensures all strategies return confidence between 0 and 1" do
|
|
212
|
-
strategies = [
|
|
213
|
-
DecisionAgent::Scoring::WeightedAverage.new,
|
|
214
|
-
DecisionAgent::Scoring::MaxWeight.new,
|
|
215
|
-
DecisionAgent::Scoring::Consensus.new,
|
|
216
|
-
DecisionAgent::Scoring::Threshold.new
|
|
217
|
-
]
|
|
218
|
-
|
|
219
|
-
strategies.each do |strategy|
|
|
220
|
-
result = strategy.score([eval1, eval2, eval3])
|
|
221
|
-
expect(result[:confidence]).to be_between(0.0, 1.0)
|
|
222
|
-
end
|
|
223
|
-
end
|
|
224
|
-
end
|
|
225
|
-
end
|
data/spec/spec_helper.rb
DELETED
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
require "simplecov"
|
|
2
|
-
SimpleCov.start do
|
|
3
|
-
add_filter "/spec/"
|
|
4
|
-
add_filter "/examples/"
|
|
5
|
-
end
|
|
6
|
-
|
|
7
|
-
require "decision_agent"
|
|
8
|
-
|
|
9
|
-
# Load ActiveRecord for thread-safety and integration tests
|
|
10
|
-
begin
|
|
11
|
-
require "active_record"
|
|
12
|
-
require "sqlite3"
|
|
13
|
-
require "decision_agent/versioning/activerecord_adapter"
|
|
14
|
-
rescue LoadError
|
|
15
|
-
# ActiveRecord is optional - tests will be skipped if not available
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
# Store original value for cleanup
|
|
19
|
-
# rubocop:disable Style/GlobalVars
|
|
20
|
-
$original_disable_webui_permissions = nil
|
|
21
|
-
# rubocop:enable Style/GlobalVars
|
|
22
|
-
|
|
23
|
-
RSpec.configure do |config|
|
|
24
|
-
config.expect_with :rspec do |expectations|
|
|
25
|
-
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
config.mock_with :rspec do |mocks|
|
|
29
|
-
mocks.verify_partial_doubles = true
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
config.shared_context_metadata_behavior = :apply_to_host_groups
|
|
33
|
-
config.filter_run_when_matching :focus
|
|
34
|
-
config.example_status_persistence_file_path = "spec/examples.txt"
|
|
35
|
-
config.disable_monkey_patching!
|
|
36
|
-
config.warnings = true
|
|
37
|
-
|
|
38
|
-
config.default_formatter = "doc" if config.files_to_run.one?
|
|
39
|
-
|
|
40
|
-
config.order = :random
|
|
41
|
-
Kernel.srand config.seed
|
|
42
|
-
|
|
43
|
-
# Ensure permissions are enabled for tests
|
|
44
|
-
config.before(:suite) do
|
|
45
|
-
# rubocop:disable Style/GlobalVars
|
|
46
|
-
$original_disable_webui_permissions = ENV.fetch("DISABLE_WEBUI_PERMISSIONS", nil)
|
|
47
|
-
# rubocop:enable Style/GlobalVars
|
|
48
|
-
ENV["DISABLE_WEBUI_PERMISSIONS"] = "false"
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
config.after(:suite) do
|
|
52
|
-
# rubocop:disable Style/GlobalVars
|
|
53
|
-
if $original_disable_webui_permissions
|
|
54
|
-
ENV["DISABLE_WEBUI_PERMISSIONS"] = $original_disable_webui_permissions
|
|
55
|
-
else
|
|
56
|
-
ENV.delete("DISABLE_WEBUI_PERMISSIONS")
|
|
57
|
-
end
|
|
58
|
-
# rubocop:enable Style/GlobalVars
|
|
59
|
-
end
|
|
60
|
-
end
|