decision_agent 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +1060 -0
- data/bin/decision_agent +104 -0
- data/lib/decision_agent/agent.rb +147 -0
- data/lib/decision_agent/audit/adapter.rb +9 -0
- data/lib/decision_agent/audit/logger_adapter.rb +27 -0
- data/lib/decision_agent/audit/null_adapter.rb +8 -0
- data/lib/decision_agent/context.rb +42 -0
- data/lib/decision_agent/decision.rb +51 -0
- data/lib/decision_agent/dsl/condition_evaluator.rb +133 -0
- data/lib/decision_agent/dsl/rule_parser.rb +36 -0
- data/lib/decision_agent/dsl/schema_validator.rb +275 -0
- data/lib/decision_agent/errors.rb +62 -0
- data/lib/decision_agent/evaluation.rb +52 -0
- data/lib/decision_agent/evaluators/base.rb +15 -0
- data/lib/decision_agent/evaluators/json_rule_evaluator.rb +51 -0
- data/lib/decision_agent/evaluators/static_evaluator.rb +31 -0
- data/lib/decision_agent/replay/replay.rb +147 -0
- data/lib/decision_agent/scoring/base.rb +19 -0
- data/lib/decision_agent/scoring/consensus.rb +40 -0
- data/lib/decision_agent/scoring/max_weight.rb +16 -0
- data/lib/decision_agent/scoring/threshold.rb +40 -0
- data/lib/decision_agent/scoring/weighted_average.rb +26 -0
- data/lib/decision_agent/version.rb +3 -0
- data/lib/decision_agent/web/public/app.js +580 -0
- data/lib/decision_agent/web/public/index.html +190 -0
- data/lib/decision_agent/web/public/styles.css +558 -0
- data/lib/decision_agent/web/server.rb +255 -0
- data/lib/decision_agent.rb +29 -0
- data/spec/agent_spec.rb +249 -0
- data/spec/api_contract_spec.rb +430 -0
- data/spec/audit_adapters_spec.rb +74 -0
- data/spec/comprehensive_edge_cases_spec.rb +1777 -0
- data/spec/context_spec.rb +84 -0
- data/spec/dsl_validation_spec.rb +648 -0
- data/spec/edge_cases_spec.rb +353 -0
- data/spec/examples/feedback_aware_evaluator_spec.rb +460 -0
- data/spec/json_rule_evaluator_spec.rb +587 -0
- data/spec/replay_edge_cases_spec.rb +699 -0
- data/spec/replay_spec.rb +210 -0
- data/spec/scoring_spec.rb +225 -0
- data/spec/spec_helper.rb +28 -0
- metadata +133 -0
data/spec/replay_spec.rb
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
require "spec_helper"
|
|
2
|
+
|
|
3
|
+
RSpec.describe DecisionAgent::Replay do
|
|
4
|
+
let(:evaluator) do
|
|
5
|
+
DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
6
|
+
decision: "approve",
|
|
7
|
+
weight: 0.8,
|
|
8
|
+
reason: "Static approval"
|
|
9
|
+
)
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
let(:agent) do
|
|
13
|
+
DecisionAgent::Agent.new(evaluators: [evaluator])
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
describe ".run" do
|
|
17
|
+
it "replays decision from audit payload in strict mode" do
|
|
18
|
+
context = { user: "alice", action: "login" }
|
|
19
|
+
original_result = agent.decide(context: context)
|
|
20
|
+
|
|
21
|
+
replayed_result = DecisionAgent::Replay.run(
|
|
22
|
+
original_result.audit_payload,
|
|
23
|
+
strict: true
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
expect(replayed_result.decision).to eq(original_result.decision)
|
|
27
|
+
expect(replayed_result.confidence).to be_within(0.0001).of(original_result.confidence)
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
it "raises ReplayMismatchError in strict mode when decision differs" do
|
|
31
|
+
context = { user: "alice" }
|
|
32
|
+
original_result = agent.decide(context: context)
|
|
33
|
+
|
|
34
|
+
modified_payload = original_result.audit_payload.dup
|
|
35
|
+
modified_payload[:decision] = "reject"
|
|
36
|
+
|
|
37
|
+
expect {
|
|
38
|
+
DecisionAgent::Replay.run(modified_payload, strict: true)
|
|
39
|
+
}.to raise_error(DecisionAgent::ReplayMismatchError) do |error|
|
|
40
|
+
expect(error.differences).to include(/decision mismatch/)
|
|
41
|
+
expect(error.expected[:decision]).to eq("reject")
|
|
42
|
+
expect(error.actual[:decision]).to eq("approve")
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
it "raises ReplayMismatchError in strict mode when confidence differs" do
|
|
47
|
+
context = { user: "alice" }
|
|
48
|
+
original_result = agent.decide(context: context)
|
|
49
|
+
|
|
50
|
+
modified_payload = original_result.audit_payload.dup
|
|
51
|
+
modified_payload[:confidence] = 0.5
|
|
52
|
+
|
|
53
|
+
expect {
|
|
54
|
+
DecisionAgent::Replay.run(modified_payload, strict: true)
|
|
55
|
+
}.to raise_error(DecisionAgent::ReplayMismatchError) do |error|
|
|
56
|
+
expect(error.differences).to include(/confidence mismatch/)
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
it "allows differences in non-strict mode" do
|
|
61
|
+
context = { user: "alice" }
|
|
62
|
+
original_result = agent.decide(context: context)
|
|
63
|
+
|
|
64
|
+
modified_payload = original_result.audit_payload.dup
|
|
65
|
+
modified_payload[:decision] = "reject"
|
|
66
|
+
|
|
67
|
+
expect {
|
|
68
|
+
DecisionAgent::Replay.run(modified_payload, strict: false)
|
|
69
|
+
}.not_to raise_error
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
it "logs differences in non-strict mode" do
|
|
73
|
+
context = { user: "alice" }
|
|
74
|
+
original_result = agent.decide(context: context)
|
|
75
|
+
|
|
76
|
+
modified_payload = original_result.audit_payload.dup
|
|
77
|
+
modified_payload[:decision] = "reject"
|
|
78
|
+
|
|
79
|
+
expect {
|
|
80
|
+
DecisionAgent::Replay.run(modified_payload, strict: false)
|
|
81
|
+
}.to output(/Decision changed/).to_stderr
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
it "validates required fields in audit payload" do
|
|
85
|
+
invalid_payload = { context: {} }
|
|
86
|
+
|
|
87
|
+
expect {
|
|
88
|
+
DecisionAgent::Replay.run(invalid_payload, strict: true)
|
|
89
|
+
}.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key/)
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
it "reconstructs evaluations from audit payload" do
|
|
93
|
+
eval1 = DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
94
|
+
decision: "approve",
|
|
95
|
+
weight: 0.7,
|
|
96
|
+
reason: "Eval 1",
|
|
97
|
+
name: "Evaluator1"
|
|
98
|
+
)
|
|
99
|
+
eval2 = DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
100
|
+
decision: "approve",
|
|
101
|
+
weight: 0.9,
|
|
102
|
+
reason: "Eval 2",
|
|
103
|
+
name: "Evaluator2"
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
multi_agent = DecisionAgent::Agent.new(evaluators: [eval1, eval2])
|
|
107
|
+
original_result = multi_agent.decide(context: { user: "bob" })
|
|
108
|
+
|
|
109
|
+
replayed_result = DecisionAgent::Replay.run(
|
|
110
|
+
original_result.audit_payload,
|
|
111
|
+
strict: true
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
expect(replayed_result.evaluations.size).to eq(2)
|
|
115
|
+
expect(replayed_result.evaluations.map(&:evaluator_name)).to match_array(["Evaluator1", "Evaluator2"])
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
it "uses correct scoring strategy from audit payload" do
|
|
119
|
+
max_weight_agent = DecisionAgent::Agent.new(
|
|
120
|
+
evaluators: [evaluator],
|
|
121
|
+
scoring_strategy: DecisionAgent::Scoring::MaxWeight.new
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
original_result = max_weight_agent.decide(context: { user: "charlie" })
|
|
125
|
+
|
|
126
|
+
expect(original_result.audit_payload[:scoring_strategy]).to include("MaxWeight")
|
|
127
|
+
|
|
128
|
+
replayed_result = DecisionAgent::Replay.run(
|
|
129
|
+
original_result.audit_payload,
|
|
130
|
+
strict: true
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
expect(replayed_result.decision).to eq(original_result.decision)
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
it "handles symbol and string keys in audit payload" do
|
|
137
|
+
context = { user: "alice" }
|
|
138
|
+
original_result = agent.decide(context: context)
|
|
139
|
+
|
|
140
|
+
string_key_payload = JSON.parse(JSON.generate(original_result.audit_payload))
|
|
141
|
+
|
|
142
|
+
replayed_result = DecisionAgent::Replay.run(
|
|
143
|
+
string_key_payload,
|
|
144
|
+
strict: true
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
expect(replayed_result.decision).to eq(original_result.decision)
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
it "preserves feedback in replay" do
|
|
151
|
+
context = { user: "alice" }
|
|
152
|
+
feedback = { source: "manual_override" }
|
|
153
|
+
|
|
154
|
+
original_result = agent.decide(context: context, feedback: feedback)
|
|
155
|
+
|
|
156
|
+
replayed_result = DecisionAgent::Replay.run(
|
|
157
|
+
original_result.audit_payload,
|
|
158
|
+
strict: true
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
expect(replayed_result.audit_payload[:feedback]).to eq(feedback)
|
|
162
|
+
end
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
describe "deterministic replay" do
|
|
166
|
+
it "produces identical results for identical inputs across multiple replays" do
|
|
167
|
+
context = { user: "alice", priority: "high" }
|
|
168
|
+
original_result = agent.decide(context: context)
|
|
169
|
+
|
|
170
|
+
results = 5.times.map do
|
|
171
|
+
DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
results.each do |result|
|
|
175
|
+
expect(result.decision).to eq(original_result.decision)
|
|
176
|
+
expect(result.confidence).to be_within(0.0001).of(original_result.confidence)
|
|
177
|
+
end
|
|
178
|
+
end
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
describe "complex scenario replay" do
|
|
182
|
+
it "replays decisions from JSON rule evaluators" do
|
|
183
|
+
rules = {
|
|
184
|
+
version: "1.0",
|
|
185
|
+
ruleset: "test",
|
|
186
|
+
rules: [
|
|
187
|
+
{
|
|
188
|
+
id: "high_priority",
|
|
189
|
+
if: { field: "priority", op: "eq", value: "high" },
|
|
190
|
+
then: { decision: "escalate", weight: 0.9, reason: "High priority issue" }
|
|
191
|
+
}
|
|
192
|
+
]
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
json_evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
196
|
+
json_agent = DecisionAgent::Agent.new(evaluators: [json_evaluator])
|
|
197
|
+
|
|
198
|
+
context = { priority: "high", user: "alice" }
|
|
199
|
+
original_result = json_agent.decide(context: context)
|
|
200
|
+
|
|
201
|
+
replayed_result = DecisionAgent::Replay.run(
|
|
202
|
+
original_result.audit_payload,
|
|
203
|
+
strict: true
|
|
204
|
+
)
|
|
205
|
+
|
|
206
|
+
expect(replayed_result.decision).to eq("escalate")
|
|
207
|
+
expect(replayed_result.confidence).to be_within(0.0001).of(original_result.confidence)
|
|
208
|
+
end
|
|
209
|
+
end
|
|
210
|
+
end
|
data/spec/scoring_spec.rb
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
require "spec_helper"
|
|
2
|
+
|
|
3
|
+
RSpec.describe "Scoring Strategies" do
|
|
4
|
+
let(:eval1) do
|
|
5
|
+
DecisionAgent::Evaluation.new(
|
|
6
|
+
decision: "approve",
|
|
7
|
+
weight: 0.6,
|
|
8
|
+
reason: "Test 1",
|
|
9
|
+
evaluator_name: "Eval1"
|
|
10
|
+
)
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
let(:eval2) do
|
|
14
|
+
DecisionAgent::Evaluation.new(
|
|
15
|
+
decision: "approve",
|
|
16
|
+
weight: 0.8,
|
|
17
|
+
reason: "Test 2",
|
|
18
|
+
evaluator_name: "Eval2"
|
|
19
|
+
)
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
let(:eval3) do
|
|
23
|
+
DecisionAgent::Evaluation.new(
|
|
24
|
+
decision: "reject",
|
|
25
|
+
weight: 0.5,
|
|
26
|
+
reason: "Test 3",
|
|
27
|
+
evaluator_name: "Eval3"
|
|
28
|
+
)
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
describe DecisionAgent::Scoring::WeightedAverage do
|
|
32
|
+
it "calculates weighted average for single decision" do
|
|
33
|
+
strategy = DecisionAgent::Scoring::WeightedAverage.new
|
|
34
|
+
result = strategy.score([eval1, eval2])
|
|
35
|
+
|
|
36
|
+
expect(result[:decision]).to eq("approve")
|
|
37
|
+
expect(result[:confidence]).to eq(1.0)
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
it "calculates weighted average with conflicts" do
|
|
41
|
+
strategy = DecisionAgent::Scoring::WeightedAverage.new
|
|
42
|
+
result = strategy.score([eval1, eval2, eval3])
|
|
43
|
+
|
|
44
|
+
total_weight = 0.6 + 0.8 + 0.5
|
|
45
|
+
approve_weight = 0.6 + 0.8
|
|
46
|
+
expected_confidence = approve_weight / total_weight
|
|
47
|
+
|
|
48
|
+
expect(result[:decision]).to eq("approve")
|
|
49
|
+
expect(result[:confidence]).to be_within(0.0001).of(expected_confidence)
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
it "returns 0 confidence for empty evaluations" do
|
|
53
|
+
strategy = DecisionAgent::Scoring::WeightedAverage.new
|
|
54
|
+
result = strategy.score([])
|
|
55
|
+
|
|
56
|
+
expect(result[:decision]).to be_nil
|
|
57
|
+
expect(result[:confidence]).to eq(0.0)
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
it "normalizes confidence to [0, 1]" do
|
|
61
|
+
strategy = DecisionAgent::Scoring::WeightedAverage.new
|
|
62
|
+
result = strategy.score([eval1])
|
|
63
|
+
|
|
64
|
+
expect(result[:confidence]).to be_between(0.0, 1.0)
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
describe DecisionAgent::Scoring::MaxWeight do
|
|
69
|
+
it "selects decision with maximum weight" do
|
|
70
|
+
strategy = DecisionAgent::Scoring::MaxWeight.new
|
|
71
|
+
result = strategy.score([eval1, eval2, eval3])
|
|
72
|
+
|
|
73
|
+
expect(result[:decision]).to eq("approve")
|
|
74
|
+
expect(result[:confidence]).to eq(0.8)
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
it "uses first evaluation when weights are equal" do
|
|
78
|
+
eval_a = DecisionAgent::Evaluation.new(
|
|
79
|
+
decision: "option_a",
|
|
80
|
+
weight: 0.7,
|
|
81
|
+
reason: "Test A",
|
|
82
|
+
evaluator_name: "EvalA"
|
|
83
|
+
)
|
|
84
|
+
eval_b = DecisionAgent::Evaluation.new(
|
|
85
|
+
decision: "option_b",
|
|
86
|
+
weight: 0.7,
|
|
87
|
+
reason: "Test B",
|
|
88
|
+
evaluator_name: "EvalB"
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
strategy = DecisionAgent::Scoring::MaxWeight.new
|
|
92
|
+
result = strategy.score([eval_a, eval_b])
|
|
93
|
+
|
|
94
|
+
expect(["option_a", "option_b"]).to include(result[:decision])
|
|
95
|
+
expect(result[:confidence]).to eq(0.7)
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
it "returns 0 confidence for empty evaluations" do
|
|
99
|
+
strategy = DecisionAgent::Scoring::MaxWeight.new
|
|
100
|
+
result = strategy.score([])
|
|
101
|
+
|
|
102
|
+
expect(result[:decision]).to be_nil
|
|
103
|
+
expect(result[:confidence]).to eq(0.0)
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
describe DecisionAgent::Scoring::Consensus do
|
|
108
|
+
it "selects decision with highest agreement" do
|
|
109
|
+
eval4 = DecisionAgent::Evaluation.new(
|
|
110
|
+
decision: "approve",
|
|
111
|
+
weight: 0.7,
|
|
112
|
+
reason: "Test 4",
|
|
113
|
+
evaluator_name: "Eval4"
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
strategy = DecisionAgent::Scoring::Consensus.new
|
|
117
|
+
result = strategy.score([eval1, eval2, eval3, eval4])
|
|
118
|
+
|
|
119
|
+
expect(result[:decision]).to eq("approve")
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
it "considers both agreement and weight" do
|
|
123
|
+
low_weight_majority = [
|
|
124
|
+
DecisionAgent::Evaluation.new(decision: "approve", weight: 0.3, reason: "A", evaluator_name: "E1"),
|
|
125
|
+
DecisionAgent::Evaluation.new(decision: "approve", weight: 0.3, reason: "B", evaluator_name: "E2"),
|
|
126
|
+
DecisionAgent::Evaluation.new(decision: "approve", weight: 0.3, reason: "C", evaluator_name: "E3")
|
|
127
|
+
]
|
|
128
|
+
|
|
129
|
+
high_weight_minority = [
|
|
130
|
+
DecisionAgent::Evaluation.new(decision: "reject", weight: 0.9, reason: "D", evaluator_name: "E4")
|
|
131
|
+
]
|
|
132
|
+
|
|
133
|
+
strategy = DecisionAgent::Scoring::Consensus.new
|
|
134
|
+
result = strategy.score(low_weight_majority + high_weight_minority)
|
|
135
|
+
|
|
136
|
+
expect(result[:decision]).to eq("approve")
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
it "reduces confidence when minimum agreement not met" do
|
|
140
|
+
eval_spread = [
|
|
141
|
+
DecisionAgent::Evaluation.new(decision: "option_a", weight: 0.8, reason: "A", evaluator_name: "E1"),
|
|
142
|
+
DecisionAgent::Evaluation.new(decision: "option_b", weight: 0.7, reason: "B", evaluator_name: "E2"),
|
|
143
|
+
DecisionAgent::Evaluation.new(decision: "option_c", weight: 0.6, reason: "C", evaluator_name: "E3")
|
|
144
|
+
]
|
|
145
|
+
|
|
146
|
+
strategy = DecisionAgent::Scoring::Consensus.new(minimum_agreement: 0.5)
|
|
147
|
+
result = strategy.score(eval_spread)
|
|
148
|
+
|
|
149
|
+
expect(result[:confidence]).to be < 0.5
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
it "allows custom minimum agreement threshold" do
|
|
153
|
+
strategy = DecisionAgent::Scoring::Consensus.new(minimum_agreement: 0.7)
|
|
154
|
+
result = strategy.score([eval1, eval2, eval3])
|
|
155
|
+
|
|
156
|
+
expect(result[:decision]).to eq("approve")
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
it "returns 0 confidence for empty evaluations" do
|
|
160
|
+
strategy = DecisionAgent::Scoring::Consensus.new
|
|
161
|
+
result = strategy.score([])
|
|
162
|
+
|
|
163
|
+
expect(result[:decision]).to be_nil
|
|
164
|
+
expect(result[:confidence]).to eq(0.0)
|
|
165
|
+
end
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
describe DecisionAgent::Scoring::Threshold do
|
|
169
|
+
it "accepts decision when weight meets threshold" do
|
|
170
|
+
strategy = DecisionAgent::Scoring::Threshold.new(threshold: 0.7)
|
|
171
|
+
result = strategy.score([eval2])
|
|
172
|
+
|
|
173
|
+
expect(result[:decision]).to eq("approve")
|
|
174
|
+
expect(result[:confidence]).to eq(0.8)
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
it "returns fallback decision when weight below threshold" do
|
|
178
|
+
strategy = DecisionAgent::Scoring::Threshold.new(threshold: 0.9, fallback_decision: "manual_review")
|
|
179
|
+
result = strategy.score([eval2])
|
|
180
|
+
|
|
181
|
+
expect(result[:decision]).to eq("manual_review")
|
|
182
|
+
expect(result[:confidence]).to be < 0.9
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
it "uses average weight across evaluations with same decision" do
|
|
186
|
+
strategy = DecisionAgent::Scoring::Threshold.new(threshold: 0.7)
|
|
187
|
+
result = strategy.score([eval1, eval2])
|
|
188
|
+
|
|
189
|
+
avg_weight = (0.6 + 0.8) / 2
|
|
190
|
+
expect(result[:decision]).to eq("approve")
|
|
191
|
+
expect(result[:confidence]).to eq(avg_weight)
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
it "uses default fallback decision" do
|
|
195
|
+
strategy = DecisionAgent::Scoring::Threshold.new(threshold: 0.9)
|
|
196
|
+
result = strategy.score([eval1])
|
|
197
|
+
|
|
198
|
+
expect(result[:decision]).to eq("no_decision")
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
it "returns fallback for empty evaluations" do
|
|
202
|
+
strategy = DecisionAgent::Scoring::Threshold.new(fallback_decision: "default")
|
|
203
|
+
result = strategy.score([])
|
|
204
|
+
|
|
205
|
+
expect(result[:decision]).to eq("default")
|
|
206
|
+
expect(result[:confidence]).to eq(0.0)
|
|
207
|
+
end
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
describe "confidence bounds" do
|
|
211
|
+
it "ensures all strategies return confidence between 0 and 1" do
|
|
212
|
+
strategies = [
|
|
213
|
+
DecisionAgent::Scoring::WeightedAverage.new,
|
|
214
|
+
DecisionAgent::Scoring::MaxWeight.new,
|
|
215
|
+
DecisionAgent::Scoring::Consensus.new,
|
|
216
|
+
DecisionAgent::Scoring::Threshold.new
|
|
217
|
+
]
|
|
218
|
+
|
|
219
|
+
strategies.each do |strategy|
|
|
220
|
+
result = strategy.score([eval1, eval2, eval3])
|
|
221
|
+
expect(result[:confidence]).to be_between(0.0, 1.0)
|
|
222
|
+
end
|
|
223
|
+
end
|
|
224
|
+
end
|
|
225
|
+
end
|
data/spec/spec_helper.rb
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
require "simplecov"
|
|
2
|
+
SimpleCov.start do
|
|
3
|
+
add_filter "/spec/"
|
|
4
|
+
add_filter "/examples/"
|
|
5
|
+
end
|
|
6
|
+
|
|
7
|
+
require "decision_agent"
|
|
8
|
+
|
|
9
|
+
RSpec.configure do |config|
|
|
10
|
+
config.expect_with :rspec do |expectations|
|
|
11
|
+
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
config.mock_with :rspec do |mocks|
|
|
15
|
+
mocks.verify_partial_doubles = true
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
config.shared_context_metadata_behavior = :apply_to_host_groups
|
|
19
|
+
config.filter_run_when_matching :focus
|
|
20
|
+
config.example_status_persistence_file_path = "spec/examples.txt"
|
|
21
|
+
config.disable_monkey_patching!
|
|
22
|
+
config.warnings = true
|
|
23
|
+
|
|
24
|
+
config.default_formatter = "doc" if config.files_to_run.one?
|
|
25
|
+
|
|
26
|
+
config.order = :random
|
|
27
|
+
Kernel.srand config.seed
|
|
28
|
+
end
|
metadata
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: decision_agent
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Sam Aswin
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2025-12-19 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: sinatra
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '3.0'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '3.0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: rspec
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '3.12'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '3.12'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: rake
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '13.0'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '13.0'
|
|
55
|
+
description: A production-grade decision agent that provides deterministic rule evaluation,
|
|
56
|
+
conflict resolution, and full audit replay capabilities. Framework-agnostic and
|
|
57
|
+
AI-optional.
|
|
58
|
+
email:
|
|
59
|
+
- samaswin87@gmail.com
|
|
60
|
+
executables:
|
|
61
|
+
- decision_agent
|
|
62
|
+
extensions: []
|
|
63
|
+
extra_rdoc_files: []
|
|
64
|
+
files:
|
|
65
|
+
- LICENSE.txt
|
|
66
|
+
- README.md
|
|
67
|
+
- bin/decision_agent
|
|
68
|
+
- lib/decision_agent.rb
|
|
69
|
+
- lib/decision_agent/agent.rb
|
|
70
|
+
- lib/decision_agent/audit/adapter.rb
|
|
71
|
+
- lib/decision_agent/audit/logger_adapter.rb
|
|
72
|
+
- lib/decision_agent/audit/null_adapter.rb
|
|
73
|
+
- lib/decision_agent/context.rb
|
|
74
|
+
- lib/decision_agent/decision.rb
|
|
75
|
+
- lib/decision_agent/dsl/condition_evaluator.rb
|
|
76
|
+
- lib/decision_agent/dsl/rule_parser.rb
|
|
77
|
+
- lib/decision_agent/dsl/schema_validator.rb
|
|
78
|
+
- lib/decision_agent/errors.rb
|
|
79
|
+
- lib/decision_agent/evaluation.rb
|
|
80
|
+
- lib/decision_agent/evaluators/base.rb
|
|
81
|
+
- lib/decision_agent/evaluators/json_rule_evaluator.rb
|
|
82
|
+
- lib/decision_agent/evaluators/static_evaluator.rb
|
|
83
|
+
- lib/decision_agent/replay/replay.rb
|
|
84
|
+
- lib/decision_agent/scoring/base.rb
|
|
85
|
+
- lib/decision_agent/scoring/consensus.rb
|
|
86
|
+
- lib/decision_agent/scoring/max_weight.rb
|
|
87
|
+
- lib/decision_agent/scoring/threshold.rb
|
|
88
|
+
- lib/decision_agent/scoring/weighted_average.rb
|
|
89
|
+
- lib/decision_agent/version.rb
|
|
90
|
+
- lib/decision_agent/web/public/app.js
|
|
91
|
+
- lib/decision_agent/web/public/index.html
|
|
92
|
+
- lib/decision_agent/web/public/styles.css
|
|
93
|
+
- lib/decision_agent/web/server.rb
|
|
94
|
+
- spec/agent_spec.rb
|
|
95
|
+
- spec/api_contract_spec.rb
|
|
96
|
+
- spec/audit_adapters_spec.rb
|
|
97
|
+
- spec/comprehensive_edge_cases_spec.rb
|
|
98
|
+
- spec/context_spec.rb
|
|
99
|
+
- spec/dsl_validation_spec.rb
|
|
100
|
+
- spec/edge_cases_spec.rb
|
|
101
|
+
- spec/examples/feedback_aware_evaluator_spec.rb
|
|
102
|
+
- spec/json_rule_evaluator_spec.rb
|
|
103
|
+
- spec/replay_edge_cases_spec.rb
|
|
104
|
+
- spec/replay_spec.rb
|
|
105
|
+
- spec/scoring_spec.rb
|
|
106
|
+
- spec/spec_helper.rb
|
|
107
|
+
homepage: https://github.com/samaswin87/decision_agent
|
|
108
|
+
licenses:
|
|
109
|
+
- MIT
|
|
110
|
+
metadata:
|
|
111
|
+
homepage_uri: https://github.com/samaswin87/decision_agent
|
|
112
|
+
source_code_uri: https://github.com/samaswin87/decision_agent
|
|
113
|
+
changelog_uri: https://github.com/samaswin87/decision_agent/blob/main/CHANGELOG.md
|
|
114
|
+
post_install_message:
|
|
115
|
+
rdoc_options: []
|
|
116
|
+
require_paths:
|
|
117
|
+
- lib
|
|
118
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
119
|
+
requirements:
|
|
120
|
+
- - ">="
|
|
121
|
+
- !ruby/object:Gem::Version
|
|
122
|
+
version: 2.7.0
|
|
123
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
124
|
+
requirements:
|
|
125
|
+
- - ">="
|
|
126
|
+
- !ruby/object:Gem::Version
|
|
127
|
+
version: '0'
|
|
128
|
+
requirements: []
|
|
129
|
+
rubygems_version: 3.0.3.1
|
|
130
|
+
signing_key:
|
|
131
|
+
specification_version: 4
|
|
132
|
+
summary: Deterministic, explainable, auditable decision engine for Ruby
|
|
133
|
+
test_files: []
|