decision_agent 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +1060 -0
- data/bin/decision_agent +104 -0
- data/lib/decision_agent/agent.rb +147 -0
- data/lib/decision_agent/audit/adapter.rb +9 -0
- data/lib/decision_agent/audit/logger_adapter.rb +27 -0
- data/lib/decision_agent/audit/null_adapter.rb +8 -0
- data/lib/decision_agent/context.rb +42 -0
- data/lib/decision_agent/decision.rb +51 -0
- data/lib/decision_agent/dsl/condition_evaluator.rb +133 -0
- data/lib/decision_agent/dsl/rule_parser.rb +36 -0
- data/lib/decision_agent/dsl/schema_validator.rb +275 -0
- data/lib/decision_agent/errors.rb +62 -0
- data/lib/decision_agent/evaluation.rb +52 -0
- data/lib/decision_agent/evaluators/base.rb +15 -0
- data/lib/decision_agent/evaluators/json_rule_evaluator.rb +51 -0
- data/lib/decision_agent/evaluators/static_evaluator.rb +31 -0
- data/lib/decision_agent/replay/replay.rb +147 -0
- data/lib/decision_agent/scoring/base.rb +19 -0
- data/lib/decision_agent/scoring/consensus.rb +40 -0
- data/lib/decision_agent/scoring/max_weight.rb +16 -0
- data/lib/decision_agent/scoring/threshold.rb +40 -0
- data/lib/decision_agent/scoring/weighted_average.rb +26 -0
- data/lib/decision_agent/version.rb +3 -0
- data/lib/decision_agent/web/public/app.js +580 -0
- data/lib/decision_agent/web/public/index.html +190 -0
- data/lib/decision_agent/web/public/styles.css +558 -0
- data/lib/decision_agent/web/server.rb +255 -0
- data/lib/decision_agent.rb +29 -0
- data/spec/agent_spec.rb +249 -0
- data/spec/api_contract_spec.rb +430 -0
- data/spec/audit_adapters_spec.rb +74 -0
- data/spec/comprehensive_edge_cases_spec.rb +1777 -0
- data/spec/context_spec.rb +84 -0
- data/spec/dsl_validation_spec.rb +648 -0
- data/spec/edge_cases_spec.rb +353 -0
- data/spec/examples/feedback_aware_evaluator_spec.rb +460 -0
- data/spec/json_rule_evaluator_spec.rb +587 -0
- data/spec/replay_edge_cases_spec.rb +699 -0
- data/spec/replay_spec.rb +210 -0
- data/spec/scoring_spec.rb +225 -0
- data/spec/spec_helper.rb +28 -0
- metadata +133 -0
|
@@ -0,0 +1,1777 @@
|
|
|
1
|
+
require "spec_helper"
|
|
2
|
+
|
|
3
|
+
RSpec.describe "Comprehensive Edge Cases" do
|
|
4
|
+
# ============================================================
|
|
5
|
+
# JSON Rule DSL Edge Cases
|
|
6
|
+
# ============================================================
|
|
7
|
+
|
|
8
|
+
describe "JSON Rule DSL edge cases" do
|
|
9
|
+
describe "invalid operators" do
|
|
10
|
+
it "raises error when operator is unknown" do
|
|
11
|
+
rules = {
|
|
12
|
+
version: "1.0",
|
|
13
|
+
ruleset: "test",
|
|
14
|
+
rules: [
|
|
15
|
+
{
|
|
16
|
+
id: "rule_1",
|
|
17
|
+
if: { field: "status", op: "unknown_op", value: "active" },
|
|
18
|
+
then: { decision: "approve" }
|
|
19
|
+
}
|
|
20
|
+
]
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
expect {
|
|
24
|
+
DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
25
|
+
}.to raise_error(DecisionAgent::InvalidRuleDslError, /Unsupported operator 'unknown_op'/)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
it "raises error when operator is misspelled" do
|
|
29
|
+
rules = {
|
|
30
|
+
version: "1.0",
|
|
31
|
+
ruleset: "test",
|
|
32
|
+
rules: [
|
|
33
|
+
{
|
|
34
|
+
id: "rule_1",
|
|
35
|
+
if: { field: "score", op: "greather_than", value: 10 },
|
|
36
|
+
then: { decision: "pass" }
|
|
37
|
+
}
|
|
38
|
+
]
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
expect {
|
|
42
|
+
DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
43
|
+
}.to raise_error(DecisionAgent::InvalidRuleDslError, /Unsupported operator 'greather_than'/)
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
it "raises error when operator is nil" do
|
|
47
|
+
rules = {
|
|
48
|
+
version: "1.0",
|
|
49
|
+
ruleset: "test",
|
|
50
|
+
rules: [
|
|
51
|
+
{
|
|
52
|
+
id: "rule_1",
|
|
53
|
+
if: { field: "status", op: nil, value: "active" },
|
|
54
|
+
then: { decision: "approve" }
|
|
55
|
+
}
|
|
56
|
+
]
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
expect {
|
|
60
|
+
DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
61
|
+
}.to raise_error(DecisionAgent::InvalidRuleDslError, /missing 'op'/)
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
describe "deeply nested conditions" do
|
|
66
|
+
it "handles deeply nested all/any combinations" do
|
|
67
|
+
rules = {
|
|
68
|
+
version: "1.0",
|
|
69
|
+
ruleset: "test",
|
|
70
|
+
rules: [
|
|
71
|
+
{
|
|
72
|
+
id: "rule_1",
|
|
73
|
+
if: {
|
|
74
|
+
all: [
|
|
75
|
+
{
|
|
76
|
+
any: [
|
|
77
|
+
{ field: "a", op: "eq", value: 1 },
|
|
78
|
+
{
|
|
79
|
+
all: [
|
|
80
|
+
{ field: "b", op: "eq", value: 2 },
|
|
81
|
+
{ field: "c", op: "eq", value: 3 }
|
|
82
|
+
]
|
|
83
|
+
}
|
|
84
|
+
]
|
|
85
|
+
},
|
|
86
|
+
{ field: "d", op: "eq", value: 4 }
|
|
87
|
+
]
|
|
88
|
+
},
|
|
89
|
+
then: { decision: "complex_match" }
|
|
90
|
+
}
|
|
91
|
+
]
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
95
|
+
|
|
96
|
+
# Should match via first branch of 'any'
|
|
97
|
+
context1 = DecisionAgent::Context.new({ a: 1, d: 4 })
|
|
98
|
+
expect(evaluator.evaluate(context1)).not_to be_nil
|
|
99
|
+
|
|
100
|
+
# Should match via second branch of 'any'
|
|
101
|
+
context2 = DecisionAgent::Context.new({ b: 2, c: 3, d: 4 })
|
|
102
|
+
expect(evaluator.evaluate(context2)).not_to be_nil
|
|
103
|
+
|
|
104
|
+
# Should not match (missing 'd')
|
|
105
|
+
context3 = DecisionAgent::Context.new({ a: 1 })
|
|
106
|
+
expect(evaluator.evaluate(context3)).to be_nil
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
it "handles empty nested conditions gracefully" do
|
|
110
|
+
rules = {
|
|
111
|
+
version: "1.0",
|
|
112
|
+
ruleset: "test",
|
|
113
|
+
rules: [
|
|
114
|
+
{
|
|
115
|
+
id: "rule_1",
|
|
116
|
+
if: {
|
|
117
|
+
all: [
|
|
118
|
+
{ field: "status", op: "eq", value: "active" },
|
|
119
|
+
{ any: [] }
|
|
120
|
+
]
|
|
121
|
+
},
|
|
122
|
+
then: { decision: "approve" }
|
|
123
|
+
}
|
|
124
|
+
]
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
128
|
+
context = DecisionAgent::Context.new({ status: "active" })
|
|
129
|
+
|
|
130
|
+
# any: [] should return false, making the all condition false
|
|
131
|
+
evaluation = evaluator.evaluate(context)
|
|
132
|
+
expect(evaluation).to be_nil
|
|
133
|
+
end
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
describe "missing fields in nested structures" do
|
|
137
|
+
it "handles missing intermediate nested fields" do
|
|
138
|
+
rules = {
|
|
139
|
+
version: "1.0",
|
|
140
|
+
ruleset: "test",
|
|
141
|
+
rules: [
|
|
142
|
+
{
|
|
143
|
+
id: "rule_1",
|
|
144
|
+
if: { field: "user.profile.role", op: "eq", value: "admin" },
|
|
145
|
+
then: { decision: "allow" }
|
|
146
|
+
}
|
|
147
|
+
]
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
151
|
+
|
|
152
|
+
# Missing profile
|
|
153
|
+
context1 = DecisionAgent::Context.new({ user: {} })
|
|
154
|
+
expect(evaluator.evaluate(context1)).to be_nil
|
|
155
|
+
|
|
156
|
+
# user is nil
|
|
157
|
+
context2 = DecisionAgent::Context.new({ user: nil })
|
|
158
|
+
expect(evaluator.evaluate(context2)).to be_nil
|
|
159
|
+
|
|
160
|
+
# profile is nil
|
|
161
|
+
context3 = DecisionAgent::Context.new({ user: { profile: nil } })
|
|
162
|
+
expect(evaluator.evaluate(context3)).to be_nil
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
it "handles array-like nested access attempts" do
|
|
166
|
+
rules = {
|
|
167
|
+
version: "1.0",
|
|
168
|
+
ruleset: "test",
|
|
169
|
+
rules: [
|
|
170
|
+
{
|
|
171
|
+
id: "rule_1",
|
|
172
|
+
if: { field: "items.0.name", op: "eq", value: "first" },
|
|
173
|
+
then: { decision: "match" }
|
|
174
|
+
}
|
|
175
|
+
]
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
179
|
+
|
|
180
|
+
# This should gracefully fail since array access isn't supported in current implementation
|
|
181
|
+
context = DecisionAgent::Context.new({ items: [{ name: "first" }] })
|
|
182
|
+
evaluation = evaluator.evaluate(context)
|
|
183
|
+
|
|
184
|
+
# Current implementation doesn't support array indexing
|
|
185
|
+
expect(evaluation).to be_nil
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
it "returns nil for very deeply missing nested fields" do
|
|
189
|
+
rules = {
|
|
190
|
+
version: "1.0",
|
|
191
|
+
ruleset: "test",
|
|
192
|
+
rules: [
|
|
193
|
+
{
|
|
194
|
+
id: "rule_1",
|
|
195
|
+
if: { field: "a.b.c.d.e.f.g.h", op: "eq", value: "deep" },
|
|
196
|
+
then: { decision: "found" }
|
|
197
|
+
}
|
|
198
|
+
]
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
202
|
+
|
|
203
|
+
# Missing at various levels
|
|
204
|
+
expect(evaluator.evaluate(DecisionAgent::Context.new({}))).to be_nil
|
|
205
|
+
expect(evaluator.evaluate(DecisionAgent::Context.new({ a: {} }))).to be_nil
|
|
206
|
+
expect(evaluator.evaluate(DecisionAgent::Context.new({ a: { b: { c: {} } } }))).to be_nil
|
|
207
|
+
end
|
|
208
|
+
|
|
209
|
+
it "handles partial path matches gracefully" do
|
|
210
|
+
rules = {
|
|
211
|
+
version: "1.0",
|
|
212
|
+
ruleset: "test",
|
|
213
|
+
rules: [
|
|
214
|
+
{
|
|
215
|
+
id: "rule_1",
|
|
216
|
+
if: { field: "user.settings.theme", op: "eq", value: "dark" },
|
|
217
|
+
then: { decision: "dark_mode" }
|
|
218
|
+
}
|
|
219
|
+
]
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
223
|
+
|
|
224
|
+
# Path exists partially but value is wrong type
|
|
225
|
+
context = DecisionAgent::Context.new({ user: { settings: "not_a_hash" } })
|
|
226
|
+
expect(evaluator.evaluate(context)).to be_nil
|
|
227
|
+
end
|
|
228
|
+
end
|
|
229
|
+
|
|
230
|
+
describe "very deep nesting (5+ levels)" do
|
|
231
|
+
it "evaluates 5-level nested all/any combinations" do
|
|
232
|
+
rules = {
|
|
233
|
+
version: "1.0",
|
|
234
|
+
ruleset: "test",
|
|
235
|
+
rules: [
|
|
236
|
+
{
|
|
237
|
+
id: "rule_1",
|
|
238
|
+
if: {
|
|
239
|
+
all: [
|
|
240
|
+
{
|
|
241
|
+
any: [
|
|
242
|
+
{
|
|
243
|
+
all: [
|
|
244
|
+
{
|
|
245
|
+
any: [
|
|
246
|
+
{
|
|
247
|
+
all: [
|
|
248
|
+
{ field: "a", op: "eq", value: 1 },
|
|
249
|
+
{ field: "b", op: "eq", value: 2 }
|
|
250
|
+
]
|
|
251
|
+
}
|
|
252
|
+
]
|
|
253
|
+
}
|
|
254
|
+
]
|
|
255
|
+
}
|
|
256
|
+
]
|
|
257
|
+
}
|
|
258
|
+
]
|
|
259
|
+
},
|
|
260
|
+
then: { decision: "very_nested_match" }
|
|
261
|
+
}
|
|
262
|
+
]
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
266
|
+
|
|
267
|
+
# Should match
|
|
268
|
+
context1 = DecisionAgent::Context.new({ a: 1, b: 2 })
|
|
269
|
+
result = evaluator.evaluate(context1)
|
|
270
|
+
expect(result).not_to be_nil
|
|
271
|
+
expect(result.decision).to eq("very_nested_match")
|
|
272
|
+
|
|
273
|
+
# Should not match (missing b)
|
|
274
|
+
context2 = DecisionAgent::Context.new({ a: 1 })
|
|
275
|
+
expect(evaluator.evaluate(context2)).to be_nil
|
|
276
|
+
end
|
|
277
|
+
|
|
278
|
+
it "evaluates 7-level nested structures" do
|
|
279
|
+
rules = {
|
|
280
|
+
version: "1.0",
|
|
281
|
+
ruleset: "test",
|
|
282
|
+
rules: [
|
|
283
|
+
{
|
|
284
|
+
id: "rule_1",
|
|
285
|
+
if: {
|
|
286
|
+
all: [
|
|
287
|
+
{
|
|
288
|
+
any: [
|
|
289
|
+
{
|
|
290
|
+
all: [
|
|
291
|
+
{
|
|
292
|
+
any: [
|
|
293
|
+
{
|
|
294
|
+
all: [
|
|
295
|
+
{
|
|
296
|
+
any: [
|
|
297
|
+
{
|
|
298
|
+
all: [
|
|
299
|
+
{ field: "x", op: "eq", value: true }
|
|
300
|
+
]
|
|
301
|
+
}
|
|
302
|
+
]
|
|
303
|
+
}
|
|
304
|
+
]
|
|
305
|
+
}
|
|
306
|
+
]
|
|
307
|
+
}
|
|
308
|
+
]
|
|
309
|
+
}
|
|
310
|
+
]
|
|
311
|
+
}
|
|
312
|
+
]
|
|
313
|
+
},
|
|
314
|
+
then: { decision: "extremely_nested" }
|
|
315
|
+
}
|
|
316
|
+
]
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
320
|
+
|
|
321
|
+
context = DecisionAgent::Context.new({ x: true })
|
|
322
|
+
result = evaluator.evaluate(context)
|
|
323
|
+
expect(result).not_to be_nil
|
|
324
|
+
expect(result.decision).to eq("extremely_nested")
|
|
325
|
+
end
|
|
326
|
+
|
|
327
|
+
it "handles mixed all/any at each level" do
|
|
328
|
+
rules = {
|
|
329
|
+
version: "1.0",
|
|
330
|
+
ruleset: "test",
|
|
331
|
+
rules: [
|
|
332
|
+
{
|
|
333
|
+
id: "rule_1",
|
|
334
|
+
if: {
|
|
335
|
+
all: [
|
|
336
|
+
{ field: "level1", op: "eq", value: 1 },
|
|
337
|
+
{
|
|
338
|
+
any: [
|
|
339
|
+
{ field: "level2a", op: "eq", value: 2 },
|
|
340
|
+
{
|
|
341
|
+
all: [
|
|
342
|
+
{ field: "level3a", op: "eq", value: 3 },
|
|
343
|
+
{
|
|
344
|
+
any: [
|
|
345
|
+
{ field: "level4a", op: "eq", value: 4 },
|
|
346
|
+
{
|
|
347
|
+
all: [
|
|
348
|
+
{ field: "level5a", op: "eq", value: 5 },
|
|
349
|
+
{ field: "level5b", op: "eq", value: 6 }
|
|
350
|
+
]
|
|
351
|
+
}
|
|
352
|
+
]
|
|
353
|
+
}
|
|
354
|
+
]
|
|
355
|
+
}
|
|
356
|
+
]
|
|
357
|
+
}
|
|
358
|
+
]
|
|
359
|
+
},
|
|
360
|
+
then: { decision: "mixed_deep_match" }
|
|
361
|
+
}
|
|
362
|
+
]
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
366
|
+
|
|
367
|
+
# Match via first branch (level2a)
|
|
368
|
+
context1 = DecisionAgent::Context.new({ level1: 1, level2a: 2 })
|
|
369
|
+
expect(evaluator.evaluate(context1)&.decision).to eq("mixed_deep_match")
|
|
370
|
+
|
|
371
|
+
# Match via nested path (level4a)
|
|
372
|
+
context2 = DecisionAgent::Context.new({ level1: 1, level3a: 3, level4a: 4 })
|
|
373
|
+
expect(evaluator.evaluate(context2)&.decision).to eq("mixed_deep_match")
|
|
374
|
+
|
|
375
|
+
# Match via deepest path
|
|
376
|
+
context3 = DecisionAgent::Context.new({ level1: 1, level3a: 3, level5a: 5, level5b: 6 })
|
|
377
|
+
expect(evaluator.evaluate(context3)&.decision).to eq("mixed_deep_match")
|
|
378
|
+
|
|
379
|
+
# No match (missing level1)
|
|
380
|
+
context4 = DecisionAgent::Context.new({ level2a: 2 })
|
|
381
|
+
expect(evaluator.evaluate(context4)).to be_nil
|
|
382
|
+
end
|
|
383
|
+
end
|
|
384
|
+
|
|
385
|
+
describe "large rule sets" do
|
|
386
|
+
it "evaluates 100 rules efficiently (first-match semantics)" do
|
|
387
|
+
rules_array = 100.times.map do |i|
|
|
388
|
+
{
|
|
389
|
+
id: "rule_#{i}",
|
|
390
|
+
if: { field: "number", op: "eq", value: i },
|
|
391
|
+
then: { decision: "matched_#{i}", weight: 0.5 + (i / 200.0) }
|
|
392
|
+
}
|
|
393
|
+
end
|
|
394
|
+
|
|
395
|
+
rules = {
|
|
396
|
+
version: "1.0",
|
|
397
|
+
ruleset: "large_set",
|
|
398
|
+
rules: rules_array
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
402
|
+
|
|
403
|
+
# Should match first rule
|
|
404
|
+
context1 = DecisionAgent::Context.new({ number: 0 })
|
|
405
|
+
result1 = evaluator.evaluate(context1)
|
|
406
|
+
expect(result1.decision).to eq("matched_0")
|
|
407
|
+
expect(result1.metadata[:rule_id]).to eq("rule_0")
|
|
408
|
+
|
|
409
|
+
# Should match middle rule
|
|
410
|
+
context2 = DecisionAgent::Context.new({ number: 50 })
|
|
411
|
+
result2 = evaluator.evaluate(context2)
|
|
412
|
+
expect(result2.decision).to eq("matched_50")
|
|
413
|
+
|
|
414
|
+
# Should match last rule
|
|
415
|
+
context3 = DecisionAgent::Context.new({ number: 99 })
|
|
416
|
+
result3 = evaluator.evaluate(context3)
|
|
417
|
+
expect(result3.decision).to eq("matched_99")
|
|
418
|
+
|
|
419
|
+
# Should not match any rule
|
|
420
|
+
context4 = DecisionAgent::Context.new({ number: 100 })
|
|
421
|
+
expect(evaluator.evaluate(context4)).to be_nil
|
|
422
|
+
end
|
|
423
|
+
|
|
424
|
+
it "handles 500 rules without stack overflow" do
|
|
425
|
+
rules_array = 500.times.map do |i|
|
|
426
|
+
{
|
|
427
|
+
id: "rule_#{i}",
|
|
428
|
+
if: {
|
|
429
|
+
all: [
|
|
430
|
+
{ field: "category", op: "eq", value: "test" },
|
|
431
|
+
{ field: "id", op: "eq", value: i }
|
|
432
|
+
]
|
|
433
|
+
},
|
|
434
|
+
then: { decision: "rule_#{i}" }
|
|
435
|
+
}
|
|
436
|
+
end
|
|
437
|
+
|
|
438
|
+
rules = {
|
|
439
|
+
version: "1.0",
|
|
440
|
+
ruleset: "very_large_set",
|
|
441
|
+
rules: rules_array
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
445
|
+
|
|
446
|
+
# Should evaluate without error
|
|
447
|
+
context = DecisionAgent::Context.new({ category: "test", id: 250 })
|
|
448
|
+
result = evaluator.evaluate(context)
|
|
449
|
+
expect(result.decision).to eq("rule_250")
|
|
450
|
+
end
|
|
451
|
+
end
|
|
452
|
+
|
|
453
|
+
describe "unicode support" do
|
|
454
|
+
it "handles unicode field names" do
|
|
455
|
+
rules = {
|
|
456
|
+
version: "1.0",
|
|
457
|
+
ruleset: "test",
|
|
458
|
+
rules: [
|
|
459
|
+
{
|
|
460
|
+
id: "rule_1",
|
|
461
|
+
if: { field: "user.名前", op: "eq", value: "太郎" },
|
|
462
|
+
then: { decision: "japanese_match" }
|
|
463
|
+
}
|
|
464
|
+
]
|
|
465
|
+
}
|
|
466
|
+
|
|
467
|
+
evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
468
|
+
|
|
469
|
+
context = DecisionAgent::Context.new({ user: { "名前": "太郎" } })
|
|
470
|
+
result = evaluator.evaluate(context)
|
|
471
|
+
expect(result).not_to be_nil
|
|
472
|
+
expect(result.decision).to eq("japanese_match")
|
|
473
|
+
end
|
|
474
|
+
|
|
475
|
+
it "compares unicode values correctly" do
|
|
476
|
+
rules = {
|
|
477
|
+
version: "1.0",
|
|
478
|
+
ruleset: "test",
|
|
479
|
+
rules: [
|
|
480
|
+
{
|
|
481
|
+
id: "rule_1",
|
|
482
|
+
if: { field: "message", op: "eq", value: "Héllo Wörld 🌍" },
|
|
483
|
+
then: { decision: "unicode_match" }
|
|
484
|
+
}
|
|
485
|
+
]
|
|
486
|
+
}
|
|
487
|
+
|
|
488
|
+
evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
489
|
+
|
|
490
|
+
context = DecisionAgent::Context.new({ message: "Héllo Wörld 🌍" })
|
|
491
|
+
result = evaluator.evaluate(context)
|
|
492
|
+
expect(result).not_to be_nil
|
|
493
|
+
expect(result.decision).to eq("unicode_match")
|
|
494
|
+
|
|
495
|
+
# Should not match with different unicode
|
|
496
|
+
context2 = DecisionAgent::Context.new({ message: "Hello World 🌍" })
|
|
497
|
+
expect(evaluator.evaluate(context2)).to be_nil
|
|
498
|
+
end
|
|
499
|
+
|
|
500
|
+
it "handles emoji in decision values" do
|
|
501
|
+
rules = {
|
|
502
|
+
version: "1.0",
|
|
503
|
+
ruleset: "test",
|
|
504
|
+
rules: [
|
|
505
|
+
{
|
|
506
|
+
id: "rule_1",
|
|
507
|
+
if: { field: "status", op: "eq", value: "happy" },
|
|
508
|
+
then: { decision: "😊_approved", reason: "User is happy 🎉" }
|
|
509
|
+
}
|
|
510
|
+
]
|
|
511
|
+
}
|
|
512
|
+
|
|
513
|
+
evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
514
|
+
|
|
515
|
+
context = DecisionAgent::Context.new({ status: "happy" })
|
|
516
|
+
result = evaluator.evaluate(context)
|
|
517
|
+
expect(result.decision).to eq("😊_approved")
|
|
518
|
+
expect(result.reason).to eq("User is happy 🎉")
|
|
519
|
+
end
|
|
520
|
+
|
|
521
|
+
it "handles mixed unicode in nested field paths" do
|
|
522
|
+
rules = {
|
|
523
|
+
version: "1.0",
|
|
524
|
+
ruleset: "test",
|
|
525
|
+
rules: [
|
|
526
|
+
{
|
|
527
|
+
id: "rule_1",
|
|
528
|
+
if: { field: "用户.配置.语言", op: "eq", value: "中文" },
|
|
529
|
+
then: { decision: "chinese_locale" }
|
|
530
|
+
}
|
|
531
|
+
]
|
|
532
|
+
}
|
|
533
|
+
|
|
534
|
+
evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
535
|
+
|
|
536
|
+
context = DecisionAgent::Context.new({
|
|
537
|
+
"用户": {
|
|
538
|
+
"配置": {
|
|
539
|
+
"语言": "中文"
|
|
540
|
+
}
|
|
541
|
+
}
|
|
542
|
+
})
|
|
543
|
+
result = evaluator.evaluate(context)
|
|
544
|
+
expect(result).not_to be_nil
|
|
545
|
+
expect(result.decision).to eq("chinese_locale")
|
|
546
|
+
end
|
|
547
|
+
end
|
|
548
|
+
|
|
549
|
+
describe "malformed dot notation edge cases" do
|
|
550
|
+
it "rejects leading dots in field paths" do
|
|
551
|
+
rules = {
|
|
552
|
+
version: "1.0",
|
|
553
|
+
ruleset: "test",
|
|
554
|
+
rules: [
|
|
555
|
+
{
|
|
556
|
+
id: "rule_1",
|
|
557
|
+
if: { field: ".field", op: "eq", value: "test" },
|
|
558
|
+
then: { decision: "match" }
|
|
559
|
+
}
|
|
560
|
+
]
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
# Validator catches empty segments and raises error
|
|
564
|
+
expect {
|
|
565
|
+
DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
566
|
+
}.to raise_error(DecisionAgent::InvalidRuleDslError, /empty segments/)
|
|
567
|
+
end
|
|
568
|
+
|
|
569
|
+
it "handles trailing dots in field paths" do
|
|
570
|
+
rules = {
|
|
571
|
+
version: "1.0",
|
|
572
|
+
ruleset: "test",
|
|
573
|
+
rules: [
|
|
574
|
+
{
|
|
575
|
+
id: "rule_1",
|
|
576
|
+
if: { field: "field.nested.", op: "eq", value: "test" },
|
|
577
|
+
then: { decision: "match" }
|
|
578
|
+
}
|
|
579
|
+
]
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
# Trailing dots might be accepted but won't match in practice
|
|
583
|
+
# Or they might be rejected - test actual behavior
|
|
584
|
+
evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
585
|
+
context = DecisionAgent::Context.new({ field: { nested: { "": "test" } } })
|
|
586
|
+
|
|
587
|
+
# Evaluation behavior depends on implementation
|
|
588
|
+
# Just verify it doesn't crash
|
|
589
|
+
result = evaluator.evaluate(context)
|
|
590
|
+
# Result may be nil or match depending on how empty string keys are handled
|
|
591
|
+
expect(result).to be_a(DecisionAgent::Evaluation).or be_nil
|
|
592
|
+
end
|
|
593
|
+
|
|
594
|
+
it "rejects consecutive dots in field paths" do
|
|
595
|
+
rules = {
|
|
596
|
+
version: "1.0",
|
|
597
|
+
ruleset: "test",
|
|
598
|
+
rules: [
|
|
599
|
+
{
|
|
600
|
+
id: "rule_1",
|
|
601
|
+
if: { field: "field..nested", op: "eq", value: "test" },
|
|
602
|
+
then: { decision: "match" }
|
|
603
|
+
}
|
|
604
|
+
]
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
# Validator catches empty segments and raises error
|
|
608
|
+
expect {
|
|
609
|
+
DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
610
|
+
}.to raise_error(DecisionAgent::InvalidRuleDslError, /empty segments/)
|
|
611
|
+
end
|
|
612
|
+
|
|
613
|
+
it "rejects multiple consecutive dots in field paths" do
|
|
614
|
+
rules = {
|
|
615
|
+
version: "1.0",
|
|
616
|
+
ruleset: "test",
|
|
617
|
+
rules: [
|
|
618
|
+
{
|
|
619
|
+
id: "rule_1",
|
|
620
|
+
if: { field: "a..b..c", op: "eq", value: "test" },
|
|
621
|
+
then: { decision: "match" }
|
|
622
|
+
}
|
|
623
|
+
]
|
|
624
|
+
}
|
|
625
|
+
|
|
626
|
+
# Validator catches empty segments and raises error
|
|
627
|
+
expect {
|
|
628
|
+
DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
|
|
629
|
+
}.to raise_error(DecisionAgent::InvalidRuleDslError, /empty segments/)
|
|
630
|
+
end
|
|
631
|
+
end
|
|
632
|
+
end
|
|
633
|
+
|
|
634
|
+
# ============================================================
|
|
635
|
+
# Evaluator Behavior Edge Cases
|
|
636
|
+
# ============================================================
|
|
637
|
+
|
|
638
|
+
describe "evaluator behavior edge cases" do
|
|
639
|
+
describe "nil returns and empty evaluations" do
|
|
640
|
+
it "handles all evaluators returning nil" do
|
|
641
|
+
nil_evaluator = Class.new(DecisionAgent::Evaluators::Base) do
|
|
642
|
+
def evaluate(context, feedback: {})
|
|
643
|
+
nil
|
|
644
|
+
end
|
|
645
|
+
end
|
|
646
|
+
|
|
647
|
+
agent = DecisionAgent::Agent.new(evaluators: [nil_evaluator.new])
|
|
648
|
+
|
|
649
|
+
expect {
|
|
650
|
+
agent.decide(context: {})
|
|
651
|
+
}.to raise_error(DecisionAgent::NoEvaluationsError)
|
|
652
|
+
end
|
|
653
|
+
|
|
654
|
+
it "handles mix of nil and valid evaluations" do
|
|
655
|
+
nil_evaluator = Class.new(DecisionAgent::Evaluators::Base) do
|
|
656
|
+
def evaluate(context, feedback: {})
|
|
657
|
+
nil
|
|
658
|
+
end
|
|
659
|
+
end
|
|
660
|
+
|
|
661
|
+
valid_evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
662
|
+
decision: "approve",
|
|
663
|
+
weight: 0.8
|
|
664
|
+
)
|
|
665
|
+
|
|
666
|
+
agent = DecisionAgent::Agent.new(evaluators: [nil_evaluator.new, valid_evaluator])
|
|
667
|
+
|
|
668
|
+
result = agent.decide(context: {})
|
|
669
|
+
|
|
670
|
+
expect(result.decision).to eq("approve")
|
|
671
|
+
expect(result.evaluations.size).to eq(1)
|
|
672
|
+
end
|
|
673
|
+
end
|
|
674
|
+
|
|
675
|
+
describe "zero weight handling" do
|
|
676
|
+
it "handles evaluator with zero weight" do
|
|
677
|
+
zero_weight_evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
678
|
+
decision: "approve",
|
|
679
|
+
weight: 0.0,
|
|
680
|
+
reason: "Zero weight"
|
|
681
|
+
)
|
|
682
|
+
|
|
683
|
+
agent = DecisionAgent::Agent.new(evaluators: [zero_weight_evaluator])
|
|
684
|
+
|
|
685
|
+
result = agent.decide(context: {})
|
|
686
|
+
|
|
687
|
+
expect(result.decision).to eq("approve")
|
|
688
|
+
expect(result.evaluations.first.weight).to eq(0.0)
|
|
689
|
+
end
|
|
690
|
+
|
|
691
|
+
it "handles mix of zero and non-zero weights" do
|
|
692
|
+
zero_weight = DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
693
|
+
decision: "reject",
|
|
694
|
+
weight: 0.0,
|
|
695
|
+
name: "ZeroWeight"
|
|
696
|
+
)
|
|
697
|
+
|
|
698
|
+
normal_weight = DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
699
|
+
decision: "approve",
|
|
700
|
+
weight: 0.8,
|
|
701
|
+
name: "NormalWeight"
|
|
702
|
+
)
|
|
703
|
+
|
|
704
|
+
agent = DecisionAgent::Agent.new(
|
|
705
|
+
evaluators: [zero_weight, normal_weight],
|
|
706
|
+
scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
|
|
707
|
+
)
|
|
708
|
+
|
|
709
|
+
result = agent.decide(context: {})
|
|
710
|
+
|
|
711
|
+
# With weighted average, the non-zero weight should dominate
|
|
712
|
+
expect(result.decision).to eq("approve")
|
|
713
|
+
end
|
|
714
|
+
|
|
715
|
+
it "handles all evaluators with zero weight" do
|
|
716
|
+
zero_weight1 = DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
717
|
+
decision: "approve",
|
|
718
|
+
weight: 0.0
|
|
719
|
+
)
|
|
720
|
+
|
|
721
|
+
zero_weight2 = DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
722
|
+
decision: "reject",
|
|
723
|
+
weight: 0.0
|
|
724
|
+
)
|
|
725
|
+
|
|
726
|
+
agent = DecisionAgent::Agent.new(evaluators: [zero_weight1, zero_weight2])
|
|
727
|
+
|
|
728
|
+
result = agent.decide(context: {})
|
|
729
|
+
|
|
730
|
+
# Should still make a decision even with all zero weights
|
|
731
|
+
expect(result.decision).to be_a(String)
|
|
732
|
+
expect(result.confidence).to be_between(0.0, 1.0)
|
|
733
|
+
end
|
|
734
|
+
end
|
|
735
|
+
end
|
|
736
|
+
|
|
737
|
+
# ============================================================
|
|
738
|
+
# Conflict Resolution Edge Cases
|
|
739
|
+
# ============================================================
|
|
740
|
+
|
|
741
|
+
describe "conflict resolution edge cases" do
|
|
742
|
+
describe "equal weights" do
|
|
743
|
+
it "handles equal weights in WeightedAverage" do
|
|
744
|
+
eval1 = DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
745
|
+
decision: "approve",
|
|
746
|
+
weight: 0.5,
|
|
747
|
+
name: "Eval1"
|
|
748
|
+
)
|
|
749
|
+
|
|
750
|
+
eval2 = DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
751
|
+
decision: "reject",
|
|
752
|
+
weight: 0.5,
|
|
753
|
+
name: "Eval2"
|
|
754
|
+
)
|
|
755
|
+
|
|
756
|
+
agent = DecisionAgent::Agent.new(
|
|
757
|
+
evaluators: [eval1, eval2],
|
|
758
|
+
scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
|
|
759
|
+
)
|
|
760
|
+
|
|
761
|
+
result = agent.decide(context: {})
|
|
762
|
+
|
|
763
|
+
# Should choose one decision
|
|
764
|
+
expect(["approve", "reject"]).to include(result.decision)
|
|
765
|
+
# Confidence should reflect the tie
|
|
766
|
+
expect(result.confidence).to eq(0.5)
|
|
767
|
+
end
|
|
768
|
+
|
|
769
|
+
it "handles equal weights in MaxWeight" do
|
|
770
|
+
eval1 = DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
771
|
+
decision: "option_a",
|
|
772
|
+
weight: 0.7,
|
|
773
|
+
name: "Eval1"
|
|
774
|
+
)
|
|
775
|
+
|
|
776
|
+
eval2 = DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
777
|
+
decision: "option_b",
|
|
778
|
+
weight: 0.7,
|
|
779
|
+
name: "Eval2"
|
|
780
|
+
)
|
|
781
|
+
|
|
782
|
+
eval3 = DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
783
|
+
decision: "option_c",
|
|
784
|
+
weight: 0.7,
|
|
785
|
+
name: "Eval3"
|
|
786
|
+
)
|
|
787
|
+
|
|
788
|
+
agent = DecisionAgent::Agent.new(
|
|
789
|
+
evaluators: [eval1, eval2, eval3],
|
|
790
|
+
scoring_strategy: DecisionAgent::Scoring::MaxWeight.new
|
|
791
|
+
)
|
|
792
|
+
|
|
793
|
+
result = agent.decide(context: {})
|
|
794
|
+
|
|
795
|
+
# Should choose one of the options
|
|
796
|
+
expect(["option_a", "option_b", "option_c"]).to include(result.decision)
|
|
797
|
+
expect(result.confidence).to eq(0.7)
|
|
798
|
+
end
|
|
799
|
+
|
|
800
|
+
it "handles equal weights in Consensus" do
|
|
801
|
+
eval1 = DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
802
|
+
decision: "approve",
|
|
803
|
+
weight: 0.6,
|
|
804
|
+
name: "Eval1"
|
|
805
|
+
)
|
|
806
|
+
|
|
807
|
+
eval2 = DecisionAgent::Evaluators::StaticEvaluator.new(
|
|
808
|
+
decision: "reject",
|
|
809
|
+
weight: 0.6,
|
|
810
|
+
name: "Eval2"
|
|
811
|
+
)
|
|
812
|
+
|
|
813
|
+
agent = DecisionAgent::Agent.new(
|
|
814
|
+
evaluators: [eval1, eval2],
|
|
815
|
+
scoring_strategy: DecisionAgent::Scoring::Consensus.new
|
|
816
|
+
)
|
|
817
|
+
|
|
818
|
+
result = agent.decide(context: {})
|
|
819
|
+
|
|
820
|
+
# Should choose one decision, likely with reduced confidence
|
|
821
|
+
expect(["approve", "reject"]).to include(result.decision)
|
|
822
|
+
end
|
|
823
|
+
end
|
|
824
|
+
|
|
825
|
+
describe "mixed decisions" do
  it "handles three-way split in decisions" do
    # One evaluator per outcome, all carrying the same weight.
    split_panel = [
      %w[approve Eval1],
      %w[reject Eval2],
      %w[manual_review Eval3]
    ].map do |decision, name|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: decision,
        weight: 0.5,
        name: name
      )
    end

    agent = DecisionAgent::Agent.new(
      evaluators: split_panel,
      scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
    )

    result = agent.decide(context: {})

    expect(["approve", "reject", "manual_review"]).to include(result.decision)
  end

  it "handles many evaluators with diverse decisions" do
    # 10 evaluators cycling through 5 distinct outcomes with growing weights.
    diverse_panel = Array.new(10) do |index|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "decision_#{index % 5}",
        weight: 0.1 * (index + 1),
        name: "Eval#{index}"
      )
    end

    agent = DecisionAgent::Agent.new(
      evaluators: diverse_panel,
      scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
    )

    result = agent.decide(context: {})

    expect(result.decision).to be_a(String)
    expect(result.confidence).to be_between(0.0, 1.0)
    expect(result.evaluations.size).to eq(10)
  end

  it "handles 50 evaluators with diverse decisions" do
    # 50 evaluators cycling through 10 outcomes, weights from 0.02 up to 1.0.
    crowd = Array.new(50) do |index|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "decision_#{index % 10}",
        weight: 0.02 * (index + 1),
        name: "Eval#{index}"
      )
    end

    agent = DecisionAgent::Agent.new(
      evaluators: crowd,
      scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
    )

    result = agent.decide(context: {})

    expect(result.decision).to be_a(String)
    expect(result.decision).to match(/decision_\d/)
    expect(result.confidence).to be_between(0.0, 1.0)
    expect(result.evaluations.size).to eq(50)
  end

  it "handles all evaluators with same decision but different weights" do
    # 20 agreeing evaluators whose weights span 0.05 through 1.0.
    unanimous_panel = Array.new(20) do |index|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "unanimous",
        weight: 0.05 * (index + 1),
        name: "Eval#{index}"
      )
    end

    agent = DecisionAgent::Agent.new(
      evaluators: unanimous_panel,
      scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
    )

    result = agent.decide(context: {})

    expect(result.decision).to eq("unanimous")
    # Every weight backs the same outcome, so confidence maxes out.
    expect(result.confidence).to eq(1.0)
    expect(result.evaluations.size).to eq(20)
  end
end
|
|
920
|
+
end
|
|
921
|
+
|
|
922
|
+
# ============================================================
|
|
923
|
+
# Scoring Strategy Edge Cases
|
|
924
|
+
# ============================================================
|
|
925
|
+
|
|
926
|
+
describe "scoring strategy edge cases" do
|
|
927
|
+
describe "MaxWeight edge cases" do
  it "handles single evaluation" do
    lone_evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.6
    )

    agent = DecisionAgent::Agent.new(
      evaluators: [lone_evaluator],
      scoring_strategy: DecisionAgent::Scoring::MaxWeight.new
    )

    result = agent.decide(context: {})

    # With a single vote, MaxWeight mirrors that vote exactly.
    expect(result.decision).to eq("approve")
    expect(result.confidence).to eq(0.6)
  end

  it "ignores lower weights completely" do
    heavyweight = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.9,
      name: "High"
    )
    lightweight_a = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "reject",
      weight: 0.2,
      name: "Low1"
    )
    lightweight_b = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "reject",
      weight: 0.3,
      name: "Low2"
    )

    # Order is deliberately shuffled; only the maximum weight should matter.
    agent = DecisionAgent::Agent.new(
      evaluators: [lightweight_a, heavyweight, lightweight_b],
      scoring_strategy: DecisionAgent::Scoring::MaxWeight.new
    )

    result = agent.decide(context: {})

    expect(result.decision).to eq("approve")
    expect(result.confidence).to eq(0.9)
  end
end
|
|
975
|
+
|
|
976
|
+
describe "Consensus edge cases" do
  it "reduces confidence when no clear consensus" do
    # Three evaluators, three different answers — nobody reaches 50% agreement.
    deadlocked_panel = {
      "Eval1" => "option_a",
      "Eval2" => "option_b",
      "Eval3" => "option_c"
    }.map do |name, decision|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: decision,
        weight: 0.4,
        name: name
      )
    end

    agent = DecisionAgent::Agent.new(
      evaluators: deadlocked_panel,
      scoring_strategy: DecisionAgent::Scoring::Consensus.new(minimum_agreement: 0.5)
    )

    result = agent.decide(context: {})

    expect(result.confidence).to be < 0.5
  end

  it "handles unanimous decision" do
    agreeing_panel = Array.new(5) do |index|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "approve",
        weight: 0.6 + (index * 0.05),
        name: "Eval#{index}"
      )
    end

    agent = DecisionAgent::Agent.new(
      evaluators: agreeing_panel,
      scoring_strategy: DecisionAgent::Scoring::Consensus.new
    )

    result = agent.decide(context: {})

    expect(result.decision).to eq("approve")
    # Full agreement should push confidence well above the midpoint.
    expect(result.confidence).to be > 0.5
  end

  it "handles varying minimum agreement thresholds" do
    approver_one = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.8,
      name: "Eval1"
    )
    approver_two = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.7,
      name: "Eval2"
    )
    dissenter = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "reject",
      weight: 0.6,
      name: "Eval3"
    )
    panel = [approver_one, approver_two, dissenter]

    # A permissive threshold is easily satisfied by the 2/3 majority.
    agent_low = DecisionAgent::Agent.new(
      evaluators: panel,
      scoring_strategy: DecisionAgent::Scoring::Consensus.new(minimum_agreement: 0.3)
    )
    expect(agent_low.decide(context: {}).decision).to eq("approve")

    # A demanding threshold still picks the majority outcome (with lower confidence).
    agent_high = DecisionAgent::Agent.new(
      evaluators: panel,
      scoring_strategy: DecisionAgent::Scoring::Consensus.new(minimum_agreement: 0.9)
    )
    expect(agent_high.decide(context: {}).decision).to eq("approve")
  end
end
|
|
1067
|
+
|
|
1068
|
+
describe "Threshold edge cases" do
  it "returns fallback when all evaluations below threshold" do
    weak_vote = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.5
    )

    agent = DecisionAgent::Agent.new(
      evaluators: [weak_vote],
      scoring_strategy: DecisionAgent::Scoring::Threshold.new(
        threshold: 0.8,
        fallback_decision: "needs_review"
      )
    )

    result = agent.decide(context: {})

    # 0.5 never clears the 0.8 bar, so the fallback decision wins.
    expect(result.decision).to eq("needs_review")
    expect(result.confidence).to be < 0.8
  end

  it "returns decision when exactly at threshold" do
    borderline_vote = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.75
    )

    agent = DecisionAgent::Agent.new(
      evaluators: [borderline_vote],
      scoring_strategy: DecisionAgent::Scoring::Threshold.new(threshold: 0.75)
    )

    result = agent.decide(context: {})

    # The threshold is inclusive: exactly 0.75 passes.
    expect(result.decision).to eq("approve")
    expect(result.confidence).to eq(0.75)
  end

  it "handles threshold with conflicting evaluations" do
    strong_approve = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.9,
      name: "High"
    )
    mild_approve = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.7,
      name: "Medium"
    )
    weak_reject = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "reject",
      weight: 0.4,
      name: "Low"
    )

    agent = DecisionAgent::Agent.new(
      evaluators: [strong_approve, mild_approve, weak_reject],
      scoring_strategy: DecisionAgent::Scoring::Threshold.new(threshold: 0.75)
    )

    result = agent.decide(context: {})

    # The approve votes average 0.8, which clears the 0.75 threshold.
    expect(result.decision).to eq("approve")
  end

  it "handles very high threshold" do
    near_certain_vote = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.99
    )

    agent = DecisionAgent::Agent.new(
      evaluators: [near_certain_vote],
      scoring_strategy: DecisionAgent::Scoring::Threshold.new(
        threshold: 0.999,
        fallback_decision: "uncertain"
      )
    )

    result = agent.decide(context: {})

    # 0.99 < 0.999, so even a near-certain vote falls back.
    expect(result.decision).to eq("uncertain")
  end
end
|
|
1155
|
+
|
|
1156
|
+
describe "stress tests with large evaluator counts" do
  it "handles 100 evaluators efficiently with WeightedAverage" do
    big_panel = Array.new(100) do |index|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "decision_#{index % 10}",
        weight: (index + 1) / 200.0, # weights from 0.005 up to 0.5
        name: "Eval#{index}"
      )
    end

    agent = DecisionAgent::Agent.new(
      evaluators: big_panel,
      scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
    )

    result = agent.decide(context: {})

    expect(result.decision).to be_a(String)
    expect(result.confidence).to be_between(0.0, 1.0)
    expect(result.evaluations.size).to eq(100)
  end

  it "handles 100 evaluators with all same decision" do
    consensus_panel = Array.new(100) do |index|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "consensus",
        weight: 0.5 + (index / 200.0), # weights from 0.5 up to 0.995
        name: "Eval#{index}"
      )
    end

    agent = DecisionAgent::Agent.new(
      evaluators: consensus_panel,
      scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
    )

    result = agent.decide(context: {})

    expect(result.decision).to eq("consensus")
    expect(result.confidence).to eq(1.0)
  end

  it "handles 100 evaluators with MaxWeight strategy" do
    ranked_panel = Array.new(100) do |index|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "decision_#{index}",
        weight: index / 100.0, # weights from 0.0 up to 0.99
        name: "Eval#{index}"
      )
    end

    agent = DecisionAgent::Agent.new(
      evaluators: ranked_panel,
      scoring_strategy: DecisionAgent::Scoring::MaxWeight.new
    )

    result = agent.decide(context: {})

    # The last evaluator carries the top weight (0.99) and must win.
    expect(result.decision).to eq("decision_99")
    expect(result.confidence).to be_within(0.001).of(0.99)
  end

  it "handles 100 evaluators with Consensus strategy" do
    # 60 approvals versus 40 rejections.
    approvals = Array.new(60) do |index|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "approve",
        weight: 0.6,
        name: "ApproveEval#{index}"
      )
    end
    rejections = Array.new(40) do |index|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "reject",
        weight: 0.7,
        name: "RejectEval#{index}"
      )
    end

    agent = DecisionAgent::Agent.new(
      evaluators: approvals + rejections,
      scoring_strategy: DecisionAgent::Scoring::Consensus.new(minimum_agreement: 0.5)
    )

    result = agent.decide(context: {})

    # "approve" holds 60% agreement and should carry the vote.
    expect(result.decision).to eq("approve")
    expect(result.evaluations.size).to eq(100)
  end
end
|
|
1249
|
+
|
|
1250
|
+
describe "floating point precision edge cases" do
  it "handles repeating decimals (0.333333...)" do
    one_third = 1.0 / 3.0
    thirds_panel = %w[Eval1 Eval2 Eval3].map do |name|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: "approve",
        weight: one_third,
        name: name
      )
    end

    agent = DecisionAgent::Agent.new(
      evaluators: thirds_panel,
      scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
    )

    result = agent.decide(context: {})

    expect(result.decision).to eq("approve")
    # Three thirds should land within rounding error of 1.0.
    expect(result.confidence).to be_within(0.0001).of(1.0)
  end

  it "normalizes confidence to 4 decimal places" do
    long_fraction_vote = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.123456789, # deliberately long fractional part
      name: "Eval1"
    )

    agent = DecisionAgent::Agent.new(
      evaluators: [long_fraction_vote],
      scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
    )

    result = agent.decide(context: {})

    # Reported confidence must carry at most 4 decimal places.
    expect(result.confidence.to_s.split('.').last.length).to be <= 4
  end

  it "handles very small weights (0.0001)" do
    tiny_approve = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.0001,
      name: "Eval1"
    )
    tiny_reject = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "reject",
      weight: 0.0001,
      name: "Eval2"
    )

    agent = DecisionAgent::Agent.new(
      evaluators: [tiny_approve, tiny_reject],
      scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
    )

    result = agent.decide(context: {})

    # Tiny weights must not trigger precision blow-ups.
    expect(result.decision).to be_a(String)
    expect(result.confidence).to be_between(0.0, 1.0)
  end

  it "handles weights that sum to slightly above 1.0 due to precision" do
    major_vote = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.7,
      name: "Eval1"
    )
    minor_vote = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.3 + 0.0000001, # nudged upward to push the sum past 1.0
      name: "Eval2"
    )

    agent = DecisionAgent::Agent.new(
      evaluators: [major_vote, minor_vote],
      scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
    )

    result = agent.decide(context: {})

    # Confidence must be clamped/normalized so it never exceeds 1.0.
    expect(result.confidence).to be <= 1.0
  end

  it "handles Consensus with floating point agreement rates" do
    # 4 of 7 approve — an agreement rate of 0.571428..., just above 0.57.
    seven_panel = Array.new(7) do |index|
      DecisionAgent::Evaluators::StaticEvaluator.new(
        decision: index < 4 ? "approve" : "reject",
        weight: 0.6,
        name: "Eval#{index}"
      )
    end

    agent = DecisionAgent::Agent.new(
      evaluators: seven_panel,
      scoring_strategy: DecisionAgent::Scoring::Consensus.new(minimum_agreement: 0.57)
    )

    result = agent.decide(context: {})

    expect(result.decision).to eq("approve")
    expect(result.confidence).to be_a(Float)
  end
end
|
|
1371
|
+
end
|
|
1372
|
+
|
|
1373
|
+
# ============================================================
|
|
1374
|
+
# Decision Replay Edge Cases
|
|
1375
|
+
# ============================================================
|
|
1376
|
+
|
|
1377
|
+
describe "decision replay edge cases" do
|
|
1378
|
+
describe "strict mode behavior" do
  it "detects confidence differences above tolerance in strict mode" do
    static_vote = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.8
    )
    agent = DecisionAgent::Agent.new(evaluators: [static_vote])

    recorded = agent.decide(context: { user: "alice" })

    tampered = recorded.audit_payload.dup
    # 0.001 exceeds the 0.0001 replay tolerance, so strict replay must fail.
    tampered[:confidence] = recorded.confidence + 0.001

    expect {
      DecisionAgent::Replay.run(tampered, strict: true)
    }.to raise_error(DecisionAgent::ReplayMismatchError)
  end

  it "passes when confidence is identical in strict mode" do
    static_vote = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.8
    )
    agent = DecisionAgent::Agent.new(evaluators: [static_vote])

    recorded = agent.decide(context: { user: "alice" })

    # An untouched payload replays cleanly even under strict checking.
    expect {
      DecisionAgent::Replay.run(recorded.audit_payload, strict: true)
    }.not_to raise_error
  end

  it "allows confidence within tolerance in strict mode" do
    static_vote = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.8
    )
    agent = DecisionAgent::Agent.new(evaluators: [static_vote])

    recorded = agent.decide(context: { user: "alice" })

    nudged = recorded.audit_payload.dup
    # 0.00005 stays inside the 0.0001 tolerance window.
    nudged[:confidence] = recorded.confidence + 0.00005

    expect {
      DecisionAgent::Replay.run(nudged, strict: true)
    }.not_to raise_error
  end
end
|
|
1435
|
+
|
|
1436
|
+
describe "non-strict mode behavior" do
  it "logs but doesn't raise on decision mismatch" do
    static_vote = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.8
    )
    agent = DecisionAgent::Agent.new(evaluators: [static_vote])

    recorded = agent.decide(context: { user: "alice" })

    tampered = recorded.audit_payload.dup
    tampered[:decision] = "reject"

    replayed = nil
    expect {
      replayed = DecisionAgent::Replay.run(tampered, strict: false)
    }.to output(/Decision changed/).to_stderr

    expect(replayed).not_to be_nil
  end

  it "logs but doesn't raise on confidence mismatch" do
    static_vote = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.8
    )
    agent = DecisionAgent::Agent.new(evaluators: [static_vote])

    recorded = agent.decide(context: { user: "alice" })

    tampered = recorded.audit_payload.dup
    tampered[:confidence] = 0.5

    replayed = nil
    expect {
      replayed = DecisionAgent::Replay.run(tampered, strict: false)
    }.to output(/Confidence changed/).to_stderr

    expect(replayed).not_to be_nil
  end

  it "continues with multiple mismatches in non-strict mode" do
    static_vote = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.8
    )
    agent = DecisionAgent::Agent.new(evaluators: [static_vote])

    recorded = agent.decide(context: { user: "alice" })

    tampered = recorded.audit_payload.dup
    tampered[:decision] = "reject"
    tampered[:confidence] = 0.3

    replayed = nil
    expect {
      replayed = DecisionAgent::Replay.run(tampered, strict: false)
    }.to output(/Decision changed.*Confidence changed/m).to_stderr

    # The replay recomputes the real outcome rather than trusting the payload.
    expect(replayed.decision).to eq("approve")
  end
end
|
|
1504
|
+
|
|
1505
|
+
describe "replay validation errors" do
  it "raises error when context is missing" do
    payload_without_context = {
      decision: "approve",
      confidence: 0.8,
      evaluations: []
    }

    expect {
      DecisionAgent::Replay.run(payload_without_context, strict: true)
    }.to raise_error(DecisionAgent::InvalidRuleDslError, /context/)
  end

  it "raises error when decision is missing" do
    payload_without_decision = {
      context: {},
      confidence: 0.8,
      evaluations: []
    }

    expect {
      DecisionAgent::Replay.run(payload_without_decision, strict: true)
    }.to raise_error(DecisionAgent::InvalidRuleDslError, /decision/)
  end

  it "raises error when confidence is missing" do
    payload_without_confidence = {
      context: {},
      decision: "approve",
      evaluations: []
    }

    expect {
      DecisionAgent::Replay.run(payload_without_confidence, strict: true)
    }.to raise_error(DecisionAgent::InvalidRuleDslError, /confidence/)
  end

  it "raises error when evaluations is missing" do
    payload_without_evaluations = {
      context: {},
      decision: "approve",
      confidence: 0.8
    }

    expect {
      DecisionAgent::Replay.run(payload_without_evaluations, strict: true)
    }.to raise_error(DecisionAgent::InvalidRuleDslError, /evaluations/)
  end

  it "handles empty audit payload" do
    # A payload with nothing in it should fail validation outright.
    expect {
      DecisionAgent::Replay.run({}, strict: true)
    }.to raise_error(DecisionAgent::InvalidRuleDslError)
  end
end
|
|
1560
|
+
|
|
1561
|
+
describe "replay with different scoring strategies" do
  it "correctly replays with WeightedAverage strategy" do
    static_vote = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.8
    )

    agent = DecisionAgent::Agent.new(
      evaluators: [static_vote],
      scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
    )

    recorded = agent.decide(context: { test: true })
    replayed = DecisionAgent::Replay.run(recorded.audit_payload, strict: true)

    # Replaying the audit trail must reproduce the original outcome.
    expect(replayed.decision).to eq(recorded.decision)
    expect(replayed.confidence).to be_within(0.0001).of(recorded.confidence)
  end

  it "correctly replays with Threshold strategy" do
    confident_vote = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.9
    )

    agent = DecisionAgent::Agent.new(
      evaluators: [confident_vote],
      scoring_strategy: DecisionAgent::Scoring::Threshold.new(
        threshold: 0.8,
        fallback_decision: "review"
      )
    )

    recorded = agent.decide(context: { test: true })
    replayed = DecisionAgent::Replay.run(recorded.audit_payload, strict: true)

    expect(replayed.decision).to eq(recorded.decision)
    expect(replayed.confidence).to be_within(0.0001).of(recorded.confidence)
  end
end
|
|
1609
|
+
end
|
|
1610
|
+
|
|
1611
|
+
# ============================================================
|
|
1612
|
+
# Error Handling Edge Cases
|
|
1613
|
+
# ============================================================
|
|
1614
|
+
|
|
1615
|
+
describe "error handling edge cases" do
|
|
1616
|
+
describe "invalid JSON rule formats" do
  it "raises error for non-hash JSON" do
    expect {
      DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: [1, 2, 3])
    }.to raise_error(DecisionAgent::InvalidRuleDslError)
  end

  it "raises error for string input instead of hash" do
    expect {
      DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: "not a hash")
    }.to raise_error(DecisionAgent::InvalidRuleDslError)
  end

  it "raises error when rules is not an array" do
    malformed_dsl = {
      version: "1.0",
      rules: "not an array"
    }

    expect {
      DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: malformed_dsl)
    }.to raise_error(DecisionAgent::InvalidRuleDslError)
  end

  it "raises error when rule is not a hash" do
    malformed_dsl = {
      version: "1.0",
      rules: ["not a hash", "also not a hash"]
    }

    expect {
      DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: malformed_dsl)
    }.to raise_error(DecisionAgent::InvalidRuleDslError)
  end
end
|
|
1651
|
+
|
|
1652
|
+
describe "no evaluations scenarios" do
  it "raises NoEvaluationsError when single evaluator returns nil" do
    # An evaluator that always abstains by returning nil.
    abstaining_class = Class.new(DecisionAgent::Evaluators::Base) do
      def evaluate(context, feedback: {})
        nil
      end
    end

    agent = DecisionAgent::Agent.new(evaluators: [abstaining_class.new])

    expect {
      agent.decide(context: {})
    }.to raise_error(DecisionAgent::NoEvaluationsError)
  end

  it "raises NoEvaluationsError when all evaluators return nil" do
    abstaining_class_one = Class.new(DecisionAgent::Evaluators::Base) do
      def evaluate(context, feedback: {})
        nil
      end
    end

    abstaining_class_two = Class.new(DecisionAgent::Evaluators::Base) do
      def evaluate(context, feedback: {})
        nil
      end
    end

    agent = DecisionAgent::Agent.new(
      evaluators: [abstaining_class_one.new, abstaining_class_two.new]
    )

    expect {
      agent.decide(context: {})
    }.to raise_error(DecisionAgent::NoEvaluationsError) do |error|
      expect(error.message).to include("No evaluators returned a decision")
    end
  end

  it "raises NoEvaluationsError when all evaluators raise exceptions" do
    crashing_class_one = Class.new(DecisionAgent::Evaluators::Base) do
      def evaluate(context, feedback: {})
        raise StandardError, "Error 1"
      end
    end

    crashing_class_two = Class.new(DecisionAgent::Evaluators::Base) do
      def evaluate(context, feedback: {})
        raise StandardError, "Error 2"
      end
    end

    agent = DecisionAgent::Agent.new(
      evaluators: [crashing_class_one.new, crashing_class_two.new]
    )

    expect {
      agent.decide(context: {})
    }.to raise_error(DecisionAgent::NoEvaluationsError)
  end

  it "succeeds when at least one evaluator succeeds despite others failing" do
    crashing_class = Class.new(DecisionAgent::Evaluators::Base) do
      def evaluate(context, feedback: {})
        raise StandardError, "Intentional failure"
      end
    end

    healthy_evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
      decision: "approve",
      weight: 0.8
    )

    agent = DecisionAgent::Agent.new(evaluators: [crashing_class.new, healthy_evaluator])

    result = agent.decide(context: {})

    # The failing evaluator is skipped; the healthy one decides.
    expect(result.decision).to eq("approve")
  end
end
|
|
1728
|
+
|
|
1729
|
+
describe "boundary condition validation" do
  # Weight must lie in [0.0, 1.0]; values just outside either edge raise.
  it "validates weight is not above 1.0" do
    expect {
      DecisionAgent::Evaluation.new(decision: "test", weight: 1.1, reason: "test", evaluator_name: "test")
    }.to raise_error(DecisionAgent::InvalidWeightError)
  end

  it "validates weight is not negative" do
    expect {
      DecisionAgent::Evaluation.new(decision: "test", weight: -0.5, reason: "test", evaluator_name: "test")
    }.to raise_error(DecisionAgent::InvalidWeightError)
  end

  # Confidence must likewise lie in [0.0, 1.0]; epsilon-sized violations raise.
  it "validates confidence is not above 1.0" do
    expect {
      DecisionAgent::Decision.new(decision: "test", confidence: 1.001, explanations: [], evaluations: [], audit_payload: {})
    }.to raise_error(DecisionAgent::InvalidConfidenceError)
  end

  it "validates confidence is not negative" do
    expect {
      DecisionAgent::Decision.new(decision: "test", confidence: -0.001, explanations: [], evaluations: [], audit_payload: {})
    }.to raise_error(DecisionAgent::InvalidConfidenceError)
  end
end
|
|
1776
|
+
end
|
|
1777
|
+
end
|