decision_agent 0.3.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. checksums.yaml +4 -4
  2. data/README.md +272 -7
  3. data/lib/decision_agent/agent.rb +72 -1
  4. data/lib/decision_agent/context.rb +1 -0
  5. data/lib/decision_agent/data_enrichment/cache/memory_adapter.rb +86 -0
  6. data/lib/decision_agent/data_enrichment/cache_adapter.rb +49 -0
  7. data/lib/decision_agent/data_enrichment/circuit_breaker.rb +135 -0
  8. data/lib/decision_agent/data_enrichment/client.rb +220 -0
  9. data/lib/decision_agent/data_enrichment/config.rb +78 -0
  10. data/lib/decision_agent/data_enrichment/errors.rb +36 -0
  11. data/lib/decision_agent/decision.rb +102 -2
  12. data/lib/decision_agent/dmn/feel/evaluator.rb +28 -6
  13. data/lib/decision_agent/dsl/condition_evaluator.rb +982 -839
  14. data/lib/decision_agent/dsl/schema_validator.rb +51 -13
  15. data/lib/decision_agent/evaluators/dmn_evaluator.rb +106 -19
  16. data/lib/decision_agent/evaluators/json_rule_evaluator.rb +69 -9
  17. data/lib/decision_agent/explainability/condition_trace.rb +83 -0
  18. data/lib/decision_agent/explainability/explainability_result.rb +52 -0
  19. data/lib/decision_agent/explainability/rule_trace.rb +39 -0
  20. data/lib/decision_agent/explainability/trace_collector.rb +24 -0
  21. data/lib/decision_agent/monitoring/alert_manager.rb +5 -1
  22. data/lib/decision_agent/simulation/errors.rb +18 -0
  23. data/lib/decision_agent/simulation/impact_analyzer.rb +498 -0
  24. data/lib/decision_agent/simulation/monte_carlo_simulator.rb +635 -0
  25. data/lib/decision_agent/simulation/replay_engine.rb +486 -0
  26. data/lib/decision_agent/simulation/scenario_engine.rb +318 -0
  27. data/lib/decision_agent/simulation/scenario_library.rb +163 -0
  28. data/lib/decision_agent/simulation/shadow_test_engine.rb +287 -0
  29. data/lib/decision_agent/simulation/what_if_analyzer.rb +1002 -0
  30. data/lib/decision_agent/simulation.rb +17 -0
  31. data/lib/decision_agent/version.rb +1 -1
  32. data/lib/decision_agent/versioning/activerecord_adapter.rb +23 -8
  33. data/lib/decision_agent/web/public/app.js +119 -0
  34. data/lib/decision_agent/web/public/index.html +49 -0
  35. data/lib/decision_agent/web/public/simulation.html +130 -0
  36. data/lib/decision_agent/web/public/simulation_impact.html +478 -0
  37. data/lib/decision_agent/web/public/simulation_replay.html +551 -0
  38. data/lib/decision_agent/web/public/simulation_shadow.html +546 -0
  39. data/lib/decision_agent/web/public/simulation_whatif.html +532 -0
  40. data/lib/decision_agent/web/public/styles.css +65 -0
  41. data/lib/decision_agent/web/server.rb +594 -23
  42. data/lib/decision_agent.rb +60 -2
  43. metadata +53 -73
  44. data/spec/ab_testing/ab_test_assignment_spec.rb +0 -253
  45. data/spec/ab_testing/ab_test_manager_spec.rb +0 -612
  46. data/spec/ab_testing/ab_test_spec.rb +0 -270
  47. data/spec/ab_testing/ab_testing_agent_spec.rb +0 -655
  48. data/spec/ab_testing/storage/adapter_spec.rb +0 -64
  49. data/spec/ab_testing/storage/memory_adapter_spec.rb +0 -485
  50. data/spec/activerecord_thread_safety_spec.rb +0 -553
  51. data/spec/advanced_operators_spec.rb +0 -3150
  52. data/spec/agent_spec.rb +0 -289
  53. data/spec/api_contract_spec.rb +0 -430
  54. data/spec/audit_adapters_spec.rb +0 -92
  55. data/spec/auth/access_audit_logger_spec.rb +0 -394
  56. data/spec/auth/authenticator_spec.rb +0 -112
  57. data/spec/auth/password_reset_spec.rb +0 -294
  58. data/spec/auth/permission_checker_spec.rb +0 -207
  59. data/spec/auth/permission_spec.rb +0 -73
  60. data/spec/auth/rbac_adapter_spec.rb +0 -778
  61. data/spec/auth/rbac_config_spec.rb +0 -82
  62. data/spec/auth/role_spec.rb +0 -51
  63. data/spec/auth/session_manager_spec.rb +0 -172
  64. data/spec/auth/session_spec.rb +0 -112
  65. data/spec/auth/user_spec.rb +0 -130
  66. data/spec/comprehensive_edge_cases_spec.rb +0 -1777
  67. data/spec/context_spec.rb +0 -127
  68. data/spec/decision_agent_spec.rb +0 -96
  69. data/spec/decision_spec.rb +0 -423
  70. data/spec/dmn/decision_graph_spec.rb +0 -282
  71. data/spec/dmn/decision_tree_spec.rb +0 -203
  72. data/spec/dmn/feel/errors_spec.rb +0 -18
  73. data/spec/dmn/feel/functions_spec.rb +0 -400
  74. data/spec/dmn/feel/simple_parser_spec.rb +0 -274
  75. data/spec/dmn/feel/types_spec.rb +0 -176
  76. data/spec/dmn/feel_parser_spec.rb +0 -489
  77. data/spec/dmn/hit_policy_spec.rb +0 -202
  78. data/spec/dmn/integration_spec.rb +0 -226
  79. data/spec/dsl/condition_evaluator_spec.rb +0 -774
  80. data/spec/dsl_validation_spec.rb +0 -648
  81. data/spec/edge_cases_spec.rb +0 -353
  82. data/spec/evaluation_spec.rb +0 -364
  83. data/spec/evaluation_validator_spec.rb +0 -165
  84. data/spec/examples/feedback_aware_evaluator_spec.rb +0 -460
  85. data/spec/examples.txt +0 -1909
  86. data/spec/fixtures/dmn/complex_decision.dmn +0 -81
  87. data/spec/fixtures/dmn/invalid_structure.dmn +0 -31
  88. data/spec/fixtures/dmn/simple_decision.dmn +0 -40
  89. data/spec/issue_verification_spec.rb +0 -759
  90. data/spec/json_rule_evaluator_spec.rb +0 -587
  91. data/spec/monitoring/alert_manager_spec.rb +0 -378
  92. data/spec/monitoring/metrics_collector_spec.rb +0 -501
  93. data/spec/monitoring/monitored_agent_spec.rb +0 -225
  94. data/spec/monitoring/prometheus_exporter_spec.rb +0 -242
  95. data/spec/monitoring/storage/activerecord_adapter_spec.rb +0 -498
  96. data/spec/monitoring/storage/base_adapter_spec.rb +0 -61
  97. data/spec/monitoring/storage/memory_adapter_spec.rb +0 -247
  98. data/spec/performance_optimizations_spec.rb +0 -493
  99. data/spec/replay_edge_cases_spec.rb +0 -699
  100. data/spec/replay_spec.rb +0 -210
  101. data/spec/rfc8785_canonicalization_spec.rb +0 -215
  102. data/spec/scoring_spec.rb +0 -225
  103. data/spec/spec_helper.rb +0 -60
  104. data/spec/testing/batch_test_importer_spec.rb +0 -693
  105. data/spec/testing/batch_test_runner_spec.rb +0 -307
  106. data/spec/testing/test_coverage_analyzer_spec.rb +0 -292
  107. data/spec/testing/test_result_comparator_spec.rb +0 -392
  108. data/spec/testing/test_scenario_spec.rb +0 -113
  109. data/spec/thread_safety_spec.rb +0 -490
  110. data/spec/thread_safety_spec.rb.broken +0 -878
  111. data/spec/versioning/adapter_spec.rb +0 -156
  112. data/spec/versioning_spec.rb +0 -1030
  113. data/spec/web/middleware/auth_middleware_spec.rb +0 -133
  114. data/spec/web/middleware/permission_middleware_spec.rb +0 -247
  115. data/spec/web_ui_rack_spec.rb +0 -2134
data/spec/replay_spec.rb DELETED
@@ -1,210 +0,0 @@
1
- require "spec_helper"
2
-
3
- RSpec.describe DecisionAgent::Replay do
4
- let(:evaluator) do
5
- DecisionAgent::Evaluators::StaticEvaluator.new(
6
- decision: "approve",
7
- weight: 0.8,
8
- reason: "Static approval"
9
- )
10
- end
11
-
12
- let(:agent) do
13
- DecisionAgent::Agent.new(evaluators: [evaluator])
14
- end
15
-
16
- describe ".run" do
17
- it "replays decision from audit payload in strict mode" do
18
- context = { user: "alice", action: "login" }
19
- original_result = agent.decide(context: context)
20
-
21
- replayed_result = DecisionAgent::Replay.run(
22
- original_result.audit_payload,
23
- strict: true
24
- )
25
-
26
- expect(replayed_result.decision).to eq(original_result.decision)
27
- expect(replayed_result.confidence).to be_within(0.0001).of(original_result.confidence)
28
- end
29
-
30
- it "raises ReplayMismatchError in strict mode when decision differs" do
31
- context = { user: "alice" }
32
- original_result = agent.decide(context: context)
33
-
34
- modified_payload = original_result.audit_payload.dup
35
- modified_payload[:decision] = "reject"
36
-
37
- expect do
38
- DecisionAgent::Replay.run(modified_payload, strict: true)
39
- end.to raise_error(DecisionAgent::ReplayMismatchError) do |error|
40
- expect(error.differences).to include(/decision mismatch/)
41
- expect(error.expected[:decision]).to eq("reject")
42
- expect(error.actual[:decision]).to eq("approve")
43
- end
44
- end
45
-
46
- it "raises ReplayMismatchError in strict mode when confidence differs" do
47
- context = { user: "alice" }
48
- original_result = agent.decide(context: context)
49
-
50
- modified_payload = original_result.audit_payload.dup
51
- modified_payload[:confidence] = 0.5
52
-
53
- expect do
54
- DecisionAgent::Replay.run(modified_payload, strict: true)
55
- end.to raise_error(DecisionAgent::ReplayMismatchError) do |error|
56
- expect(error.differences).to include(/confidence mismatch/)
57
- end
58
- end
59
-
60
- it "allows differences in non-strict mode" do
61
- context = { user: "alice" }
62
- original_result = agent.decide(context: context)
63
-
64
- modified_payload = original_result.audit_payload.dup
65
- modified_payload[:decision] = "reject"
66
-
67
- expect do
68
- DecisionAgent::Replay.run(modified_payload, strict: false)
69
- end.not_to raise_error
70
- end
71
-
72
- it "logs differences in non-strict mode" do
73
- context = { user: "alice" }
74
- original_result = agent.decide(context: context)
75
-
76
- modified_payload = original_result.audit_payload.dup
77
- modified_payload[:decision] = "reject"
78
-
79
- expect do
80
- DecisionAgent::Replay.run(modified_payload, strict: false)
81
- end.to output(/Decision changed/).to_stderr
82
- end
83
-
84
- it "validates required fields in audit payload" do
85
- invalid_payload = { context: {} }
86
-
87
- expect do
88
- DecisionAgent::Replay.run(invalid_payload, strict: true)
89
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key/)
90
- end
91
-
92
- it "reconstructs evaluations from audit payload" do
93
- eval1 = DecisionAgent::Evaluators::StaticEvaluator.new(
94
- decision: "approve",
95
- weight: 0.7,
96
- reason: "Eval 1",
97
- name: "Evaluator1"
98
- )
99
- eval2 = DecisionAgent::Evaluators::StaticEvaluator.new(
100
- decision: "approve",
101
- weight: 0.9,
102
- reason: "Eval 2",
103
- name: "Evaluator2"
104
- )
105
-
106
- multi_agent = DecisionAgent::Agent.new(evaluators: [eval1, eval2])
107
- original_result = multi_agent.decide(context: { user: "bob" })
108
-
109
- replayed_result = DecisionAgent::Replay.run(
110
- original_result.audit_payload,
111
- strict: true
112
- )
113
-
114
- expect(replayed_result.evaluations.size).to eq(2)
115
- expect(replayed_result.evaluations.map(&:evaluator_name)).to match_array(%w[Evaluator1 Evaluator2])
116
- end
117
-
118
- it "uses correct scoring strategy from audit payload" do
119
- max_weight_agent = DecisionAgent::Agent.new(
120
- evaluators: [evaluator],
121
- scoring_strategy: DecisionAgent::Scoring::MaxWeight.new
122
- )
123
-
124
- original_result = max_weight_agent.decide(context: { user: "charlie" })
125
-
126
- expect(original_result.audit_payload[:scoring_strategy]).to include("MaxWeight")
127
-
128
- replayed_result = DecisionAgent::Replay.run(
129
- original_result.audit_payload,
130
- strict: true
131
- )
132
-
133
- expect(replayed_result.decision).to eq(original_result.decision)
134
- end
135
-
136
- it "handles symbol and string keys in audit payload" do
137
- context = { user: "alice" }
138
- original_result = agent.decide(context: context)
139
-
140
- string_key_payload = JSON.parse(JSON.generate(original_result.audit_payload))
141
-
142
- replayed_result = DecisionAgent::Replay.run(
143
- string_key_payload,
144
- strict: true
145
- )
146
-
147
- expect(replayed_result.decision).to eq(original_result.decision)
148
- end
149
-
150
- it "preserves feedback in replay" do
151
- context = { user: "alice" }
152
- feedback = { source: "manual_override" }
153
-
154
- original_result = agent.decide(context: context, feedback: feedback)
155
-
156
- replayed_result = DecisionAgent::Replay.run(
157
- original_result.audit_payload,
158
- strict: true
159
- )
160
-
161
- expect(replayed_result.audit_payload[:feedback]).to eq(feedback)
162
- end
163
- end
164
-
165
- describe "deterministic replay" do
166
- it "produces identical results for identical inputs across multiple replays" do
167
- context = { user: "alice", priority: "high" }
168
- original_result = agent.decide(context: context)
169
-
170
- results = 5.times.map do
171
- DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
172
- end
173
-
174
- results.each do |result|
175
- expect(result.decision).to eq(original_result.decision)
176
- expect(result.confidence).to be_within(0.0001).of(original_result.confidence)
177
- end
178
- end
179
- end
180
-
181
- describe "complex scenario replay" do
182
- it "replays decisions from JSON rule evaluators" do
183
- rules = {
184
- version: "1.0",
185
- ruleset: "test",
186
- rules: [
187
- {
188
- id: "high_priority",
189
- if: { field: "priority", op: "eq", value: "high" },
190
- then: { decision: "escalate", weight: 0.9, reason: "High priority issue" }
191
- }
192
- ]
193
- }
194
-
195
- json_evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
196
- json_agent = DecisionAgent::Agent.new(evaluators: [json_evaluator])
197
-
198
- context = { priority: "high", user: "alice" }
199
- original_result = json_agent.decide(context: context)
200
-
201
- replayed_result = DecisionAgent::Replay.run(
202
- original_result.audit_payload,
203
- strict: true
204
- )
205
-
206
- expect(replayed_result.decision).to eq("escalate")
207
- expect(replayed_result.confidence).to be_within(0.0001).of(original_result.confidence)
208
- end
209
- end
210
- end
data/spec/rfc8785_canonicalization_spec.rb DELETED
@@ -1,215 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helper"
4
-
5
- RSpec.describe "RFC 8785 JSON Canonicalization" do
6
- let(:evaluator) do
7
- DecisionAgent::Evaluators::JsonRuleEvaluator.new(
8
- rules_json: {
9
- version: "1.0",
10
- ruleset: "test",
11
- rules: [
12
- {
13
- id: "always_approve",
14
- if: { field: "amount", op: "gte", value: 0 },
15
- then: { decision: "approve", weight: 1.0, reason: "Test rule" }
16
- }
17
- ]
18
- }
19
- )
20
- end
21
-
22
- let(:agent) { DecisionAgent::Agent.new(evaluators: [evaluator]) }
23
-
24
- describe "canonical JSON serialization" do
25
- it "produces deterministic hashes using RFC 8785" do
26
- # Same context should produce same hash every time
27
- context = { amount: 100, user: { id: 123, name: "Alice" } }
28
-
29
- decision1 = agent.decide(context: context)
30
- decision2 = agent.decide(context: context)
31
-
32
- hash1 = decision1.audit_payload[:deterministic_hash]
33
- hash2 = decision2.audit_payload[:deterministic_hash]
34
-
35
- expect(hash1).to eq(hash2)
36
- expect(hash1).to be_a(String)
37
- expect(hash1.length).to eq(64) # SHA256 produces 64 hex characters
38
- end
39
-
40
- it "produces different hashes for different contexts" do
41
- context1 = { amount: 100, user: { id: 123 } }
42
- context2 = { amount: 200, user: { id: 456 } }
43
-
44
- decision1 = agent.decide(context: context1)
45
- decision2 = agent.decide(context: context2)
46
-
47
- hash1 = decision1.audit_payload[:deterministic_hash]
48
- hash2 = decision2.audit_payload[:deterministic_hash]
49
-
50
- expect(hash1).not_to eq(hash2)
51
- end
52
-
53
- it "is insensitive to property order (canonicalization)" do
54
- # Different property order should produce same hash
55
- context1 = { amount: 100, user: { id: 123, name: "Alice" } }
56
- context2 = { user: { name: "Alice", id: 123 }, amount: 100 }
57
-
58
- decision1 = agent.decide(context: context1)
59
- decision2 = agent.decide(context: context2)
60
-
61
- hash1 = decision1.audit_payload[:deterministic_hash]
62
- hash2 = decision2.audit_payload[:deterministic_hash]
63
-
64
- expect(hash1).to eq(hash2), "RFC 8785 canonicalization should sort properties"
65
- end
66
-
67
- it "handles special characters correctly" do
68
- # Test Unicode, quotes, and control characters
69
- context = {
70
- amount: 100,
71
- note: "Test with \"quotes\", €uro, and \n newline"
72
- }
73
-
74
- decision = agent.decide(context: context)
75
- hash = decision.audit_payload[:deterministic_hash]
76
-
77
- expect(hash).to be_a(String)
78
- expect(hash.length).to eq(64)
79
- end
80
-
81
- it "handles floating point numbers deterministically" do
82
- # RFC 8785 specifies exact float serialization per IEEE 754
83
- # Note: 99.99 cannot be exactly represented in binary floating point
84
- context = { amount: 100, price: 99.99, tax: 0.075 }
85
-
86
- decision1 = agent.decide(context: context)
87
- decision2 = agent.decide(context: context)
88
-
89
- hash1 = decision1.audit_payload[:deterministic_hash]
90
- hash2 = decision2.audit_payload[:deterministic_hash]
91
-
92
- # Same context should always produce same hash
93
- expect(hash1).to eq(hash2), "RFC 8785 should produce consistent hashes for same values"
94
-
95
- # Verify RFC 8785 uses ECMAScript number serialization
96
- canonical = agent.send(:canonical_json, context)
97
- # RFC 8785 may represent 99.99 as 99.98999999999999 due to IEEE 754
98
- expect(canonical).to match(/99\.\d+/)
99
- expect(canonical).to include("0.075")
100
- end
101
-
102
- it "handles nested structures correctly" do
103
- context = {
104
- amount: 100,
105
- user: {
106
- id: 123,
107
- profile: {
108
- name: "Alice",
109
- tags: %w[premium verified]
110
- }
111
- }
112
- }
113
-
114
- decision = agent.decide(context: context)
115
- hash = decision.audit_payload[:deterministic_hash]
116
-
117
- expect(hash).to be_a(String)
118
- expect(hash.length).to eq(64)
119
- end
120
-
121
- it "handles arrays consistently" do
122
- # Array order should be preserved (not sorted)
123
- context1 = { amount: 100, tags: %w[a b c] }
124
- context2 = { amount: 100, tags: %w[c b a] }
125
-
126
- decision1 = agent.decide(context: context1)
127
- decision2 = agent.decide(context: context2)
128
-
129
- hash1 = decision1.audit_payload[:deterministic_hash]
130
- hash2 = decision2.audit_payload[:deterministic_hash]
131
-
132
- expect(hash1).not_to eq(hash2), "RFC 8785 preserves array order"
133
- end
134
-
135
- it "handles nil values correctly" do
136
- context = { amount: 100, optional_field: nil }
137
-
138
- decision = agent.decide(context: context)
139
- hash = decision.audit_payload[:deterministic_hash]
140
-
141
- expect(hash).to be_a(String)
142
- expect(hash.length).to eq(64)
143
- end
144
-
145
- it "handles boolean values correctly" do
146
- context = { amount: 100, is_verified: true, is_blocked: false }
147
-
148
- decision = agent.decide(context: context)
149
- hash = decision.audit_payload[:deterministic_hash]
150
-
151
- expect(hash).to be_a(String)
152
- expect(hash.length).to eq(64)
153
- end
154
-
155
- it "is thread-safe with concurrent hash computations" do
156
- contexts = 10.times.map { |i| { amount: i * 100, id: i } }
157
- results = []
158
- mutex = Mutex.new
159
-
160
- threads = contexts.map do |ctx|
161
- Thread.new do
162
- decision = agent.decide(context: ctx)
163
- hash = decision.audit_payload[:deterministic_hash]
164
- mutex.synchronize { results << hash }
165
- end
166
- end
167
-
168
- threads.each(&:join)
169
-
170
- expect(results.size).to eq(10)
171
- expect(results.uniq.size).to eq(10), "Each context should produce unique hash"
172
- results.each do |hash|
173
- expect(hash.length).to eq(64)
174
- end
175
- end
176
- end
177
-
178
- describe "RFC 8785 compliance" do
179
- it "uses json-canonicalization gem for canonicalization" do
180
- # Verify we're using the RFC 8785 implementation
181
- test_data = { b: 2, a: 1 }
182
- canonical = agent.send(:canonical_json, test_data)
183
-
184
- # RFC 8785 should sort keys: {"a":1,"b":2}
185
- expect(canonical).to include('"a":1')
186
- expect(canonical).to include('"b":2')
187
- expect(canonical.index('"a"')).to be < canonical.index('"b"')
188
- end
189
-
190
- it "produces compact JSON without whitespace" do
191
- test_data = { amount: 100, user: { id: 123 } }
192
- canonical = agent.send(:canonical_json, test_data)
193
-
194
- # RFC 8785 produces compact JSON
195
- expect(canonical).not_to include("\n")
196
- expect(canonical).not_to include(" ")
197
- end
198
- end
199
-
200
- describe "performance characteristics" do
201
- it "computes hashes efficiently" do
202
- context = {
203
- amount: 100,
204
- user: { id: 123, name: "Alice", tags: (1..100).to_a }
205
- }
206
-
207
- # Should complete quickly even with larger payloads
208
- start_time = Time.now
209
- 100.times { agent.decide(context: context) }
210
- elapsed = Time.now - start_time
211
-
212
- expect(elapsed).to be < 1.0, "100 decisions should complete in under 1 second"
213
- end
214
- end
215
- end
data/spec/scoring_spec.rb DELETED
@@ -1,225 +0,0 @@
1
- require "spec_helper"
2
-
3
- RSpec.describe "Scoring Strategies" do
4
- let(:eval1) do
5
- DecisionAgent::Evaluation.new(
6
- decision: "approve",
7
- weight: 0.6,
8
- reason: "Test 1",
9
- evaluator_name: "Eval1"
10
- )
11
- end
12
-
13
- let(:eval2) do
14
- DecisionAgent::Evaluation.new(
15
- decision: "approve",
16
- weight: 0.8,
17
- reason: "Test 2",
18
- evaluator_name: "Eval2"
19
- )
20
- end
21
-
22
- let(:eval3) do
23
- DecisionAgent::Evaluation.new(
24
- decision: "reject",
25
- weight: 0.5,
26
- reason: "Test 3",
27
- evaluator_name: "Eval3"
28
- )
29
- end
30
-
31
- describe DecisionAgent::Scoring::WeightedAverage do
32
- it "calculates weighted average for single decision" do
33
- strategy = DecisionAgent::Scoring::WeightedAverage.new
34
- result = strategy.score([eval1, eval2])
35
-
36
- expect(result[:decision]).to eq("approve")
37
- expect(result[:confidence]).to eq(1.0)
38
- end
39
-
40
- it "calculates weighted average with conflicts" do
41
- strategy = DecisionAgent::Scoring::WeightedAverage.new
42
- result = strategy.score([eval1, eval2, eval3])
43
-
44
- total_weight = 0.6 + 0.8 + 0.5
45
- approve_weight = 0.6 + 0.8
46
- expected_confidence = approve_weight / total_weight
47
-
48
- expect(result[:decision]).to eq("approve")
49
- expect(result[:confidence]).to be_within(0.0001).of(expected_confidence)
50
- end
51
-
52
- it "returns 0 confidence for empty evaluations" do
53
- strategy = DecisionAgent::Scoring::WeightedAverage.new
54
- result = strategy.score([])
55
-
56
- expect(result[:decision]).to be_nil
57
- expect(result[:confidence]).to eq(0.0)
58
- end
59
-
60
- it "normalizes confidence to [0, 1]" do
61
- strategy = DecisionAgent::Scoring::WeightedAverage.new
62
- result = strategy.score([eval1])
63
-
64
- expect(result[:confidence]).to be_between(0.0, 1.0)
65
- end
66
- end
67
-
68
- describe DecisionAgent::Scoring::MaxWeight do
69
- it "selects decision with maximum weight" do
70
- strategy = DecisionAgent::Scoring::MaxWeight.new
71
- result = strategy.score([eval1, eval2, eval3])
72
-
73
- expect(result[:decision]).to eq("approve")
74
- expect(result[:confidence]).to eq(0.8)
75
- end
76
-
77
- it "uses first evaluation when weights are equal" do
78
- eval_a = DecisionAgent::Evaluation.new(
79
- decision: "option_a",
80
- weight: 0.7,
81
- reason: "Test A",
82
- evaluator_name: "EvalA"
83
- )
84
- eval_b = DecisionAgent::Evaluation.new(
85
- decision: "option_b",
86
- weight: 0.7,
87
- reason: "Test B",
88
- evaluator_name: "EvalB"
89
- )
90
-
91
- strategy = DecisionAgent::Scoring::MaxWeight.new
92
- result = strategy.score([eval_a, eval_b])
93
-
94
- expect(%w[option_a option_b]).to include(result[:decision])
95
- expect(result[:confidence]).to eq(0.7)
96
- end
97
-
98
- it "returns 0 confidence for empty evaluations" do
99
- strategy = DecisionAgent::Scoring::MaxWeight.new
100
- result = strategy.score([])
101
-
102
- expect(result[:decision]).to be_nil
103
- expect(result[:confidence]).to eq(0.0)
104
- end
105
- end
106
-
107
- describe DecisionAgent::Scoring::Consensus do
108
- it "selects decision with highest agreement" do
109
- eval4 = DecisionAgent::Evaluation.new(
110
- decision: "approve",
111
- weight: 0.7,
112
- reason: "Test 4",
113
- evaluator_name: "Eval4"
114
- )
115
-
116
- strategy = DecisionAgent::Scoring::Consensus.new
117
- result = strategy.score([eval1, eval2, eval3, eval4])
118
-
119
- expect(result[:decision]).to eq("approve")
120
- end
121
-
122
- it "considers both agreement and weight" do
123
- low_weight_majority = [
124
- DecisionAgent::Evaluation.new(decision: "approve", weight: 0.3, reason: "A", evaluator_name: "E1"),
125
- DecisionAgent::Evaluation.new(decision: "approve", weight: 0.3, reason: "B", evaluator_name: "E2"),
126
- DecisionAgent::Evaluation.new(decision: "approve", weight: 0.3, reason: "C", evaluator_name: "E3")
127
- ]
128
-
129
- high_weight_minority = [
130
- DecisionAgent::Evaluation.new(decision: "reject", weight: 0.9, reason: "D", evaluator_name: "E4")
131
- ]
132
-
133
- strategy = DecisionAgent::Scoring::Consensus.new
134
- result = strategy.score(low_weight_majority + high_weight_minority)
135
-
136
- expect(result[:decision]).to eq("approve")
137
- end
138
-
139
- it "reduces confidence when minimum agreement not met" do
140
- eval_spread = [
141
- DecisionAgent::Evaluation.new(decision: "option_a", weight: 0.8, reason: "A", evaluator_name: "E1"),
142
- DecisionAgent::Evaluation.new(decision: "option_b", weight: 0.7, reason: "B", evaluator_name: "E2"),
143
- DecisionAgent::Evaluation.new(decision: "option_c", weight: 0.6, reason: "C", evaluator_name: "E3")
144
- ]
145
-
146
- strategy = DecisionAgent::Scoring::Consensus.new(minimum_agreement: 0.5)
147
- result = strategy.score(eval_spread)
148
-
149
- expect(result[:confidence]).to be < 0.5
150
- end
151
-
152
- it "allows custom minimum agreement threshold" do
153
- strategy = DecisionAgent::Scoring::Consensus.new(minimum_agreement: 0.7)
154
- result = strategy.score([eval1, eval2, eval3])
155
-
156
- expect(result[:decision]).to eq("approve")
157
- end
158
-
159
- it "returns 0 confidence for empty evaluations" do
160
- strategy = DecisionAgent::Scoring::Consensus.new
161
- result = strategy.score([])
162
-
163
- expect(result[:decision]).to be_nil
164
- expect(result[:confidence]).to eq(0.0)
165
- end
166
- end
167
-
168
- describe DecisionAgent::Scoring::Threshold do
169
- it "accepts decision when weight meets threshold" do
170
- strategy = DecisionAgent::Scoring::Threshold.new(threshold: 0.7)
171
- result = strategy.score([eval2])
172
-
173
- expect(result[:decision]).to eq("approve")
174
- expect(result[:confidence]).to eq(0.8)
175
- end
176
-
177
- it "returns fallback decision when weight below threshold" do
178
- strategy = DecisionAgent::Scoring::Threshold.new(threshold: 0.9, fallback_decision: "manual_review")
179
- result = strategy.score([eval2])
180
-
181
- expect(result[:decision]).to eq("manual_review")
182
- expect(result[:confidence]).to be < 0.9
183
- end
184
-
185
- it "uses average weight across evaluations with same decision" do
186
- strategy = DecisionAgent::Scoring::Threshold.new(threshold: 0.7)
187
- result = strategy.score([eval1, eval2])
188
-
189
- avg_weight = (0.6 + 0.8) / 2
190
- expect(result[:decision]).to eq("approve")
191
- expect(result[:confidence]).to eq(avg_weight)
192
- end
193
-
194
- it "uses default fallback decision" do
195
- strategy = DecisionAgent::Scoring::Threshold.new(threshold: 0.9)
196
- result = strategy.score([eval1])
197
-
198
- expect(result[:decision]).to eq("no_decision")
199
- end
200
-
201
- it "returns fallback for empty evaluations" do
202
- strategy = DecisionAgent::Scoring::Threshold.new(fallback_decision: "default")
203
- result = strategy.score([])
204
-
205
- expect(result[:decision]).to eq("default")
206
- expect(result[:confidence]).to eq(0.0)
207
- end
208
- end
209
-
210
- describe "confidence bounds" do
211
- it "ensures all strategies return confidence between 0 and 1" do
212
- strategies = [
213
- DecisionAgent::Scoring::WeightedAverage.new,
214
- DecisionAgent::Scoring::MaxWeight.new,
215
- DecisionAgent::Scoring::Consensus.new,
216
- DecisionAgent::Scoring::Threshold.new
217
- ]
218
-
219
- strategies.each do |strategy|
220
- result = strategy.score([eval1, eval2, eval3])
221
- expect(result[:confidence]).to be_between(0.0, 1.0)
222
- end
223
- end
224
- end
225
- end
data/spec/spec_helper.rb DELETED
@@ -1,60 +0,0 @@
1
- require "simplecov"
2
- SimpleCov.start do
3
- add_filter "/spec/"
4
- add_filter "/examples/"
5
- end
6
-
7
- require "decision_agent"
8
-
9
- # Load ActiveRecord for thread-safety and integration tests
10
- begin
11
- require "active_record"
12
- require "sqlite3"
13
- require "decision_agent/versioning/activerecord_adapter"
14
- rescue LoadError
15
- # ActiveRecord is optional - tests will be skipped if not available
16
- end
17
-
18
- # Store original value for cleanup
19
- # rubocop:disable Style/GlobalVars
20
- $original_disable_webui_permissions = nil
21
- # rubocop:enable Style/GlobalVars
22
-
23
- RSpec.configure do |config|
24
- config.expect_with :rspec do |expectations|
25
- expectations.include_chain_clauses_in_custom_matcher_descriptions = true
26
- end
27
-
28
- config.mock_with :rspec do |mocks|
29
- mocks.verify_partial_doubles = true
30
- end
31
-
32
- config.shared_context_metadata_behavior = :apply_to_host_groups
33
- config.filter_run_when_matching :focus
34
- config.example_status_persistence_file_path = "spec/examples.txt"
35
- config.disable_monkey_patching!
36
- config.warnings = true
37
-
38
- config.default_formatter = "doc" if config.files_to_run.one?
39
-
40
- config.order = :random
41
- Kernel.srand config.seed
42
-
43
- # Ensure permissions are enabled for tests
44
- config.before(:suite) do
45
- # rubocop:disable Style/GlobalVars
46
- $original_disable_webui_permissions = ENV.fetch("DISABLE_WEBUI_PERMISSIONS", nil)
47
- # rubocop:enable Style/GlobalVars
48
- ENV["DISABLE_WEBUI_PERMISSIONS"] = "false"
49
- end
50
-
51
- config.after(:suite) do
52
- # rubocop:disable Style/GlobalVars
53
- if $original_disable_webui_permissions
54
- ENV["DISABLE_WEBUI_PERMISSIONS"] = $original_disable_webui_permissions
55
- else
56
- ENV.delete("DISABLE_WEBUI_PERMISSIONS")
57
- end
58
- # rubocop:enable Style/GlobalVars
59
- end
60
- end