decision_agent 0.3.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. checksums.yaml +4 -4
  2. data/README.md +234 -14
  3. data/lib/decision_agent/ab_testing/ab_test.rb +5 -1
  4. data/lib/decision_agent/ab_testing/ab_test_assignment.rb +2 -0
  5. data/lib/decision_agent/ab_testing/ab_test_manager.rb +2 -0
  6. data/lib/decision_agent/ab_testing/ab_testing_agent.rb +2 -0
  7. data/lib/decision_agent/ab_testing/storage/activerecord_adapter.rb +2 -13
  8. data/lib/decision_agent/ab_testing/storage/adapter.rb +2 -0
  9. data/lib/decision_agent/ab_testing/storage/memory_adapter.rb +2 -0
  10. data/lib/decision_agent/agent.rb +78 -9
  11. data/lib/decision_agent/audit/adapter.rb +2 -0
  12. data/lib/decision_agent/audit/logger_adapter.rb +2 -0
  13. data/lib/decision_agent/audit/null_adapter.rb +2 -0
  14. data/lib/decision_agent/auth/access_audit_logger.rb +2 -0
  15. data/lib/decision_agent/auth/authenticator.rb +2 -0
  16. data/lib/decision_agent/auth/password_reset_manager.rb +2 -0
  17. data/lib/decision_agent/auth/password_reset_token.rb +2 -0
  18. data/lib/decision_agent/auth/permission.rb +2 -0
  19. data/lib/decision_agent/auth/permission_checker.rb +2 -0
  20. data/lib/decision_agent/auth/rbac_adapter.rb +2 -0
  21. data/lib/decision_agent/auth/rbac_config.rb +2 -0
  22. data/lib/decision_agent/auth/role.rb +2 -0
  23. data/lib/decision_agent/auth/session.rb +2 -0
  24. data/lib/decision_agent/auth/session_manager.rb +2 -0
  25. data/lib/decision_agent/auth/user.rb +2 -0
  26. data/lib/decision_agent/context.rb +14 -0
  27. data/lib/decision_agent/decision.rb +113 -4
  28. data/lib/decision_agent/dmn/adapter.rb +2 -0
  29. data/lib/decision_agent/dmn/cache.rb +2 -2
  30. data/lib/decision_agent/dmn/decision_graph.rb +7 -7
  31. data/lib/decision_agent/dmn/decision_tree.rb +16 -8
  32. data/lib/decision_agent/dmn/errors.rb +2 -0
  33. data/lib/decision_agent/dmn/exporter.rb +2 -0
  34. data/lib/decision_agent/dmn/feel/evaluator.rb +130 -114
  35. data/lib/decision_agent/dmn/feel/functions.rb +2 -0
  36. data/lib/decision_agent/dmn/feel/parser.rb +2 -0
  37. data/lib/decision_agent/dmn/feel/simple_parser.rb +98 -77
  38. data/lib/decision_agent/dmn/feel/transformer.rb +56 -102
  39. data/lib/decision_agent/dmn/feel/types.rb +2 -0
  40. data/lib/decision_agent/dmn/importer.rb +2 -0
  41. data/lib/decision_agent/dmn/model.rb +2 -4
  42. data/lib/decision_agent/dmn/parser.rb +2 -0
  43. data/lib/decision_agent/dmn/testing.rb +3 -2
  44. data/lib/decision_agent/dmn/validator.rb +5 -3
  45. data/lib/decision_agent/dmn/visualizer.rb +7 -6
  46. data/lib/decision_agent/dsl/condition_evaluator.rb +242 -1375
  47. data/lib/decision_agent/dsl/helpers/cache_helpers.rb +82 -0
  48. data/lib/decision_agent/dsl/helpers/comparison_helpers.rb +98 -0
  49. data/lib/decision_agent/dsl/helpers/date_helpers.rb +91 -0
  50. data/lib/decision_agent/dsl/helpers/geospatial_helpers.rb +85 -0
  51. data/lib/decision_agent/dsl/helpers/operator_evaluation_helpers.rb +160 -0
  52. data/lib/decision_agent/dsl/helpers/parameter_parsing_helpers.rb +206 -0
  53. data/lib/decision_agent/dsl/helpers/template_helpers.rb +39 -0
  54. data/lib/decision_agent/dsl/helpers/utility_helpers.rb +45 -0
  55. data/lib/decision_agent/dsl/operators/base.rb +70 -0
  56. data/lib/decision_agent/dsl/operators/basic_comparison_operators.rb +80 -0
  57. data/lib/decision_agent/dsl/operators/collection_operators.rb +60 -0
  58. data/lib/decision_agent/dsl/operators/date_arithmetic_operators.rb +206 -0
  59. data/lib/decision_agent/dsl/operators/date_time_operators.rb +47 -0
  60. data/lib/decision_agent/dsl/operators/duration_operators.rb +149 -0
  61. data/lib/decision_agent/dsl/operators/financial_operators.rb +237 -0
  62. data/lib/decision_agent/dsl/operators/geospatial_operators.rb +106 -0
  63. data/lib/decision_agent/dsl/operators/mathematical_operators.rb +234 -0
  64. data/lib/decision_agent/dsl/operators/moving_window_operators.rb +135 -0
  65. data/lib/decision_agent/dsl/operators/numeric_operators.rb +120 -0
  66. data/lib/decision_agent/dsl/operators/rate_operators.rb +65 -0
  67. data/lib/decision_agent/dsl/operators/statistical_aggregations.rb +187 -0
  68. data/lib/decision_agent/dsl/operators/string_aggregations.rb +84 -0
  69. data/lib/decision_agent/dsl/operators/string_operators.rb +72 -0
  70. data/lib/decision_agent/dsl/operators/time_component_operators.rb +72 -0
  71. data/lib/decision_agent/dsl/rule_parser.rb +2 -0
  72. data/lib/decision_agent/dsl/schema_validator.rb +37 -14
  73. data/lib/decision_agent/errors.rb +2 -0
  74. data/lib/decision_agent/evaluation.rb +14 -2
  75. data/lib/decision_agent/evaluators/base.rb +2 -0
  76. data/lib/decision_agent/evaluators/dmn_evaluator.rb +108 -19
  77. data/lib/decision_agent/evaluators/json_rule_evaluator.rb +56 -11
  78. data/lib/decision_agent/evaluators/static_evaluator.rb +2 -0
  79. data/lib/decision_agent/explainability/condition_trace.rb +85 -0
  80. data/lib/decision_agent/explainability/explainability_result.rb +50 -0
  81. data/lib/decision_agent/explainability/rule_trace.rb +41 -0
  82. data/lib/decision_agent/explainability/trace_collector.rb +26 -0
  83. data/lib/decision_agent/monitoring/alert_manager.rb +7 -16
  84. data/lib/decision_agent/monitoring/dashboard_server.rb +383 -250
  85. data/lib/decision_agent/monitoring/metrics_collector.rb +2 -0
  86. data/lib/decision_agent/monitoring/monitored_agent.rb +2 -0
  87. data/lib/decision_agent/monitoring/prometheus_exporter.rb +3 -1
  88. data/lib/decision_agent/replay/replay.rb +4 -1
  89. data/lib/decision_agent/scoring/base.rb +2 -0
  90. data/lib/decision_agent/scoring/consensus.rb +2 -0
  91. data/lib/decision_agent/scoring/max_weight.rb +2 -0
  92. data/lib/decision_agent/scoring/threshold.rb +2 -0
  93. data/lib/decision_agent/scoring/weighted_average.rb +2 -0
  94. data/lib/decision_agent/simulation/errors.rb +20 -0
  95. data/lib/decision_agent/simulation/impact_analyzer.rb +500 -0
  96. data/lib/decision_agent/simulation/monte_carlo_simulator.rb +638 -0
  97. data/lib/decision_agent/simulation/replay_engine.rb +488 -0
  98. data/lib/decision_agent/simulation/scenario_engine.rb +320 -0
  99. data/lib/decision_agent/simulation/scenario_library.rb +165 -0
  100. data/lib/decision_agent/simulation/shadow_test_engine.rb +274 -0
  101. data/lib/decision_agent/simulation/what_if_analyzer.rb +1008 -0
  102. data/lib/decision_agent/simulation.rb +19 -0
  103. data/lib/decision_agent/testing/batch_test_importer.rb +6 -2
  104. data/lib/decision_agent/testing/batch_test_runner.rb +5 -2
  105. data/lib/decision_agent/testing/test_coverage_analyzer.rb +2 -0
  106. data/lib/decision_agent/testing/test_result_comparator.rb +2 -0
  107. data/lib/decision_agent/testing/test_scenario.rb +2 -0
  108. data/lib/decision_agent/version.rb +3 -1
  109. data/lib/decision_agent/versioning/activerecord_adapter.rb +108 -43
  110. data/lib/decision_agent/versioning/adapter.rb +9 -0
  111. data/lib/decision_agent/versioning/file_storage_adapter.rb +19 -6
  112. data/lib/decision_agent/versioning/version_manager.rb +9 -0
  113. data/lib/decision_agent/web/dmn_editor/serialization.rb +74 -0
  114. data/lib/decision_agent/web/dmn_editor/xml_builder.rb +107 -0
  115. data/lib/decision_agent/web/dmn_editor.rb +8 -67
  116. data/lib/decision_agent/web/middleware/auth_middleware.rb +2 -0
  117. data/lib/decision_agent/web/middleware/permission_middleware.rb +3 -1
  118. data/lib/decision_agent/web/public/app.js +186 -26
  119. data/lib/decision_agent/web/public/batch_testing.html +80 -6
  120. data/lib/decision_agent/web/public/dmn-editor.html +2 -2
  121. data/lib/decision_agent/web/public/dmn-editor.js +74 -8
  122. data/lib/decision_agent/web/public/index.html +69 -3
  123. data/lib/decision_agent/web/public/login.html +1 -1
  124. data/lib/decision_agent/web/public/sample_batch.csv +11 -0
  125. data/lib/decision_agent/web/public/sample_impact.csv +11 -0
  126. data/lib/decision_agent/web/public/sample_replay.csv +11 -0
  127. data/lib/decision_agent/web/public/sample_rules.json +118 -0
  128. data/lib/decision_agent/web/public/sample_shadow.csv +11 -0
  129. data/lib/decision_agent/web/public/sample_whatif.csv +11 -0
  130. data/lib/decision_agent/web/public/simulation.html +146 -0
  131. data/lib/decision_agent/web/public/simulation_impact.html +495 -0
  132. data/lib/decision_agent/web/public/simulation_replay.html +547 -0
  133. data/lib/decision_agent/web/public/simulation_shadow.html +561 -0
  134. data/lib/decision_agent/web/public/simulation_whatif.html +549 -0
  135. data/lib/decision_agent/web/public/styles.css +65 -0
  136. data/lib/decision_agent/web/public/users.html +1 -1
  137. data/lib/decision_agent/web/rack_helpers.rb +106 -0
  138. data/lib/decision_agent/web/rack_request_helpers.rb +196 -0
  139. data/lib/decision_agent/web/server.rb +2126 -1374
  140. data/lib/decision_agent.rb +19 -1
  141. data/lib/generators/decision_agent/install/install_generator.rb +2 -0
  142. data/lib/generators/decision_agent/install/templates/ab_test_assignment_model.rb +2 -0
  143. data/lib/generators/decision_agent/install/templates/ab_test_model.rb +2 -0
  144. data/lib/generators/decision_agent/install/templates/ab_testing_migration.rb +2 -0
  145. data/lib/generators/decision_agent/install/templates/migration.rb +2 -0
  146. data/lib/generators/decision_agent/install/templates/rule.rb +2 -0
  147. data/lib/generators/decision_agent/install/templates/rule_version.rb +2 -0
  148. metadata +103 -89
  149. data/spec/ab_testing/ab_test_assignment_spec.rb +0 -253
  150. data/spec/ab_testing/ab_test_manager_spec.rb +0 -612
  151. data/spec/ab_testing/ab_test_spec.rb +0 -270
  152. data/spec/ab_testing/ab_testing_agent_spec.rb +0 -655
  153. data/spec/ab_testing/storage/adapter_spec.rb +0 -64
  154. data/spec/ab_testing/storage/memory_adapter_spec.rb +0 -485
  155. data/spec/activerecord_thread_safety_spec.rb +0 -553
  156. data/spec/advanced_operators_spec.rb +0 -3150
  157. data/spec/agent_spec.rb +0 -289
  158. data/spec/api_contract_spec.rb +0 -430
  159. data/spec/audit_adapters_spec.rb +0 -92
  160. data/spec/auth/access_audit_logger_spec.rb +0 -394
  161. data/spec/auth/authenticator_spec.rb +0 -112
  162. data/spec/auth/password_reset_spec.rb +0 -294
  163. data/spec/auth/permission_checker_spec.rb +0 -207
  164. data/spec/auth/permission_spec.rb +0 -73
  165. data/spec/auth/rbac_adapter_spec.rb +0 -778
  166. data/spec/auth/rbac_config_spec.rb +0 -82
  167. data/spec/auth/role_spec.rb +0 -51
  168. data/spec/auth/session_manager_spec.rb +0 -172
  169. data/spec/auth/session_spec.rb +0 -112
  170. data/spec/auth/user_spec.rb +0 -130
  171. data/spec/comprehensive_edge_cases_spec.rb +0 -1777
  172. data/spec/context_spec.rb +0 -127
  173. data/spec/decision_agent_spec.rb +0 -96
  174. data/spec/decision_spec.rb +0 -423
  175. data/spec/dmn/decision_graph_spec.rb +0 -282
  176. data/spec/dmn/decision_tree_spec.rb +0 -203
  177. data/spec/dmn/feel/errors_spec.rb +0 -18
  178. data/spec/dmn/feel/functions_spec.rb +0 -400
  179. data/spec/dmn/feel/simple_parser_spec.rb +0 -274
  180. data/spec/dmn/feel/types_spec.rb +0 -176
  181. data/spec/dmn/feel_parser_spec.rb +0 -489
  182. data/spec/dmn/hit_policy_spec.rb +0 -202
  183. data/spec/dmn/integration_spec.rb +0 -226
  184. data/spec/dsl/condition_evaluator_spec.rb +0 -774
  185. data/spec/dsl_validation_spec.rb +0 -648
  186. data/spec/edge_cases_spec.rb +0 -353
  187. data/spec/evaluation_spec.rb +0 -364
  188. data/spec/evaluation_validator_spec.rb +0 -165
  189. data/spec/examples/feedback_aware_evaluator_spec.rb +0 -460
  190. data/spec/examples.txt +0 -1909
  191. data/spec/fixtures/dmn/complex_decision.dmn +0 -81
  192. data/spec/fixtures/dmn/invalid_structure.dmn +0 -31
  193. data/spec/fixtures/dmn/simple_decision.dmn +0 -40
  194. data/spec/issue_verification_spec.rb +0 -759
  195. data/spec/json_rule_evaluator_spec.rb +0 -587
  196. data/spec/monitoring/alert_manager_spec.rb +0 -378
  197. data/spec/monitoring/metrics_collector_spec.rb +0 -501
  198. data/spec/monitoring/monitored_agent_spec.rb +0 -225
  199. data/spec/monitoring/prometheus_exporter_spec.rb +0 -242
  200. data/spec/monitoring/storage/activerecord_adapter_spec.rb +0 -498
  201. data/spec/monitoring/storage/base_adapter_spec.rb +0 -61
  202. data/spec/monitoring/storage/memory_adapter_spec.rb +0 -247
  203. data/spec/performance_optimizations_spec.rb +0 -493
  204. data/spec/replay_edge_cases_spec.rb +0 -699
  205. data/spec/replay_spec.rb +0 -210
  206. data/spec/rfc8785_canonicalization_spec.rb +0 -215
  207. data/spec/scoring_spec.rb +0 -225
  208. data/spec/spec_helper.rb +0 -60
  209. data/spec/testing/batch_test_importer_spec.rb +0 -693
  210. data/spec/testing/batch_test_runner_spec.rb +0 -307
  211. data/spec/testing/test_coverage_analyzer_spec.rb +0 -292
  212. data/spec/testing/test_result_comparator_spec.rb +0 -392
  213. data/spec/testing/test_scenario_spec.rb +0 -113
  214. data/spec/thread_safety_spec.rb +0 -490
  215. data/spec/thread_safety_spec.rb.broken +0 -878
  216. data/spec/versioning/adapter_spec.rb +0 -156
  217. data/spec/versioning_spec.rb +0 -1030
  218. data/spec/web/middleware/auth_middleware_spec.rb +0 -133
  219. data/spec/web/middleware/permission_middleware_spec.rb +0 -247
  220. data/spec/web_ui_rack_spec.rb +0 -2134
data/spec/replay_spec.rb DELETED
@@ -1,210 +0,0 @@
1
- require "spec_helper"
2
-
3
- RSpec.describe DecisionAgent::Replay do
4
- let(:evaluator) do
5
- DecisionAgent::Evaluators::StaticEvaluator.new(
6
- decision: "approve",
7
- weight: 0.8,
8
- reason: "Static approval"
9
- )
10
- end
11
-
12
- let(:agent) do
13
- DecisionAgent::Agent.new(evaluators: [evaluator])
14
- end
15
-
16
- describe ".run" do
17
- it "replays decision from audit payload in strict mode" do
18
- context = { user: "alice", action: "login" }
19
- original_result = agent.decide(context: context)
20
-
21
- replayed_result = DecisionAgent::Replay.run(
22
- original_result.audit_payload,
23
- strict: true
24
- )
25
-
26
- expect(replayed_result.decision).to eq(original_result.decision)
27
- expect(replayed_result.confidence).to be_within(0.0001).of(original_result.confidence)
28
- end
29
-
30
- it "raises ReplayMismatchError in strict mode when decision differs" do
31
- context = { user: "alice" }
32
- original_result = agent.decide(context: context)
33
-
34
- modified_payload = original_result.audit_payload.dup
35
- modified_payload[:decision] = "reject"
36
-
37
- expect do
38
- DecisionAgent::Replay.run(modified_payload, strict: true)
39
- end.to raise_error(DecisionAgent::ReplayMismatchError) do |error|
40
- expect(error.differences).to include(/decision mismatch/)
41
- expect(error.expected[:decision]).to eq("reject")
42
- expect(error.actual[:decision]).to eq("approve")
43
- end
44
- end
45
-
46
- it "raises ReplayMismatchError in strict mode when confidence differs" do
47
- context = { user: "alice" }
48
- original_result = agent.decide(context: context)
49
-
50
- modified_payload = original_result.audit_payload.dup
51
- modified_payload[:confidence] = 0.5
52
-
53
- expect do
54
- DecisionAgent::Replay.run(modified_payload, strict: true)
55
- end.to raise_error(DecisionAgent::ReplayMismatchError) do |error|
56
- expect(error.differences).to include(/confidence mismatch/)
57
- end
58
- end
59
-
60
- it "allows differences in non-strict mode" do
61
- context = { user: "alice" }
62
- original_result = agent.decide(context: context)
63
-
64
- modified_payload = original_result.audit_payload.dup
65
- modified_payload[:decision] = "reject"
66
-
67
- expect do
68
- DecisionAgent::Replay.run(modified_payload, strict: false)
69
- end.not_to raise_error
70
- end
71
-
72
- it "logs differences in non-strict mode" do
73
- context = { user: "alice" }
74
- original_result = agent.decide(context: context)
75
-
76
- modified_payload = original_result.audit_payload.dup
77
- modified_payload[:decision] = "reject"
78
-
79
- expect do
80
- DecisionAgent::Replay.run(modified_payload, strict: false)
81
- end.to output(/Decision changed/).to_stderr
82
- end
83
-
84
- it "validates required fields in audit payload" do
85
- invalid_payload = { context: {} }
86
-
87
- expect do
88
- DecisionAgent::Replay.run(invalid_payload, strict: true)
89
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key/)
90
- end
91
-
92
- it "reconstructs evaluations from audit payload" do
93
- eval1 = DecisionAgent::Evaluators::StaticEvaluator.new(
94
- decision: "approve",
95
- weight: 0.7,
96
- reason: "Eval 1",
97
- name: "Evaluator1"
98
- )
99
- eval2 = DecisionAgent::Evaluators::StaticEvaluator.new(
100
- decision: "approve",
101
- weight: 0.9,
102
- reason: "Eval 2",
103
- name: "Evaluator2"
104
- )
105
-
106
- multi_agent = DecisionAgent::Agent.new(evaluators: [eval1, eval2])
107
- original_result = multi_agent.decide(context: { user: "bob" })
108
-
109
- replayed_result = DecisionAgent::Replay.run(
110
- original_result.audit_payload,
111
- strict: true
112
- )
113
-
114
- expect(replayed_result.evaluations.size).to eq(2)
115
- expect(replayed_result.evaluations.map(&:evaluator_name)).to match_array(%w[Evaluator1 Evaluator2])
116
- end
117
-
118
- it "uses correct scoring strategy from audit payload" do
119
- max_weight_agent = DecisionAgent::Agent.new(
120
- evaluators: [evaluator],
121
- scoring_strategy: DecisionAgent::Scoring::MaxWeight.new
122
- )
123
-
124
- original_result = max_weight_agent.decide(context: { user: "charlie" })
125
-
126
- expect(original_result.audit_payload[:scoring_strategy]).to include("MaxWeight")
127
-
128
- replayed_result = DecisionAgent::Replay.run(
129
- original_result.audit_payload,
130
- strict: true
131
- )
132
-
133
- expect(replayed_result.decision).to eq(original_result.decision)
134
- end
135
-
136
- it "handles symbol and string keys in audit payload" do
137
- context = { user: "alice" }
138
- original_result = agent.decide(context: context)
139
-
140
- string_key_payload = JSON.parse(JSON.generate(original_result.audit_payload))
141
-
142
- replayed_result = DecisionAgent::Replay.run(
143
- string_key_payload,
144
- strict: true
145
- )
146
-
147
- expect(replayed_result.decision).to eq(original_result.decision)
148
- end
149
-
150
- it "preserves feedback in replay" do
151
- context = { user: "alice" }
152
- feedback = { source: "manual_override" }
153
-
154
- original_result = agent.decide(context: context, feedback: feedback)
155
-
156
- replayed_result = DecisionAgent::Replay.run(
157
- original_result.audit_payload,
158
- strict: true
159
- )
160
-
161
- expect(replayed_result.audit_payload[:feedback]).to eq(feedback)
162
- end
163
- end
164
-
165
- describe "deterministic replay" do
166
- it "produces identical results for identical inputs across multiple replays" do
167
- context = { user: "alice", priority: "high" }
168
- original_result = agent.decide(context: context)
169
-
170
- results = 5.times.map do
171
- DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
172
- end
173
-
174
- results.each do |result|
175
- expect(result.decision).to eq(original_result.decision)
176
- expect(result.confidence).to be_within(0.0001).of(original_result.confidence)
177
- end
178
- end
179
- end
180
-
181
- describe "complex scenario replay" do
182
- it "replays decisions from JSON rule evaluators" do
183
- rules = {
184
- version: "1.0",
185
- ruleset: "test",
186
- rules: [
187
- {
188
- id: "high_priority",
189
- if: { field: "priority", op: "eq", value: "high" },
190
- then: { decision: "escalate", weight: 0.9, reason: "High priority issue" }
191
- }
192
- ]
193
- }
194
-
195
- json_evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
196
- json_agent = DecisionAgent::Agent.new(evaluators: [json_evaluator])
197
-
198
- context = { priority: "high", user: "alice" }
199
- original_result = json_agent.decide(context: context)
200
-
201
- replayed_result = DecisionAgent::Replay.run(
202
- original_result.audit_payload,
203
- strict: true
204
- )
205
-
206
- expect(replayed_result.decision).to eq("escalate")
207
- expect(replayed_result.confidence).to be_within(0.0001).of(original_result.confidence)
208
- end
209
- end
210
- end
data/spec/rfc8785_canonicalization_spec.rb DELETED
@@ -1,215 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helper"
4
-
5
- RSpec.describe "RFC 8785 JSON Canonicalization" do
6
- let(:evaluator) do
7
- DecisionAgent::Evaluators::JsonRuleEvaluator.new(
8
- rules_json: {
9
- version: "1.0",
10
- ruleset: "test",
11
- rules: [
12
- {
13
- id: "always_approve",
14
- if: { field: "amount", op: "gte", value: 0 },
15
- then: { decision: "approve", weight: 1.0, reason: "Test rule" }
16
- }
17
- ]
18
- }
19
- )
20
- end
21
-
22
- let(:agent) { DecisionAgent::Agent.new(evaluators: [evaluator]) }
23
-
24
- describe "canonical JSON serialization" do
25
- it "produces deterministic hashes using RFC 8785" do
26
- # Same context should produce same hash every time
27
- context = { amount: 100, user: { id: 123, name: "Alice" } }
28
-
29
- decision1 = agent.decide(context: context)
30
- decision2 = agent.decide(context: context)
31
-
32
- hash1 = decision1.audit_payload[:deterministic_hash]
33
- hash2 = decision2.audit_payload[:deterministic_hash]
34
-
35
- expect(hash1).to eq(hash2)
36
- expect(hash1).to be_a(String)
37
- expect(hash1.length).to eq(64) # SHA256 produces 64 hex characters
38
- end
39
-
40
- it "produces different hashes for different contexts" do
41
- context1 = { amount: 100, user: { id: 123 } }
42
- context2 = { amount: 200, user: { id: 456 } }
43
-
44
- decision1 = agent.decide(context: context1)
45
- decision2 = agent.decide(context: context2)
46
-
47
- hash1 = decision1.audit_payload[:deterministic_hash]
48
- hash2 = decision2.audit_payload[:deterministic_hash]
49
-
50
- expect(hash1).not_to eq(hash2)
51
- end
52
-
53
- it "is insensitive to property order (canonicalization)" do
54
- # Different property order should produce same hash
55
- context1 = { amount: 100, user: { id: 123, name: "Alice" } }
56
- context2 = { user: { name: "Alice", id: 123 }, amount: 100 }
57
-
58
- decision1 = agent.decide(context: context1)
59
- decision2 = agent.decide(context: context2)
60
-
61
- hash1 = decision1.audit_payload[:deterministic_hash]
62
- hash2 = decision2.audit_payload[:deterministic_hash]
63
-
64
- expect(hash1).to eq(hash2), "RFC 8785 canonicalization should sort properties"
65
- end
66
-
67
- it "handles special characters correctly" do
68
- # Test Unicode, quotes, and control characters
69
- context = {
70
- amount: 100,
71
- note: "Test with \"quotes\", €uro, and \n newline"
72
- }
73
-
74
- decision = agent.decide(context: context)
75
- hash = decision.audit_payload[:deterministic_hash]
76
-
77
- expect(hash).to be_a(String)
78
- expect(hash.length).to eq(64)
79
- end
80
-
81
- it "handles floating point numbers deterministically" do
82
- # RFC 8785 specifies exact float serialization per IEEE 754
83
- # Note: 99.99 cannot be exactly represented in binary floating point
84
- context = { amount: 100, price: 99.99, tax: 0.075 }
85
-
86
- decision1 = agent.decide(context: context)
87
- decision2 = agent.decide(context: context)
88
-
89
- hash1 = decision1.audit_payload[:deterministic_hash]
90
- hash2 = decision2.audit_payload[:deterministic_hash]
91
-
92
- # Same context should always produce same hash
93
- expect(hash1).to eq(hash2), "RFC 8785 should produce consistent hashes for same values"
94
-
95
- # Verify RFC 8785 uses ECMAScript number serialization
96
- canonical = agent.send(:canonical_json, context)
97
- # RFC 8785 may represent 99.99 as 99.98999999999999 due to IEEE 754
98
- expect(canonical).to match(/99\.\d+/)
99
- expect(canonical).to include("0.075")
100
- end
101
-
102
- it "handles nested structures correctly" do
103
- context = {
104
- amount: 100,
105
- user: {
106
- id: 123,
107
- profile: {
108
- name: "Alice",
109
- tags: %w[premium verified]
110
- }
111
- }
112
- }
113
-
114
- decision = agent.decide(context: context)
115
- hash = decision.audit_payload[:deterministic_hash]
116
-
117
- expect(hash).to be_a(String)
118
- expect(hash.length).to eq(64)
119
- end
120
-
121
- it "handles arrays consistently" do
122
- # Array order should be preserved (not sorted)
123
- context1 = { amount: 100, tags: %w[a b c] }
124
- context2 = { amount: 100, tags: %w[c b a] }
125
-
126
- decision1 = agent.decide(context: context1)
127
- decision2 = agent.decide(context: context2)
128
-
129
- hash1 = decision1.audit_payload[:deterministic_hash]
130
- hash2 = decision2.audit_payload[:deterministic_hash]
131
-
132
- expect(hash1).not_to eq(hash2), "RFC 8785 preserves array order"
133
- end
134
-
135
- it "handles nil values correctly" do
136
- context = { amount: 100, optional_field: nil }
137
-
138
- decision = agent.decide(context: context)
139
- hash = decision.audit_payload[:deterministic_hash]
140
-
141
- expect(hash).to be_a(String)
142
- expect(hash.length).to eq(64)
143
- end
144
-
145
- it "handles boolean values correctly" do
146
- context = { amount: 100, is_verified: true, is_blocked: false }
147
-
148
- decision = agent.decide(context: context)
149
- hash = decision.audit_payload[:deterministic_hash]
150
-
151
- expect(hash).to be_a(String)
152
- expect(hash.length).to eq(64)
153
- end
154
-
155
- it "is thread-safe with concurrent hash computations" do
156
- contexts = 10.times.map { |i| { amount: i * 100, id: i } }
157
- results = []
158
- mutex = Mutex.new
159
-
160
- threads = contexts.map do |ctx|
161
- Thread.new do
162
- decision = agent.decide(context: ctx)
163
- hash = decision.audit_payload[:deterministic_hash]
164
- mutex.synchronize { results << hash }
165
- end
166
- end
167
-
168
- threads.each(&:join)
169
-
170
- expect(results.size).to eq(10)
171
- expect(results.uniq.size).to eq(10), "Each context should produce unique hash"
172
- results.each do |hash|
173
- expect(hash.length).to eq(64)
174
- end
175
- end
176
- end
177
-
178
- describe "RFC 8785 compliance" do
179
- it "uses json-canonicalization gem for canonicalization" do
180
- # Verify we're using the RFC 8785 implementation
181
- test_data = { b: 2, a: 1 }
182
- canonical = agent.send(:canonical_json, test_data)
183
-
184
- # RFC 8785 should sort keys: {"a":1,"b":2}
185
- expect(canonical).to include('"a":1')
186
- expect(canonical).to include('"b":2')
187
- expect(canonical.index('"a"')).to be < canonical.index('"b"')
188
- end
189
-
190
- it "produces compact JSON without whitespace" do
191
- test_data = { amount: 100, user: { id: 123 } }
192
- canonical = agent.send(:canonical_json, test_data)
193
-
194
- # RFC 8785 produces compact JSON
195
- expect(canonical).not_to include("\n")
196
- expect(canonical).not_to include(" ")
197
- end
198
- end
199
-
200
- describe "performance characteristics" do
201
- it "computes hashes efficiently" do
202
- context = {
203
- amount: 100,
204
- user: { id: 123, name: "Alice", tags: (1..100).to_a }
205
- }
206
-
207
- # Should complete quickly even with larger payloads
208
- start_time = Time.now
209
- 100.times { agent.decide(context: context) }
210
- elapsed = Time.now - start_time
211
-
212
- expect(elapsed).to be < 1.0, "100 decisions should complete in under 1 second"
213
- end
214
- end
215
- end
data/spec/scoring_spec.rb DELETED
@@ -1,225 +0,0 @@
1
- require "spec_helper"
2
-
3
- RSpec.describe "Scoring Strategies" do
4
- let(:eval1) do
5
- DecisionAgent::Evaluation.new(
6
- decision: "approve",
7
- weight: 0.6,
8
- reason: "Test 1",
9
- evaluator_name: "Eval1"
10
- )
11
- end
12
-
13
- let(:eval2) do
14
- DecisionAgent::Evaluation.new(
15
- decision: "approve",
16
- weight: 0.8,
17
- reason: "Test 2",
18
- evaluator_name: "Eval2"
19
- )
20
- end
21
-
22
- let(:eval3) do
23
- DecisionAgent::Evaluation.new(
24
- decision: "reject",
25
- weight: 0.5,
26
- reason: "Test 3",
27
- evaluator_name: "Eval3"
28
- )
29
- end
30
-
31
- describe DecisionAgent::Scoring::WeightedAverage do
32
- it "calculates weighted average for single decision" do
33
- strategy = DecisionAgent::Scoring::WeightedAverage.new
34
- result = strategy.score([eval1, eval2])
35
-
36
- expect(result[:decision]).to eq("approve")
37
- expect(result[:confidence]).to eq(1.0)
38
- end
39
-
40
- it "calculates weighted average with conflicts" do
41
- strategy = DecisionAgent::Scoring::WeightedAverage.new
42
- result = strategy.score([eval1, eval2, eval3])
43
-
44
- total_weight = 0.6 + 0.8 + 0.5
45
- approve_weight = 0.6 + 0.8
46
- expected_confidence = approve_weight / total_weight
47
-
48
- expect(result[:decision]).to eq("approve")
49
- expect(result[:confidence]).to be_within(0.0001).of(expected_confidence)
50
- end
51
-
52
- it "returns 0 confidence for empty evaluations" do
53
- strategy = DecisionAgent::Scoring::WeightedAverage.new
54
- result = strategy.score([])
55
-
56
- expect(result[:decision]).to be_nil
57
- expect(result[:confidence]).to eq(0.0)
58
- end
59
-
60
- it "normalizes confidence to [0, 1]" do
61
- strategy = DecisionAgent::Scoring::WeightedAverage.new
62
- result = strategy.score([eval1])
63
-
64
- expect(result[:confidence]).to be_between(0.0, 1.0)
65
- end
66
- end
67
-
68
- describe DecisionAgent::Scoring::MaxWeight do
69
- it "selects decision with maximum weight" do
70
- strategy = DecisionAgent::Scoring::MaxWeight.new
71
- result = strategy.score([eval1, eval2, eval3])
72
-
73
- expect(result[:decision]).to eq("approve")
74
- expect(result[:confidence]).to eq(0.8)
75
- end
76
-
77
- it "uses first evaluation when weights are equal" do
78
- eval_a = DecisionAgent::Evaluation.new(
79
- decision: "option_a",
80
- weight: 0.7,
81
- reason: "Test A",
82
- evaluator_name: "EvalA"
83
- )
84
- eval_b = DecisionAgent::Evaluation.new(
85
- decision: "option_b",
86
- weight: 0.7,
87
- reason: "Test B",
88
- evaluator_name: "EvalB"
89
- )
90
-
91
- strategy = DecisionAgent::Scoring::MaxWeight.new
92
- result = strategy.score([eval_a, eval_b])
93
-
94
- expect(%w[option_a option_b]).to include(result[:decision])
95
- expect(result[:confidence]).to eq(0.7)
96
- end
97
-
98
- it "returns 0 confidence for empty evaluations" do
99
- strategy = DecisionAgent::Scoring::MaxWeight.new
100
- result = strategy.score([])
101
-
102
- expect(result[:decision]).to be_nil
103
- expect(result[:confidence]).to eq(0.0)
104
- end
105
- end
106
-
107
- describe DecisionAgent::Scoring::Consensus do
108
- it "selects decision with highest agreement" do
109
- eval4 = DecisionAgent::Evaluation.new(
110
- decision: "approve",
111
- weight: 0.7,
112
- reason: "Test 4",
113
- evaluator_name: "Eval4"
114
- )
115
-
116
- strategy = DecisionAgent::Scoring::Consensus.new
117
- result = strategy.score([eval1, eval2, eval3, eval4])
118
-
119
- expect(result[:decision]).to eq("approve")
120
- end
121
-
122
- it "considers both agreement and weight" do
123
- low_weight_majority = [
124
- DecisionAgent::Evaluation.new(decision: "approve", weight: 0.3, reason: "A", evaluator_name: "E1"),
125
- DecisionAgent::Evaluation.new(decision: "approve", weight: 0.3, reason: "B", evaluator_name: "E2"),
126
- DecisionAgent::Evaluation.new(decision: "approve", weight: 0.3, reason: "C", evaluator_name: "E3")
127
- ]
128
-
129
- high_weight_minority = [
130
- DecisionAgent::Evaluation.new(decision: "reject", weight: 0.9, reason: "D", evaluator_name: "E4")
131
- ]
132
-
133
- strategy = DecisionAgent::Scoring::Consensus.new
134
- result = strategy.score(low_weight_majority + high_weight_minority)
135
-
136
- expect(result[:decision]).to eq("approve")
137
- end
138
-
139
- it "reduces confidence when minimum agreement not met" do
140
- eval_spread = [
141
- DecisionAgent::Evaluation.new(decision: "option_a", weight: 0.8, reason: "A", evaluator_name: "E1"),
142
- DecisionAgent::Evaluation.new(decision: "option_b", weight: 0.7, reason: "B", evaluator_name: "E2"),
143
- DecisionAgent::Evaluation.new(decision: "option_c", weight: 0.6, reason: "C", evaluator_name: "E3")
144
- ]
145
-
146
- strategy = DecisionAgent::Scoring::Consensus.new(minimum_agreement: 0.5)
147
- result = strategy.score(eval_spread)
148
-
149
- expect(result[:confidence]).to be < 0.5
150
- end
151
-
152
- it "allows custom minimum agreement threshold" do
153
- strategy = DecisionAgent::Scoring::Consensus.new(minimum_agreement: 0.7)
154
- result = strategy.score([eval1, eval2, eval3])
155
-
156
- expect(result[:decision]).to eq("approve")
157
- end
158
-
159
- it "returns 0 confidence for empty evaluations" do
160
- strategy = DecisionAgent::Scoring::Consensus.new
161
- result = strategy.score([])
162
-
163
- expect(result[:decision]).to be_nil
164
- expect(result[:confidence]).to eq(0.0)
165
- end
166
- end
167
-
168
- describe DecisionAgent::Scoring::Threshold do
169
- it "accepts decision when weight meets threshold" do
170
- strategy = DecisionAgent::Scoring::Threshold.new(threshold: 0.7)
171
- result = strategy.score([eval2])
172
-
173
- expect(result[:decision]).to eq("approve")
174
- expect(result[:confidence]).to eq(0.8)
175
- end
176
-
177
- it "returns fallback decision when weight below threshold" do
178
- strategy = DecisionAgent::Scoring::Threshold.new(threshold: 0.9, fallback_decision: "manual_review")
179
- result = strategy.score([eval2])
180
-
181
- expect(result[:decision]).to eq("manual_review")
182
- expect(result[:confidence]).to be < 0.9
183
- end
184
-
185
- it "uses average weight across evaluations with same decision" do
186
- strategy = DecisionAgent::Scoring::Threshold.new(threshold: 0.7)
187
- result = strategy.score([eval1, eval2])
188
-
189
- avg_weight = (0.6 + 0.8) / 2
190
- expect(result[:decision]).to eq("approve")
191
- expect(result[:confidence]).to eq(avg_weight)
192
- end
193
-
194
- it "uses default fallback decision" do
195
- strategy = DecisionAgent::Scoring::Threshold.new(threshold: 0.9)
196
- result = strategy.score([eval1])
197
-
198
- expect(result[:decision]).to eq("no_decision")
199
- end
200
-
201
- it "returns fallback for empty evaluations" do
202
- strategy = DecisionAgent::Scoring::Threshold.new(fallback_decision: "default")
203
- result = strategy.score([])
204
-
205
- expect(result[:decision]).to eq("default")
206
- expect(result[:confidence]).to eq(0.0)
207
- end
208
- end
209
-
210
- describe "confidence bounds" do
211
- it "ensures all strategies return confidence between 0 and 1" do
212
- strategies = [
213
- DecisionAgent::Scoring::WeightedAverage.new,
214
- DecisionAgent::Scoring::MaxWeight.new,
215
- DecisionAgent::Scoring::Consensus.new,
216
- DecisionAgent::Scoring::Threshold.new
217
- ]
218
-
219
- strategies.each do |strategy|
220
- result = strategy.score([eval1, eval2, eval3])
221
- expect(result[:confidence]).to be_between(0.0, 1.0)
222
- end
223
- end
224
- end
225
- end
data/spec/spec_helper.rb DELETED
@@ -1,60 +0,0 @@
1
- require "simplecov"
2
- SimpleCov.start do
3
- add_filter "/spec/"
4
- add_filter "/examples/"
5
- end
6
-
7
- require "decision_agent"
8
-
9
- # Load ActiveRecord for thread-safety and integration tests
10
- begin
11
- require "active_record"
12
- require "sqlite3"
13
- require "decision_agent/versioning/activerecord_adapter"
14
- rescue LoadError
15
- # ActiveRecord is optional - tests will be skipped if not available
16
- end
17
-
18
- # Store original value for cleanup
19
- # rubocop:disable Style/GlobalVars
20
- $original_disable_webui_permissions = nil
21
- # rubocop:enable Style/GlobalVars
22
-
23
- RSpec.configure do |config|
24
- config.expect_with :rspec do |expectations|
25
- expectations.include_chain_clauses_in_custom_matcher_descriptions = true
26
- end
27
-
28
- config.mock_with :rspec do |mocks|
29
- mocks.verify_partial_doubles = true
30
- end
31
-
32
- config.shared_context_metadata_behavior = :apply_to_host_groups
33
- config.filter_run_when_matching :focus
34
- config.example_status_persistence_file_path = "spec/examples.txt"
35
- config.disable_monkey_patching!
36
- config.warnings = true
37
-
38
- config.default_formatter = "doc" if config.files_to_run.one?
39
-
40
- config.order = :random
41
- Kernel.srand config.seed
42
-
43
- # Ensure permissions are enabled for tests
44
- config.before(:suite) do
45
- # rubocop:disable Style/GlobalVars
46
- $original_disable_webui_permissions = ENV.fetch("DISABLE_WEBUI_PERMISSIONS", nil)
47
- # rubocop:enable Style/GlobalVars
48
- ENV["DISABLE_WEBUI_PERMISSIONS"] = "false"
49
- end
50
-
51
- config.after(:suite) do
52
- # rubocop:disable Style/GlobalVars
53
- if $original_disable_webui_permissions
54
- ENV["DISABLE_WEBUI_PERMISSIONS"] = $original_disable_webui_permissions
55
- else
56
- ENV.delete("DISABLE_WEBUI_PERMISSIONS")
57
- end
58
- # rubocop:enable Style/GlobalVars
59
- end
60
- end