decision_agent 0.3.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. checksums.yaml +4 -4
  2. data/README.md +234 -14
  3. data/lib/decision_agent/ab_testing/ab_test.rb +5 -1
  4. data/lib/decision_agent/ab_testing/ab_test_assignment.rb +2 -0
  5. data/lib/decision_agent/ab_testing/ab_test_manager.rb +2 -0
  6. data/lib/decision_agent/ab_testing/ab_testing_agent.rb +2 -0
  7. data/lib/decision_agent/ab_testing/storage/activerecord_adapter.rb +2 -13
  8. data/lib/decision_agent/ab_testing/storage/adapter.rb +2 -0
  9. data/lib/decision_agent/ab_testing/storage/memory_adapter.rb +2 -0
  10. data/lib/decision_agent/agent.rb +78 -9
  11. data/lib/decision_agent/audit/adapter.rb +2 -0
  12. data/lib/decision_agent/audit/logger_adapter.rb +2 -0
  13. data/lib/decision_agent/audit/null_adapter.rb +2 -0
  14. data/lib/decision_agent/auth/access_audit_logger.rb +2 -0
  15. data/lib/decision_agent/auth/authenticator.rb +2 -0
  16. data/lib/decision_agent/auth/password_reset_manager.rb +2 -0
  17. data/lib/decision_agent/auth/password_reset_token.rb +2 -0
  18. data/lib/decision_agent/auth/permission.rb +2 -0
  19. data/lib/decision_agent/auth/permission_checker.rb +2 -0
  20. data/lib/decision_agent/auth/rbac_adapter.rb +2 -0
  21. data/lib/decision_agent/auth/rbac_config.rb +2 -0
  22. data/lib/decision_agent/auth/role.rb +2 -0
  23. data/lib/decision_agent/auth/session.rb +2 -0
  24. data/lib/decision_agent/auth/session_manager.rb +2 -0
  25. data/lib/decision_agent/auth/user.rb +2 -0
  26. data/lib/decision_agent/context.rb +14 -0
  27. data/lib/decision_agent/decision.rb +113 -4
  28. data/lib/decision_agent/dmn/adapter.rb +2 -0
  29. data/lib/decision_agent/dmn/cache.rb +2 -2
  30. data/lib/decision_agent/dmn/decision_graph.rb +7 -7
  31. data/lib/decision_agent/dmn/decision_tree.rb +16 -8
  32. data/lib/decision_agent/dmn/errors.rb +2 -0
  33. data/lib/decision_agent/dmn/exporter.rb +2 -0
  34. data/lib/decision_agent/dmn/feel/evaluator.rb +130 -114
  35. data/lib/decision_agent/dmn/feel/functions.rb +2 -0
  36. data/lib/decision_agent/dmn/feel/parser.rb +2 -0
  37. data/lib/decision_agent/dmn/feel/simple_parser.rb +98 -77
  38. data/lib/decision_agent/dmn/feel/transformer.rb +56 -102
  39. data/lib/decision_agent/dmn/feel/types.rb +2 -0
  40. data/lib/decision_agent/dmn/importer.rb +2 -0
  41. data/lib/decision_agent/dmn/model.rb +2 -4
  42. data/lib/decision_agent/dmn/parser.rb +2 -0
  43. data/lib/decision_agent/dmn/testing.rb +3 -2
  44. data/lib/decision_agent/dmn/validator.rb +5 -3
  45. data/lib/decision_agent/dmn/visualizer.rb +7 -6
  46. data/lib/decision_agent/dsl/condition_evaluator.rb +242 -1375
  47. data/lib/decision_agent/dsl/helpers/cache_helpers.rb +82 -0
  48. data/lib/decision_agent/dsl/helpers/comparison_helpers.rb +98 -0
  49. data/lib/decision_agent/dsl/helpers/date_helpers.rb +91 -0
  50. data/lib/decision_agent/dsl/helpers/geospatial_helpers.rb +85 -0
  51. data/lib/decision_agent/dsl/helpers/operator_evaluation_helpers.rb +160 -0
  52. data/lib/decision_agent/dsl/helpers/parameter_parsing_helpers.rb +206 -0
  53. data/lib/decision_agent/dsl/helpers/template_helpers.rb +39 -0
  54. data/lib/decision_agent/dsl/helpers/utility_helpers.rb +45 -0
  55. data/lib/decision_agent/dsl/operators/base.rb +70 -0
  56. data/lib/decision_agent/dsl/operators/basic_comparison_operators.rb +80 -0
  57. data/lib/decision_agent/dsl/operators/collection_operators.rb +60 -0
  58. data/lib/decision_agent/dsl/operators/date_arithmetic_operators.rb +206 -0
  59. data/lib/decision_agent/dsl/operators/date_time_operators.rb +47 -0
  60. data/lib/decision_agent/dsl/operators/duration_operators.rb +149 -0
  61. data/lib/decision_agent/dsl/operators/financial_operators.rb +237 -0
  62. data/lib/decision_agent/dsl/operators/geospatial_operators.rb +106 -0
  63. data/lib/decision_agent/dsl/operators/mathematical_operators.rb +234 -0
  64. data/lib/decision_agent/dsl/operators/moving_window_operators.rb +135 -0
  65. data/lib/decision_agent/dsl/operators/numeric_operators.rb +120 -0
  66. data/lib/decision_agent/dsl/operators/rate_operators.rb +65 -0
  67. data/lib/decision_agent/dsl/operators/statistical_aggregations.rb +187 -0
  68. data/lib/decision_agent/dsl/operators/string_aggregations.rb +84 -0
  69. data/lib/decision_agent/dsl/operators/string_operators.rb +72 -0
  70. data/lib/decision_agent/dsl/operators/time_component_operators.rb +72 -0
  71. data/lib/decision_agent/dsl/rule_parser.rb +2 -0
  72. data/lib/decision_agent/dsl/schema_validator.rb +37 -14
  73. data/lib/decision_agent/errors.rb +2 -0
  74. data/lib/decision_agent/evaluation.rb +14 -2
  75. data/lib/decision_agent/evaluators/base.rb +2 -0
  76. data/lib/decision_agent/evaluators/dmn_evaluator.rb +108 -19
  77. data/lib/decision_agent/evaluators/json_rule_evaluator.rb +56 -11
  78. data/lib/decision_agent/evaluators/static_evaluator.rb +2 -0
  79. data/lib/decision_agent/explainability/condition_trace.rb +85 -0
  80. data/lib/decision_agent/explainability/explainability_result.rb +50 -0
  81. data/lib/decision_agent/explainability/rule_trace.rb +41 -0
  82. data/lib/decision_agent/explainability/trace_collector.rb +26 -0
  83. data/lib/decision_agent/monitoring/alert_manager.rb +7 -16
  84. data/lib/decision_agent/monitoring/dashboard_server.rb +383 -250
  85. data/lib/decision_agent/monitoring/metrics_collector.rb +2 -0
  86. data/lib/decision_agent/monitoring/monitored_agent.rb +2 -0
  87. data/lib/decision_agent/monitoring/prometheus_exporter.rb +3 -1
  88. data/lib/decision_agent/replay/replay.rb +4 -1
  89. data/lib/decision_agent/scoring/base.rb +2 -0
  90. data/lib/decision_agent/scoring/consensus.rb +2 -0
  91. data/lib/decision_agent/scoring/max_weight.rb +2 -0
  92. data/lib/decision_agent/scoring/threshold.rb +2 -0
  93. data/lib/decision_agent/scoring/weighted_average.rb +2 -0
  94. data/lib/decision_agent/simulation/errors.rb +20 -0
  95. data/lib/decision_agent/simulation/impact_analyzer.rb +500 -0
  96. data/lib/decision_agent/simulation/monte_carlo_simulator.rb +638 -0
  97. data/lib/decision_agent/simulation/replay_engine.rb +488 -0
  98. data/lib/decision_agent/simulation/scenario_engine.rb +320 -0
  99. data/lib/decision_agent/simulation/scenario_library.rb +165 -0
  100. data/lib/decision_agent/simulation/shadow_test_engine.rb +274 -0
  101. data/lib/decision_agent/simulation/what_if_analyzer.rb +1008 -0
  102. data/lib/decision_agent/simulation.rb +19 -0
  103. data/lib/decision_agent/testing/batch_test_importer.rb +6 -2
  104. data/lib/decision_agent/testing/batch_test_runner.rb +5 -2
  105. data/lib/decision_agent/testing/test_coverage_analyzer.rb +2 -0
  106. data/lib/decision_agent/testing/test_result_comparator.rb +2 -0
  107. data/lib/decision_agent/testing/test_scenario.rb +2 -0
  108. data/lib/decision_agent/version.rb +3 -1
  109. data/lib/decision_agent/versioning/activerecord_adapter.rb +108 -43
  110. data/lib/decision_agent/versioning/adapter.rb +9 -0
  111. data/lib/decision_agent/versioning/file_storage_adapter.rb +19 -6
  112. data/lib/decision_agent/versioning/version_manager.rb +9 -0
  113. data/lib/decision_agent/web/dmn_editor/serialization.rb +74 -0
  114. data/lib/decision_agent/web/dmn_editor/xml_builder.rb +107 -0
  115. data/lib/decision_agent/web/dmn_editor.rb +8 -67
  116. data/lib/decision_agent/web/middleware/auth_middleware.rb +2 -0
  117. data/lib/decision_agent/web/middleware/permission_middleware.rb +3 -1
  118. data/lib/decision_agent/web/public/app.js +186 -26
  119. data/lib/decision_agent/web/public/batch_testing.html +80 -6
  120. data/lib/decision_agent/web/public/dmn-editor.html +2 -2
  121. data/lib/decision_agent/web/public/dmn-editor.js +74 -8
  122. data/lib/decision_agent/web/public/index.html +69 -3
  123. data/lib/decision_agent/web/public/login.html +1 -1
  124. data/lib/decision_agent/web/public/sample_batch.csv +11 -0
  125. data/lib/decision_agent/web/public/sample_impact.csv +11 -0
  126. data/lib/decision_agent/web/public/sample_replay.csv +11 -0
  127. data/lib/decision_agent/web/public/sample_rules.json +118 -0
  128. data/lib/decision_agent/web/public/sample_shadow.csv +11 -0
  129. data/lib/decision_agent/web/public/sample_whatif.csv +11 -0
  130. data/lib/decision_agent/web/public/simulation.html +146 -0
  131. data/lib/decision_agent/web/public/simulation_impact.html +495 -0
  132. data/lib/decision_agent/web/public/simulation_replay.html +547 -0
  133. data/lib/decision_agent/web/public/simulation_shadow.html +561 -0
  134. data/lib/decision_agent/web/public/simulation_whatif.html +549 -0
  135. data/lib/decision_agent/web/public/styles.css +65 -0
  136. data/lib/decision_agent/web/public/users.html +1 -1
  137. data/lib/decision_agent/web/rack_helpers.rb +106 -0
  138. data/lib/decision_agent/web/rack_request_helpers.rb +196 -0
  139. data/lib/decision_agent/web/server.rb +2126 -1374
  140. data/lib/decision_agent.rb +19 -1
  141. data/lib/generators/decision_agent/install/install_generator.rb +2 -0
  142. data/lib/generators/decision_agent/install/templates/ab_test_assignment_model.rb +2 -0
  143. data/lib/generators/decision_agent/install/templates/ab_test_model.rb +2 -0
  144. data/lib/generators/decision_agent/install/templates/ab_testing_migration.rb +2 -0
  145. data/lib/generators/decision_agent/install/templates/migration.rb +2 -0
  146. data/lib/generators/decision_agent/install/templates/rule.rb +2 -0
  147. data/lib/generators/decision_agent/install/templates/rule_version.rb +2 -0
  148. metadata +103 -89
  149. data/spec/ab_testing/ab_test_assignment_spec.rb +0 -253
  150. data/spec/ab_testing/ab_test_manager_spec.rb +0 -612
  151. data/spec/ab_testing/ab_test_spec.rb +0 -270
  152. data/spec/ab_testing/ab_testing_agent_spec.rb +0 -655
  153. data/spec/ab_testing/storage/adapter_spec.rb +0 -64
  154. data/spec/ab_testing/storage/memory_adapter_spec.rb +0 -485
  155. data/spec/activerecord_thread_safety_spec.rb +0 -553
  156. data/spec/advanced_operators_spec.rb +0 -3150
  157. data/spec/agent_spec.rb +0 -289
  158. data/spec/api_contract_spec.rb +0 -430
  159. data/spec/audit_adapters_spec.rb +0 -92
  160. data/spec/auth/access_audit_logger_spec.rb +0 -394
  161. data/spec/auth/authenticator_spec.rb +0 -112
  162. data/spec/auth/password_reset_spec.rb +0 -294
  163. data/spec/auth/permission_checker_spec.rb +0 -207
  164. data/spec/auth/permission_spec.rb +0 -73
  165. data/spec/auth/rbac_adapter_spec.rb +0 -778
  166. data/spec/auth/rbac_config_spec.rb +0 -82
  167. data/spec/auth/role_spec.rb +0 -51
  168. data/spec/auth/session_manager_spec.rb +0 -172
  169. data/spec/auth/session_spec.rb +0 -112
  170. data/spec/auth/user_spec.rb +0 -130
  171. data/spec/comprehensive_edge_cases_spec.rb +0 -1777
  172. data/spec/context_spec.rb +0 -127
  173. data/spec/decision_agent_spec.rb +0 -96
  174. data/spec/decision_spec.rb +0 -423
  175. data/spec/dmn/decision_graph_spec.rb +0 -282
  176. data/spec/dmn/decision_tree_spec.rb +0 -203
  177. data/spec/dmn/feel/errors_spec.rb +0 -18
  178. data/spec/dmn/feel/functions_spec.rb +0 -400
  179. data/spec/dmn/feel/simple_parser_spec.rb +0 -274
  180. data/spec/dmn/feel/types_spec.rb +0 -176
  181. data/spec/dmn/feel_parser_spec.rb +0 -489
  182. data/spec/dmn/hit_policy_spec.rb +0 -202
  183. data/spec/dmn/integration_spec.rb +0 -226
  184. data/spec/dsl/condition_evaluator_spec.rb +0 -774
  185. data/spec/dsl_validation_spec.rb +0 -648
  186. data/spec/edge_cases_spec.rb +0 -353
  187. data/spec/evaluation_spec.rb +0 -364
  188. data/spec/evaluation_validator_spec.rb +0 -165
  189. data/spec/examples/feedback_aware_evaluator_spec.rb +0 -460
  190. data/spec/examples.txt +0 -1909
  191. data/spec/fixtures/dmn/complex_decision.dmn +0 -81
  192. data/spec/fixtures/dmn/invalid_structure.dmn +0 -31
  193. data/spec/fixtures/dmn/simple_decision.dmn +0 -40
  194. data/spec/issue_verification_spec.rb +0 -759
  195. data/spec/json_rule_evaluator_spec.rb +0 -587
  196. data/spec/monitoring/alert_manager_spec.rb +0 -378
  197. data/spec/monitoring/metrics_collector_spec.rb +0 -501
  198. data/spec/monitoring/monitored_agent_spec.rb +0 -225
  199. data/spec/monitoring/prometheus_exporter_spec.rb +0 -242
  200. data/spec/monitoring/storage/activerecord_adapter_spec.rb +0 -498
  201. data/spec/monitoring/storage/base_adapter_spec.rb +0 -61
  202. data/spec/monitoring/storage/memory_adapter_spec.rb +0 -247
  203. data/spec/performance_optimizations_spec.rb +0 -493
  204. data/spec/replay_edge_cases_spec.rb +0 -699
  205. data/spec/replay_spec.rb +0 -210
  206. data/spec/rfc8785_canonicalization_spec.rb +0 -215
  207. data/spec/scoring_spec.rb +0 -225
  208. data/spec/spec_helper.rb +0 -60
  209. data/spec/testing/batch_test_importer_spec.rb +0 -693
  210. data/spec/testing/batch_test_runner_spec.rb +0 -307
  211. data/spec/testing/test_coverage_analyzer_spec.rb +0 -292
  212. data/spec/testing/test_result_comparator_spec.rb +0 -392
  213. data/spec/testing/test_scenario_spec.rb +0 -113
  214. data/spec/thread_safety_spec.rb +0 -490
  215. data/spec/thread_safety_spec.rb.broken +0 -878
  216. data/spec/versioning/adapter_spec.rb +0 -156
  217. data/spec/versioning_spec.rb +0 -1030
  218. data/spec/web/middleware/auth_middleware_spec.rb +0 -133
  219. data/spec/web/middleware/permission_middleware_spec.rb +0 -247
  220. data/spec/web_ui_rack_spec.rb +0 -2134
@@ -1,392 +0,0 @@
1
- require "spec_helper"
2
- require "tempfile"
3
-
4
- RSpec.describe DecisionAgent::Testing::TestResultComparator do
5
- let(:comparator) { DecisionAgent::Testing::TestResultComparator.new }
6
-
7
- describe "#compare" do
8
- let(:scenarios) do
9
- [
10
- DecisionAgent::Testing::TestScenario.new(
11
- id: "test_1",
12
- context: { user_id: 123 },
13
- expected_decision: "approve",
14
- expected_confidence: 0.95
15
- ),
16
- DecisionAgent::Testing::TestScenario.new(
17
- id: "test_2",
18
- context: { user_id: 456 },
19
- expected_decision: "reject",
20
- expected_confidence: 0.80
21
- )
22
- ]
23
- end
24
-
25
- let(:results) do
26
- [
27
- DecisionAgent::Testing::TestResult.new(
28
- scenario_id: "test_1",
29
- decision: "approve",
30
- confidence: 0.95
31
- ),
32
- DecisionAgent::Testing::TestResult.new(
33
- scenario_id: "test_2",
34
- decision: "reject",
35
- confidence: 0.80
36
- )
37
- ]
38
- end
39
-
40
- it "compares results with expected outcomes" do
41
- summary = comparator.compare(results, scenarios)
42
-
43
- expect(summary[:total]).to eq(2)
44
- expect(summary[:matches]).to eq(2)
45
- expect(summary[:mismatches]).to eq(0)
46
- expect(summary[:accuracy_rate]).to eq(1.0)
47
- end
48
-
49
- it "identifies mismatches" do
50
- mismatched_results = [
51
- DecisionAgent::Testing::TestResult.new(
52
- scenario_id: "test_1",
53
- decision: "reject", # Wrong decision
54
- confidence: 0.95
55
- ),
56
- DecisionAgent::Testing::TestResult.new(
57
- scenario_id: "test_2",
58
- decision: "reject",
59
- confidence: 0.50 # Wrong confidence
60
- )
61
- ]
62
-
63
- summary = comparator.compare(mismatched_results, scenarios)
64
-
65
- expect(summary[:matches]).to eq(0)
66
- expect(summary[:mismatches]).to eq(2)
67
- expect(summary[:accuracy_rate]).to eq(0.0)
68
- expect(summary[:mismatches_detail].size).to eq(2)
69
- end
70
-
71
- it "handles confidence tolerance" do
72
- comparator_with_tolerance = DecisionAgent::Testing::TestResultComparator.new(
73
- confidence_tolerance: 0.1
74
- )
75
-
76
- results_with_tolerance = [
77
- DecisionAgent::Testing::TestResult.new(
78
- scenario_id: "test_1",
79
- decision: "approve",
80
- confidence: 0.96 # Within 0.1 tolerance of 0.95
81
- )
82
- ]
83
-
84
- scenarios_single = [scenarios[0]]
85
- summary = comparator_with_tolerance.compare(results_with_tolerance, scenarios_single)
86
-
87
- expect(summary[:matches]).to eq(1)
88
- expect(summary[:confidence_accuracy]).to eq(1.0)
89
- end
90
-
91
- it "handles fuzzy matching" do
92
- comparator_fuzzy = DecisionAgent::Testing::TestResultComparator.new(fuzzy_match: true)
93
-
94
- scenarios_fuzzy = [
95
- DecisionAgent::Testing::TestScenario.new(
96
- id: "test_1",
97
- context: { user_id: 123 },
98
- expected_decision: "APPROVE", # Uppercase
99
- expected_confidence: 0.95
100
- )
101
- ]
102
-
103
- results_fuzzy = [
104
- DecisionAgent::Testing::TestResult.new(
105
- scenario_id: "test_1",
106
- decision: "approve", # Lowercase - should match with fuzzy
107
- confidence: 0.95
108
- )
109
- ]
110
-
111
- summary = comparator_fuzzy.compare(results_fuzzy, scenarios_fuzzy)
112
- expect(summary[:matches]).to eq(1)
113
- end
114
-
115
- it "handles fuzzy matching with whitespace" do
116
- comparator_fuzzy = DecisionAgent::Testing::TestResultComparator.new(fuzzy_match: true)
117
-
118
- scenarios_fuzzy = [
119
- DecisionAgent::Testing::TestScenario.new(
120
- id: "test_1",
121
- context: { user_id: 123 },
122
- expected_decision: " approve ", # With spaces
123
- expected_confidence: 0.95
124
- )
125
- ]
126
-
127
- results_fuzzy = [
128
- DecisionAgent::Testing::TestResult.new(
129
- scenario_id: "test_1",
130
- decision: "approve", # Without spaces - should match with fuzzy
131
- confidence: 0.95
132
- )
133
- ]
134
-
135
- summary = comparator_fuzzy.compare(results_fuzzy, scenarios_fuzzy)
136
- expect(summary[:matches]).to eq(1)
137
- end
138
-
139
- it "handles nil expected confidence" do
140
- scenarios_nil_conf = [
141
- DecisionAgent::Testing::TestScenario.new(
142
- id: "test_1",
143
- context: { user_id: 123 },
144
- expected_decision: "approve",
145
- expected_confidence: nil
146
- )
147
- ]
148
-
149
- results_nil_conf = [
150
- DecisionAgent::Testing::TestResult.new(
151
- scenario_id: "test_1",
152
- decision: "approve",
153
- confidence: 0.95
154
- )
155
- ]
156
-
157
- summary = comparator.compare(results_nil_conf, scenarios_nil_conf)
158
- expect(summary[:matches]).to eq(1)
159
- end
160
-
161
- it "handles nil actual confidence when expected is present" do
162
- scenarios_with_conf = [
163
- DecisionAgent::Testing::TestScenario.new(
164
- id: "test_1",
165
- context: { user_id: 123 },
166
- expected_decision: "approve",
167
- expected_confidence: 0.95
168
- )
169
- ]
170
-
171
- results_no_conf = [
172
- DecisionAgent::Testing::TestResult.new(
173
- scenario_id: "test_1",
174
- decision: "approve",
175
- confidence: nil
176
- )
177
- ]
178
-
179
- summary = comparator.compare(results_no_conf, scenarios_with_conf)
180
- expect(summary[:matches]).to eq(0)
181
- expect(summary[:mismatches]).to eq(1)
182
- end
183
-
184
- it "handles missing results for scenarios" do
185
- scenarios_missing = [
186
- DecisionAgent::Testing::TestScenario.new(
187
- id: "test_1",
188
- context: { user_id: 123 },
189
- expected_decision: "approve",
190
- expected_confidence: 0.95
191
- ),
192
- DecisionAgent::Testing::TestScenario.new(
193
- id: "test_2",
194
- context: { user_id: 456 },
195
- expected_decision: "reject",
196
- expected_confidence: 0.80
197
- )
198
- ]
199
-
200
- # Only provide result for test_1
201
- results_missing = [
202
- DecisionAgent::Testing::TestResult.new(
203
- scenario_id: "test_1",
204
- decision: "approve",
205
- confidence: 0.95
206
- )
207
- ]
208
-
209
- summary = comparator.compare(results_missing, scenarios_missing)
210
- # Should only compare test_1 since test_2 has no result
211
- expect(summary[:total]).to eq(1)
212
- end
213
-
214
- it "handles confidence outside tolerance" do
215
- comparator_strict = DecisionAgent::Testing::TestResultComparator.new(
216
- confidence_tolerance: 0.01
217
- )
218
-
219
- scenarios_strict = [
220
- DecisionAgent::Testing::TestScenario.new(
221
- id: "test_1",
222
- context: { user_id: 123 },
223
- expected_decision: "approve",
224
- expected_confidence: 0.95
225
- )
226
- ]
227
-
228
- results_outside = [
229
- DecisionAgent::Testing::TestResult.new(
230
- scenario_id: "test_1",
231
- decision: "approve",
232
- confidence: 0.98 # Outside 0.01 tolerance
233
- )
234
- ]
235
-
236
- summary = comparator_strict.compare(results_outside, scenarios_strict)
237
- expect(summary[:matches]).to eq(0)
238
- expect(summary[:confidence_accuracy]).to eq(0.0)
239
- end
240
-
241
- it "handles missing expected results" do
242
- scenarios_no_expected = [
243
- DecisionAgent::Testing::TestScenario.new(
244
- id: "test_1",
245
- context: { user_id: 123 }
246
- # No expected_decision
247
- )
248
- ]
249
-
250
- summary = comparator.compare(results, scenarios_no_expected)
251
-
252
- # Should not compare scenarios without expected results
253
- expect(summary[:total]).to eq(0)
254
- end
255
-
256
- it "handles failed test results" do
257
- failed_results = [
258
- DecisionAgent::Testing::TestResult.new(
259
- scenario_id: "test_1",
260
- error: StandardError.new("Test failed")
261
- )
262
- ]
263
-
264
- # Only compare scenarios that have expected results
265
- scenarios_with_expected = scenarios.select(&:expected_result?)
266
- summary = comparator.compare(failed_results, scenarios_with_expected)
267
-
268
- expect(summary[:mismatches]).to eq(1)
269
- expect(comparator.comparison_results[0].match).to be false
270
- end
271
- end
272
-
273
- describe "#generate_summary" do
274
- it "returns empty summary when no comparisons" do
275
- summary = comparator.generate_summary
276
-
277
- expect(summary[:total]).to eq(0)
278
- expect(summary[:matches]).to eq(0)
279
- expect(summary[:accuracy_rate]).to eq(0.0)
280
- end
281
- end
282
-
283
- describe "#export_csv" do
284
- it "exports comparison results to CSV" do
285
- scenarios = [
286
- DecisionAgent::Testing::TestScenario.new(
287
- id: "test_1",
288
- context: { user_id: 123 },
289
- expected_decision: "approve",
290
- expected_confidence: 0.95
291
- )
292
- ]
293
-
294
- results = [
295
- DecisionAgent::Testing::TestResult.new(
296
- scenario_id: "test_1",
297
- decision: "approve",
298
- confidence: 0.95
299
- )
300
- ]
301
-
302
- comparator.compare(results, scenarios)
303
-
304
- file = Tempfile.new(["comparison", ".csv"])
305
- comparator.export_csv(file.path)
306
-
307
- content = File.read(file.path)
308
- expect(content).to include("scenario_id")
309
- expect(content).to include("test_1")
310
- expect(content).to include("true") # match
311
-
312
- file.unlink
313
- end
314
- end
315
-
316
- describe "#export_json" do
317
- it "exports comparison results to JSON" do
318
- scenarios = [
319
- DecisionAgent::Testing::TestScenario.new(
320
- id: "test_1",
321
- context: { user_id: 123 },
322
- expected_decision: "approve",
323
- expected_confidence: 0.95
324
- )
325
- ]
326
-
327
- results = [
328
- DecisionAgent::Testing::TestResult.new(
329
- scenario_id: "test_1",
330
- decision: "approve",
331
- confidence: 0.95
332
- )
333
- ]
334
-
335
- comparator.compare(results, scenarios)
336
-
337
- file = Tempfile.new(["comparison", ".json"])
338
- comparator.export_json(file.path)
339
-
340
- content = JSON.parse(File.read(file.path))
341
- expect(content).to have_key("summary")
342
- expect(content).to have_key("results")
343
- expect(content["summary"]["total"]).to eq(1)
344
-
345
- file.unlink
346
- end
347
-
348
- it "handles empty comparison results" do
349
- file = Tempfile.new(["comparison", ".csv"])
350
- comparator.export_csv(file.path)
351
-
352
- content = File.read(file.path)
353
- expect(content).to include("scenario_id")
354
-
355
- file.unlink
356
- end
357
- end
358
-
359
- describe "ComparisonResult" do
360
- let(:comparison_result) do
361
- DecisionAgent::Testing::ComparisonResult.new(
362
- scenario_id: "test_1",
363
- match: true,
364
- decision_match: true,
365
- confidence_match: true,
366
- differences: [],
367
- actual: { decision: "approve", confidence: 0.95 },
368
- expected: { decision: "approve", confidence: 0.95 }
369
- )
370
- end
371
-
372
- it "creates a comparison result" do
373
- expect(comparison_result.scenario_id).to eq("test_1")
374
- expect(comparison_result.match).to be true
375
- expect(comparison_result.decision_match).to be true
376
- expect(comparison_result.confidence_match).to be true
377
- end
378
-
379
- it "converts to hash" do
380
- hash = comparison_result.to_h
381
-
382
- expect(hash[:scenario_id]).to eq("test_1")
383
- expect(hash[:match]).to be true
384
- expect(hash[:actual][:decision]).to eq("approve")
385
- expect(hash[:expected][:decision]).to eq("approve")
386
- end
387
-
388
- it "freezes the comparison result" do
389
- expect(comparison_result.frozen?).to be true
390
- end
391
- end
392
- end
@@ -1,113 +0,0 @@
1
- require "spec_helper"
2
-
3
- RSpec.describe DecisionAgent::Testing::TestScenario do
4
- describe "#initialize" do
5
- it "creates a test scenario with required fields" do
6
- scenario = DecisionAgent::Testing::TestScenario.new(
7
- id: "test_1",
8
- context: { user_id: 123, amount: 1000 }
9
- )
10
-
11
- expect(scenario.id).to eq("test_1")
12
- expect(scenario.context).to eq({ user_id: 123, amount: 1000 })
13
- expect(scenario.expected_decision).to be_nil
14
- expect(scenario.expected_confidence).to be_nil
15
- end
16
-
17
- it "creates a test scenario with expected results" do
18
- scenario = DecisionAgent::Testing::TestScenario.new(
19
- id: "test_2",
20
- context: { user_id: 456 },
21
- expected_decision: "approve",
22
- expected_confidence: 0.95
23
- )
24
-
25
- expect(scenario.expected_decision).to eq("approve")
26
- expect(scenario.expected_confidence).to eq(0.95)
27
- end
28
-
29
- it "freezes the scenario for immutability" do
30
- scenario = DecisionAgent::Testing::TestScenario.new(
31
- id: "test_3",
32
- context: { key: "value" }
33
- )
34
-
35
- expect(scenario.frozen?).to be true
36
- end
37
- end
38
-
39
- describe "#expected_result?" do
40
- it "returns true when expected_decision is set" do
41
- scenario = DecisionAgent::Testing::TestScenario.new(
42
- id: "test_1",
43
- context: { key: "value" },
44
- expected_decision: "approve"
45
- )
46
-
47
- expect(scenario.expected_result?).to be true
48
- end
49
-
50
- it "returns false when expected_decision is nil" do
51
- scenario = DecisionAgent::Testing::TestScenario.new(
52
- id: "test_1",
53
- context: { key: "value" }
54
- )
55
-
56
- expect(scenario.expected_result?).to be false
57
- end
58
- end
59
-
60
- describe "#to_h" do
61
- it "converts scenario to hash" do
62
- scenario = DecisionAgent::Testing::TestScenario.new(
63
- id: "test_1",
64
- context: { user_id: 123 },
65
- expected_decision: "approve",
66
- expected_confidence: 0.9,
67
- metadata: { source: "csv" }
68
- )
69
-
70
- hash = scenario.to_h
71
-
72
- expect(hash).to eq({
73
- id: "test_1",
74
- context: { user_id: 123 },
75
- expected_decision: "approve",
76
- expected_confidence: 0.9,
77
- metadata: { source: "csv" }
78
- })
79
- end
80
- end
81
-
82
- describe "#==" do
83
- it "returns true for equal scenarios" do
84
- scenario1 = DecisionAgent::Testing::TestScenario.new(
85
- id: "test_1",
86
- context: { user_id: 123 },
87
- expected_decision: "approve"
88
- )
89
-
90
- scenario2 = DecisionAgent::Testing::TestScenario.new(
91
- id: "test_1",
92
- context: { user_id: 123 },
93
- expected_decision: "approve"
94
- )
95
-
96
- expect(scenario1).to eq(scenario2)
97
- end
98
-
99
- it "returns false for different scenarios" do
100
- scenario1 = DecisionAgent::Testing::TestScenario.new(
101
- id: "test_1",
102
- context: { user_id: 123 }
103
- )
104
-
105
- scenario2 = DecisionAgent::Testing::TestScenario.new(
106
- id: "test_2",
107
- context: { user_id: 123 }
108
- )
109
-
110
- expect(scenario1).not_to eq(scenario2)
111
- end
112
- end
113
- end