decision_agent 0.3.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +234 -14
  3. data/lib/decision_agent/ab_testing/ab_test.rb +5 -1
  4. data/lib/decision_agent/ab_testing/ab_test_assignment.rb +2 -0
  5. data/lib/decision_agent/ab_testing/ab_test_manager.rb +2 -0
  6. data/lib/decision_agent/ab_testing/ab_testing_agent.rb +2 -0
  7. data/lib/decision_agent/ab_testing/storage/activerecord_adapter.rb +2 -13
  8. data/lib/decision_agent/ab_testing/storage/adapter.rb +2 -0
  9. data/lib/decision_agent/ab_testing/storage/memory_adapter.rb +2 -0
  10. data/lib/decision_agent/agent.rb +78 -9
  11. data/lib/decision_agent/audit/adapter.rb +2 -0
  12. data/lib/decision_agent/audit/logger_adapter.rb +2 -0
  13. data/lib/decision_agent/audit/null_adapter.rb +2 -0
  14. data/lib/decision_agent/auth/access_audit_logger.rb +2 -0
  15. data/lib/decision_agent/auth/authenticator.rb +2 -0
  16. data/lib/decision_agent/auth/password_reset_manager.rb +2 -0
  17. data/lib/decision_agent/auth/password_reset_token.rb +2 -0
  18. data/lib/decision_agent/auth/permission.rb +2 -0
  19. data/lib/decision_agent/auth/permission_checker.rb +2 -0
  20. data/lib/decision_agent/auth/rbac_adapter.rb +2 -0
  21. data/lib/decision_agent/auth/rbac_config.rb +2 -0
  22. data/lib/decision_agent/auth/role.rb +2 -0
  23. data/lib/decision_agent/auth/session.rb +2 -0
  24. data/lib/decision_agent/auth/session_manager.rb +2 -0
  25. data/lib/decision_agent/auth/user.rb +2 -0
  26. data/lib/decision_agent/context.rb +14 -0
  27. data/lib/decision_agent/decision.rb +113 -4
  28. data/lib/decision_agent/dmn/adapter.rb +2 -0
  29. data/lib/decision_agent/dmn/cache.rb +2 -2
  30. data/lib/decision_agent/dmn/decision_graph.rb +7 -7
  31. data/lib/decision_agent/dmn/decision_tree.rb +16 -8
  32. data/lib/decision_agent/dmn/errors.rb +2 -0
  33. data/lib/decision_agent/dmn/exporter.rb +2 -0
  34. data/lib/decision_agent/dmn/feel/evaluator.rb +130 -114
  35. data/lib/decision_agent/dmn/feel/functions.rb +2 -0
  36. data/lib/decision_agent/dmn/feel/parser.rb +2 -0
  37. data/lib/decision_agent/dmn/feel/simple_parser.rb +98 -77
  38. data/lib/decision_agent/dmn/feel/transformer.rb +56 -102
  39. data/lib/decision_agent/dmn/feel/types.rb +2 -0
  40. data/lib/decision_agent/dmn/importer.rb +2 -0
  41. data/lib/decision_agent/dmn/model.rb +2 -4
  42. data/lib/decision_agent/dmn/parser.rb +2 -0
  43. data/lib/decision_agent/dmn/testing.rb +3 -2
  44. data/lib/decision_agent/dmn/validator.rb +5 -3
  45. data/lib/decision_agent/dmn/visualizer.rb +7 -6
  46. data/lib/decision_agent/dsl/condition_evaluator.rb +242 -1375
  47. data/lib/decision_agent/dsl/helpers/cache_helpers.rb +82 -0
  48. data/lib/decision_agent/dsl/helpers/comparison_helpers.rb +98 -0
  49. data/lib/decision_agent/dsl/helpers/date_helpers.rb +91 -0
  50. data/lib/decision_agent/dsl/helpers/geospatial_helpers.rb +85 -0
  51. data/lib/decision_agent/dsl/helpers/operator_evaluation_helpers.rb +160 -0
  52. data/lib/decision_agent/dsl/helpers/parameter_parsing_helpers.rb +206 -0
  53. data/lib/decision_agent/dsl/helpers/template_helpers.rb +39 -0
  54. data/lib/decision_agent/dsl/helpers/utility_helpers.rb +45 -0
  55. data/lib/decision_agent/dsl/operators/base.rb +70 -0
  56. data/lib/decision_agent/dsl/operators/basic_comparison_operators.rb +80 -0
  57. data/lib/decision_agent/dsl/operators/collection_operators.rb +60 -0
  58. data/lib/decision_agent/dsl/operators/date_arithmetic_operators.rb +206 -0
  59. data/lib/decision_agent/dsl/operators/date_time_operators.rb +47 -0
  60. data/lib/decision_agent/dsl/operators/duration_operators.rb +149 -0
  61. data/lib/decision_agent/dsl/operators/financial_operators.rb +237 -0
  62. data/lib/decision_agent/dsl/operators/geospatial_operators.rb +106 -0
  63. data/lib/decision_agent/dsl/operators/mathematical_operators.rb +234 -0
  64. data/lib/decision_agent/dsl/operators/moving_window_operators.rb +135 -0
  65. data/lib/decision_agent/dsl/operators/numeric_operators.rb +120 -0
  66. data/lib/decision_agent/dsl/operators/rate_operators.rb +65 -0
  67. data/lib/decision_agent/dsl/operators/statistical_aggregations.rb +187 -0
  68. data/lib/decision_agent/dsl/operators/string_aggregations.rb +84 -0
  69. data/lib/decision_agent/dsl/operators/string_operators.rb +72 -0
  70. data/lib/decision_agent/dsl/operators/time_component_operators.rb +72 -0
  71. data/lib/decision_agent/dsl/rule_parser.rb +2 -0
  72. data/lib/decision_agent/dsl/schema_validator.rb +37 -14
  73. data/lib/decision_agent/errors.rb +2 -0
  74. data/lib/decision_agent/evaluation.rb +14 -2
  75. data/lib/decision_agent/evaluators/base.rb +2 -0
  76. data/lib/decision_agent/evaluators/dmn_evaluator.rb +108 -19
  77. data/lib/decision_agent/evaluators/json_rule_evaluator.rb +56 -11
  78. data/lib/decision_agent/evaluators/static_evaluator.rb +2 -0
  79. data/lib/decision_agent/explainability/condition_trace.rb +85 -0
  80. data/lib/decision_agent/explainability/explainability_result.rb +50 -0
  81. data/lib/decision_agent/explainability/rule_trace.rb +41 -0
  82. data/lib/decision_agent/explainability/trace_collector.rb +26 -0
  83. data/lib/decision_agent/monitoring/alert_manager.rb +7 -16
  84. data/lib/decision_agent/monitoring/dashboard_server.rb +383 -250
  85. data/lib/decision_agent/monitoring/metrics_collector.rb +2 -0
  86. data/lib/decision_agent/monitoring/monitored_agent.rb +2 -0
  87. data/lib/decision_agent/monitoring/prometheus_exporter.rb +3 -1
  88. data/lib/decision_agent/replay/replay.rb +4 -1
  89. data/lib/decision_agent/scoring/base.rb +2 -0
  90. data/lib/decision_agent/scoring/consensus.rb +2 -0
  91. data/lib/decision_agent/scoring/max_weight.rb +2 -0
  92. data/lib/decision_agent/scoring/threshold.rb +2 -0
  93. data/lib/decision_agent/scoring/weighted_average.rb +2 -0
  94. data/lib/decision_agent/simulation/errors.rb +20 -0
  95. data/lib/decision_agent/simulation/impact_analyzer.rb +500 -0
  96. data/lib/decision_agent/simulation/monte_carlo_simulator.rb +638 -0
  97. data/lib/decision_agent/simulation/replay_engine.rb +488 -0
  98. data/lib/decision_agent/simulation/scenario_engine.rb +320 -0
  99. data/lib/decision_agent/simulation/scenario_library.rb +165 -0
  100. data/lib/decision_agent/simulation/shadow_test_engine.rb +274 -0
  101. data/lib/decision_agent/simulation/what_if_analyzer.rb +1008 -0
  102. data/lib/decision_agent/simulation.rb +19 -0
  103. data/lib/decision_agent/testing/batch_test_importer.rb +6 -2
  104. data/lib/decision_agent/testing/batch_test_runner.rb +5 -2
  105. data/lib/decision_agent/testing/test_coverage_analyzer.rb +2 -0
  106. data/lib/decision_agent/testing/test_result_comparator.rb +2 -0
  107. data/lib/decision_agent/testing/test_scenario.rb +2 -0
  108. data/lib/decision_agent/version.rb +3 -1
  109. data/lib/decision_agent/versioning/activerecord_adapter.rb +108 -43
  110. data/lib/decision_agent/versioning/adapter.rb +9 -0
  111. data/lib/decision_agent/versioning/file_storage_adapter.rb +19 -6
  112. data/lib/decision_agent/versioning/version_manager.rb +9 -0
  113. data/lib/decision_agent/web/dmn_editor/serialization.rb +74 -0
  114. data/lib/decision_agent/web/dmn_editor/xml_builder.rb +107 -0
  115. data/lib/decision_agent/web/dmn_editor.rb +8 -67
  116. data/lib/decision_agent/web/middleware/auth_middleware.rb +2 -0
  117. data/lib/decision_agent/web/middleware/permission_middleware.rb +3 -1
  118. data/lib/decision_agent/web/public/app.js +186 -26
  119. data/lib/decision_agent/web/public/batch_testing.html +80 -6
  120. data/lib/decision_agent/web/public/dmn-editor.html +2 -2
  121. data/lib/decision_agent/web/public/dmn-editor.js +74 -8
  122. data/lib/decision_agent/web/public/index.html +69 -3
  123. data/lib/decision_agent/web/public/login.html +1 -1
  124. data/lib/decision_agent/web/public/sample_batch.csv +11 -0
  125. data/lib/decision_agent/web/public/sample_impact.csv +11 -0
  126. data/lib/decision_agent/web/public/sample_replay.csv +11 -0
  127. data/lib/decision_agent/web/public/sample_rules.json +118 -0
  128. data/lib/decision_agent/web/public/sample_shadow.csv +11 -0
  129. data/lib/decision_agent/web/public/sample_whatif.csv +11 -0
  130. data/lib/decision_agent/web/public/simulation.html +146 -0
  131. data/lib/decision_agent/web/public/simulation_impact.html +495 -0
  132. data/lib/decision_agent/web/public/simulation_replay.html +547 -0
  133. data/lib/decision_agent/web/public/simulation_shadow.html +561 -0
  134. data/lib/decision_agent/web/public/simulation_whatif.html +549 -0
  135. data/lib/decision_agent/web/public/styles.css +65 -0
  136. data/lib/decision_agent/web/public/users.html +1 -1
  137. data/lib/decision_agent/web/rack_helpers.rb +106 -0
  138. data/lib/decision_agent/web/rack_request_helpers.rb +196 -0
  139. data/lib/decision_agent/web/server.rb +2126 -1374
  140. data/lib/decision_agent.rb +19 -1
  141. data/lib/generators/decision_agent/install/install_generator.rb +2 -0
  142. data/lib/generators/decision_agent/install/templates/ab_test_assignment_model.rb +2 -0
  143. data/lib/generators/decision_agent/install/templates/ab_test_model.rb +2 -0
  144. data/lib/generators/decision_agent/install/templates/ab_testing_migration.rb +2 -0
  145. data/lib/generators/decision_agent/install/templates/migration.rb +2 -0
  146. data/lib/generators/decision_agent/install/templates/rule.rb +2 -0
  147. data/lib/generators/decision_agent/install/templates/rule_version.rb +2 -0
  148. metadata +103 -89
  149. data/spec/ab_testing/ab_test_assignment_spec.rb +0 -253
  150. data/spec/ab_testing/ab_test_manager_spec.rb +0 -612
  151. data/spec/ab_testing/ab_test_spec.rb +0 -270
  152. data/spec/ab_testing/ab_testing_agent_spec.rb +0 -655
  153. data/spec/ab_testing/storage/adapter_spec.rb +0 -64
  154. data/spec/ab_testing/storage/memory_adapter_spec.rb +0 -485
  155. data/spec/activerecord_thread_safety_spec.rb +0 -553
  156. data/spec/advanced_operators_spec.rb +0 -3150
  157. data/spec/agent_spec.rb +0 -289
  158. data/spec/api_contract_spec.rb +0 -430
  159. data/spec/audit_adapters_spec.rb +0 -92
  160. data/spec/auth/access_audit_logger_spec.rb +0 -394
  161. data/spec/auth/authenticator_spec.rb +0 -112
  162. data/spec/auth/password_reset_spec.rb +0 -294
  163. data/spec/auth/permission_checker_spec.rb +0 -207
  164. data/spec/auth/permission_spec.rb +0 -73
  165. data/spec/auth/rbac_adapter_spec.rb +0 -778
  166. data/spec/auth/rbac_config_spec.rb +0 -82
  167. data/spec/auth/role_spec.rb +0 -51
  168. data/spec/auth/session_manager_spec.rb +0 -172
  169. data/spec/auth/session_spec.rb +0 -112
  170. data/spec/auth/user_spec.rb +0 -130
  171. data/spec/comprehensive_edge_cases_spec.rb +0 -1777
  172. data/spec/context_spec.rb +0 -127
  173. data/spec/decision_agent_spec.rb +0 -96
  174. data/spec/decision_spec.rb +0 -423
  175. data/spec/dmn/decision_graph_spec.rb +0 -282
  176. data/spec/dmn/decision_tree_spec.rb +0 -203
  177. data/spec/dmn/feel/errors_spec.rb +0 -18
  178. data/spec/dmn/feel/functions_spec.rb +0 -400
  179. data/spec/dmn/feel/simple_parser_spec.rb +0 -274
  180. data/spec/dmn/feel/types_spec.rb +0 -176
  181. data/spec/dmn/feel_parser_spec.rb +0 -489
  182. data/spec/dmn/hit_policy_spec.rb +0 -202
  183. data/spec/dmn/integration_spec.rb +0 -226
  184. data/spec/dsl/condition_evaluator_spec.rb +0 -774
  185. data/spec/dsl_validation_spec.rb +0 -648
  186. data/spec/edge_cases_spec.rb +0 -353
  187. data/spec/evaluation_spec.rb +0 -364
  188. data/spec/evaluation_validator_spec.rb +0 -165
  189. data/spec/examples/feedback_aware_evaluator_spec.rb +0 -460
  190. data/spec/examples.txt +0 -1909
  191. data/spec/fixtures/dmn/complex_decision.dmn +0 -81
  192. data/spec/fixtures/dmn/invalid_structure.dmn +0 -31
  193. data/spec/fixtures/dmn/simple_decision.dmn +0 -40
  194. data/spec/issue_verification_spec.rb +0 -759
  195. data/spec/json_rule_evaluator_spec.rb +0 -587
  196. data/spec/monitoring/alert_manager_spec.rb +0 -378
  197. data/spec/monitoring/metrics_collector_spec.rb +0 -501
  198. data/spec/monitoring/monitored_agent_spec.rb +0 -225
  199. data/spec/monitoring/prometheus_exporter_spec.rb +0 -242
  200. data/spec/monitoring/storage/activerecord_adapter_spec.rb +0 -498
  201. data/spec/monitoring/storage/base_adapter_spec.rb +0 -61
  202. data/spec/monitoring/storage/memory_adapter_spec.rb +0 -247
  203. data/spec/performance_optimizations_spec.rb +0 -493
  204. data/spec/replay_edge_cases_spec.rb +0 -699
  205. data/spec/replay_spec.rb +0 -210
  206. data/spec/rfc8785_canonicalization_spec.rb +0 -215
  207. data/spec/scoring_spec.rb +0 -225
  208. data/spec/spec_helper.rb +0 -60
  209. data/spec/testing/batch_test_importer_spec.rb +0 -693
  210. data/spec/testing/batch_test_runner_spec.rb +0 -307
  211. data/spec/testing/test_coverage_analyzer_spec.rb +0 -292
  212. data/spec/testing/test_result_comparator_spec.rb +0 -392
  213. data/spec/testing/test_scenario_spec.rb +0 -113
  214. data/spec/thread_safety_spec.rb +0 -490
  215. data/spec/thread_safety_spec.rb.broken +0 -878
  216. data/spec/versioning/adapter_spec.rb +0 -156
  217. data/spec/versioning_spec.rb +0 -1030
  218. data/spec/web/middleware/auth_middleware_spec.rb +0 -133
  219. data/spec/web/middleware/permission_middleware_spec.rb +0 -247
  220. data/spec/web_ui_rack_spec.rb +0 -2134
@@ -1,699 +0,0 @@
1
- require "spec_helper"
2
-
3
- RSpec.describe "DecisionAgent::Replay Edge Cases" do
4
- describe "handling rule changes" do
5
- let(:original_rules) do
6
- {
7
- version: "1.0",
8
- ruleset: "approval",
9
- rules: [
10
- {
11
- id: "auto_approve",
12
- if: { field: "score", op: "gte", value: 80 },
13
- then: { decision: "approve", weight: 0.9, reason: "High score" }
14
- }
15
- ]
16
- }
17
- end
18
-
19
- let(:modified_rules) do
20
- {
21
- version: "2.0",
22
- ruleset: "approval",
23
- rules: [
24
- {
25
- id: "auto_approve",
26
- if: { field: "score", op: "gte", value: 90 }, # Changed threshold
27
- then: { decision: "approve", weight: 0.9, reason: "Very high score" }
28
- }
29
- ]
30
- }
31
- end
32
-
33
- it "successfully replays with strict mode when rules haven't changed" do
34
- evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: original_rules)
35
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
36
-
37
- original_result = agent.decide(context: { score: 85 })
38
-
39
- expect do
40
- DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
41
- end.not_to raise_error
42
- end
43
-
44
- it "detects differences in strict mode when rules have changed" do
45
- # Original decision with old rules
46
- evaluator_v1 = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: original_rules)
47
- agent_v1 = DecisionAgent::Agent.new(evaluators: [evaluator_v1])
48
- original_result = agent_v1.decide(context: { score: 85 })
49
-
50
- # Now the rules have changed (threshold increased from 80 to 90)
51
- # Score of 85 no longer matches, so replay should detect a difference
52
-
53
- # Replay uses the stored evaluations (not re-evaluating rules)
54
- # So it should succeed because replay uses static evaluators from the audit payload
55
- expect do
56
- DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
57
- end.not_to raise_error
58
-
59
- # The replayed result should match the original
60
- replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
61
- expect(replayed_result.decision).to eq(original_result.decision)
62
- expect(replayed_result.confidence).to eq(original_result.confidence)
63
- end
64
-
65
- it "allows evolution in non-strict mode" do
66
- evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: original_rules)
67
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
68
-
69
- original_result = agent.decide(context: { score: 85 })
70
-
71
- # In non-strict mode, differences are logged but don't raise errors
72
- expect do
73
- DecisionAgent::Replay.run(original_result.audit_payload, strict: false)
74
- end.not_to raise_error
75
- end
76
- end
77
-
78
- describe "metadata comparison" do
79
- it "preserves and replays metadata correctly" do
80
- rules = {
81
- version: "1.0",
82
- ruleset: "test",
83
- rules: [
84
- {
85
- id: "metadata_test_rule",
86
- if: { field: "user", op: "eq", value: "alice" },
87
- then: {
88
- decision: "approve",
89
- weight: 0.8,
90
- reason: "Trusted user"
91
- }
92
- }
93
- ]
94
- }
95
-
96
- evaluator = DecisionAgent::Evaluators::JsonRuleEvaluator.new(rules_json: rules)
97
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
98
-
99
- original_result = agent.decide(context: { user: "alice" })
100
-
101
- # Verify metadata is in the audit payload
102
- expect(original_result.audit_payload[:evaluations].first[:metadata]).to include(
103
- rule_id: "metadata_test_rule"
104
- )
105
-
106
- # Replay should preserve metadata
107
- replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
108
-
109
- expect(replayed_result.evaluations.first.metadata).to eq(
110
- original_result.evaluations.first.metadata
111
- )
112
- end
113
-
114
- it "handles metadata from static evaluators" do
115
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
116
- decision: "approve",
117
- weight: 0.7,
118
- reason: "No custom metadata"
119
- )
120
-
121
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
122
- original_result = agent.decide(context: { user: "bob" })
123
-
124
- # StaticEvaluator adds type: "static" by default
125
- expect(original_result.evaluations.first.metadata).to eq({ type: "static" })
126
-
127
- expect do
128
- DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
129
- end.not_to raise_error
130
-
131
- replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
132
- expect(replayed_result.evaluations.first.metadata).to eq({ type: "static" })
133
- end
134
-
135
- it "handles complex nested metadata" do
136
- evaluation = DecisionAgent::Evaluation.new(
137
- decision: "escalate",
138
- weight: 0.85,
139
- reason: "Complex case",
140
- evaluator_name: "CustomEvaluator",
141
- metadata: {
142
- user: { id: 123, role: "admin" },
143
- tags: %w[urgent important],
144
- history: [
145
- { action: "created", timestamp: "2025-01-01" },
146
- { action: "updated", timestamp: "2025-01-02" }
147
- ]
148
- }
149
- )
150
-
151
- static_eval = DecisionAgent::Evaluators::StaticEvaluator.new(
152
- decision: evaluation.decision,
153
- weight: evaluation.weight,
154
- reason: evaluation.reason
155
- )
156
-
157
- agent = DecisionAgent::Agent.new(evaluators: [static_eval])
158
- original_result = agent.decide(context: { test: true })
159
-
160
- # Manually construct audit payload with complex metadata
161
- payload = original_result.audit_payload.dup
162
- payload[:evaluations] = [evaluation.to_h]
163
-
164
- replayed_result = DecisionAgent::Replay.run(payload, strict: false)
165
-
166
- expect(replayed_result.evaluations.first.metadata).to be_a(Hash)
167
- end
168
- end
169
-
170
- describe "handling missing evaluators in replay" do
171
- it "replays successfully even if original evaluator class doesn't exist" do
172
- # This simulates a scenario where we had a CustomEvaluator that no longer exists
173
- # but we can still replay the decision from the audit log
174
-
175
- # WeightedAverage normalizes confidence: with one eval of weight 0.9, confidence = 0.9/0.9 = 1.0
176
- # So we need to use the correct confidence value that WeightedAverage would produce
177
- audit_payload = {
178
- timestamp: "2025-01-15T10:00:00.123456Z",
179
- context: { user: "charlie", action: "login" },
180
- feedback: {},
181
- evaluations: [
182
- {
183
- decision: "allow",
184
- weight: 0.9,
185
- reason: "User authenticated successfully",
186
- evaluator_name: "DeletedCustomAuthEvaluator", # This evaluator no longer exists
187
- metadata: { auth_method: "oauth", provider: "google" }
188
- }
189
- ],
190
- decision: "allow",
191
- confidence: 1.0, # WeightedAverage normalizes single eval to 1.0
192
- scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
193
- agent_version: "0.1.0",
194
- deterministic_hash: "abc123"
195
- }
196
-
197
- # Replay should work because it uses StaticEvaluator, not the original evaluator
198
- expect do
199
- DecisionAgent::Replay.run(audit_payload, strict: true)
200
- end.not_to raise_error
201
-
202
- replayed_result = DecisionAgent::Replay.run(audit_payload, strict: true)
203
-
204
- expect(replayed_result.decision).to eq("allow")
205
- expect(replayed_result.confidence).to eq(1.0)
206
- expect(replayed_result.evaluations.first.evaluator_name).to eq("DeletedCustomAuthEvaluator")
207
- end
208
-
209
- it "handles multiple evaluators where some are missing" do
210
- # WeightedAverage with two evals agreeing: confidence = (0.8 + 0.7) / (0.8 + 0.7) = 1.0
211
- audit_payload = {
212
- timestamp: "2025-01-15T10:00:00.123456Z",
213
- context: { user: "dave" },
214
- feedback: {},
215
- evaluations: [
216
- {
217
- decision: "approve",
218
- weight: 0.8,
219
- reason: "Rule matched",
220
- evaluator_name: "RuleEngine",
221
- metadata: { rule_id: "rule_123" }
222
- },
223
- {
224
- decision: "approve",
225
- weight: 0.7,
226
- reason: "ML model prediction",
227
- evaluator_name: "NonExistentMLEvaluator", # Missing evaluator
228
- metadata: { model_version: "v2.1" }
229
- }
230
- ],
231
- decision: "approve",
232
- confidence: 1.0, # Both agree, so 100% confidence
233
- scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
234
- agent_version: "0.1.0",
235
- deterministic_hash: "def456"
236
- }
237
-
238
- replayed_result = DecisionAgent::Replay.run(audit_payload, strict: true)
239
-
240
- expect(replayed_result.decision).to eq("approve")
241
- expect(replayed_result.evaluations.size).to eq(2)
242
- expect(replayed_result.evaluations.map(&:evaluator_name)).to include("NonExistentMLEvaluator")
243
- end
244
- end
245
-
246
- describe "scoring strategy evolution" do
247
- it "handles unknown scoring strategies gracefully" do
248
- audit_payload = {
249
- timestamp: "2025-01-15T10:00:00.123456Z",
250
- context: { test: true },
251
- feedback: {},
252
- evaluations: [
253
- {
254
- decision: "approve",
255
- weight: 0.9,
256
- reason: "Test",
257
- evaluator_name: "TestEvaluator",
258
- metadata: {}
259
- }
260
- ],
261
- decision: "approve",
262
- confidence: 0.9,
263
- scoring_strategy: "DecisionAgent::Scoring::DeprecatedBayesianStrategy", # Doesn't exist
264
- agent_version: "0.1.0",
265
- deterministic_hash: "ghi789"
266
- }
267
-
268
- # Should fall back to WeightedAverage
269
- expect do
270
- DecisionAgent::Replay.run(audit_payload, strict: false)
271
- end.not_to raise_error
272
-
273
- replayed_result = DecisionAgent::Replay.run(audit_payload, strict: false)
274
- expect(replayed_result.decision).to eq("approve")
275
- end
276
-
277
- it "detects scoring strategy mismatch in strict mode" do
278
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
279
- decision: "approve",
280
- weight: 0.6,
281
- reason: "Test"
282
- )
283
-
284
- # Create decision with WeightedAverage
285
- agent_weighted = DecisionAgent::Agent.new(
286
- evaluators: [evaluator],
287
- scoring_strategy: DecisionAgent::Scoring::WeightedAverage.new
288
- )
289
-
290
- original_result = agent_weighted.decide(context: { test: true })
291
-
292
- # Replay uses the stored scoring strategy from the audit payload
293
- # So it should replay successfully
294
- expect do
295
- DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
296
- end.not_to raise_error
297
- end
298
- end
299
-
300
- describe "audit payload validation" do
301
- it "requires context field" do
302
- incomplete_payload = {
303
- evaluations: [],
304
- decision: "test",
305
- confidence: 0.5
306
- }
307
-
308
- expect do
309
- DecisionAgent::Replay.run(incomplete_payload, strict: false)
310
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: context/)
311
- end
312
-
313
- it "requires evaluations field" do
314
- incomplete_payload = {
315
- context: { test: true },
316
- decision: "test",
317
- confidence: 0.5
318
- }
319
-
320
- expect do
321
- DecisionAgent::Replay.run(incomplete_payload, strict: false)
322
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: evaluations/)
323
- end
324
-
325
- it "requires decision field" do
326
- incomplete_payload = {
327
- context: { test: true },
328
- evaluations: [],
329
- confidence: 0.5
330
- }
331
-
332
- expect do
333
- DecisionAgent::Replay.run(incomplete_payload, strict: false)
334
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: decision/)
335
- end
336
-
337
- it "requires confidence field" do
338
- incomplete_payload = {
339
- context: { test: true },
340
- evaluations: [],
341
- decision: "test"
342
- }
343
-
344
- expect do
345
- DecisionAgent::Replay.run(incomplete_payload, strict: false)
346
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /missing required key: confidence/)
347
- end
348
-
349
- it "accepts both symbol and string keys" do
350
- # Use MaxWeight strategy which preserves the original weight as confidence
351
- payload_with_strings = {
352
- "timestamp" => "2025-01-15T10:00:00.123456Z",
353
- "context" => { "test" => true },
354
- "feedback" => {},
355
- "evaluations" => [
356
- {
357
- "decision" => "approve",
358
- "weight" => 0.9,
359
- "reason" => "Test",
360
- "evaluator_name" => "TestEvaluator",
361
- "metadata" => {}
362
- }
363
- ],
364
- "decision" => "approve",
365
- "confidence" => 0.9,
366
- "scoring_strategy" => "DecisionAgent::Scoring::MaxWeight"
367
- }
368
-
369
- expect do
370
- DecisionAgent::Replay.run(payload_with_strings, strict: true)
371
- end.not_to raise_error
372
- end
373
- end
374
-
375
- describe "deterministic hash verification" do
376
- it "can verify replay produced the same deterministic hash" do
377
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
378
- decision: "approve",
379
- weight: 0.8,
380
- reason: "Test"
381
- )
382
-
383
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
384
- original_result = agent.decide(context: { user: "test" })
385
-
386
- original_hash = original_result.audit_payload[:deterministic_hash]
387
-
388
- replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
389
- replayed_hash = replayed_result.audit_payload[:deterministic_hash]
390
-
391
- # Hashes should match because same context, evaluations, decision, confidence, and strategy
392
- expect(replayed_hash).to eq(original_hash)
393
- end
394
-
395
- it "hash changes when context changes" do
396
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
397
- decision: "approve",
398
- weight: 0.8,
399
- reason: "Test"
400
- )
401
-
402
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
403
-
404
- result1 = agent.decide(context: { user: "alice" })
405
- result2 = agent.decide(context: { user: "bob" })
406
-
407
- expect(result1.audit_payload[:deterministic_hash]).not_to eq(
408
- result2.audit_payload[:deterministic_hash]
409
- )
410
- end
411
- end
412
-
413
- describe "feedback preservation in replay" do
414
- it "preserves original feedback in replay" do
415
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
416
- decision: "approve",
417
- weight: 0.8,
418
- reason: "Test"
419
- )
420
-
421
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
422
-
423
- original_feedback = { user_id: "manager_123", source: "manual_review" }
424
- original_result = agent.decide(context: { test: true }, feedback: original_feedback)
425
-
426
- expect(original_result.audit_payload[:feedback]).to eq(original_feedback)
427
-
428
- replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
429
-
430
- expect(replayed_result.audit_payload[:feedback]).to eq(original_feedback)
431
- end
432
-
433
- it "handles empty feedback" do
434
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
435
- decision: "approve",
436
- weight: 0.8,
437
- reason: "Test"
438
- )
439
-
440
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
441
- original_result = agent.decide(context: { test: true })
442
-
443
- expect(original_result.audit_payload[:feedback]).to eq({})
444
-
445
- replayed_result = DecisionAgent::Replay.run(original_result.audit_payload, strict: true)
446
- expect(replayed_result.audit_payload[:feedback]).to eq({})
447
- end
448
- end
449
-
450
- describe "version mismatch scenarios" do
451
- it "logs warning when agent_version differs in non-strict mode" do
452
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
453
- decision: "approve",
454
- weight: 0.8,
455
- reason: "Test"
456
- )
457
-
458
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
459
- original_result = agent.decide(context: { test: true })
460
-
461
- # Modify agent_version
462
- modified_payload = original_result.audit_payload.dup
463
- modified_payload[:agent_version] = "99.0.0" # Different version
464
-
465
- # Non-strict mode should log but not raise
466
- expect do
467
- DecisionAgent::Replay.run(modified_payload, strict: false)
468
- end.not_to raise_error
469
-
470
- # Should successfully replay despite version difference
471
- replayed_result = DecisionAgent::Replay.run(modified_payload, strict: false)
472
- expect(replayed_result.decision).to eq("approve")
473
- end
474
-
475
- it "accepts different agent_version in non-strict mode" do
476
- audit_payload = {
477
- timestamp: "2025-01-15T10:00:00.123456Z",
478
- context: { test: true },
479
- feedback: {},
480
- evaluations: [
481
- {
482
- decision: "approve",
483
- weight: 0.9,
484
- reason: "Test",
485
- evaluator_name: "TestEvaluator",
486
- metadata: {}
487
- }
488
- ],
489
- decision: "approve",
490
- confidence: 1.0,
491
- scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
492
- agent_version: "0.0.1", # Old version
493
- deterministic_hash: "old_hash"
494
- }
495
-
496
- # Should accept and replay successfully
497
- result = DecisionAgent::Replay.run(audit_payload, strict: false)
498
- expect(result.decision).to eq("approve")
499
- end
500
-
501
- it "replays successfully in strict mode regardless of version" do
502
- evaluator = DecisionAgent::Evaluators::StaticEvaluator.new(
503
- decision: "approve",
504
- weight: 0.8,
505
- reason: "Test"
506
- )
507
-
508
- agent = DecisionAgent::Agent.new(evaluators: [evaluator])
509
- original_result = agent.decide(context: { test: true })
510
-
511
- # Modify agent_version
512
- modified_payload = original_result.audit_payload.dup
513
- modified_payload[:agent_version] = "2.0.0"
514
-
515
- # Strict mode should still work because version is not part of deterministic comparison
516
- # (only decision and confidence are compared in strict mode)
517
- expect do
518
- DecisionAgent::Replay.run(modified_payload, strict: true)
519
- end.not_to raise_error
520
- end
521
- end
522
-
523
- describe "corrupted audit payload scenarios" do
524
- it "handles missing deterministic_hash gracefully" do
525
- audit_payload = {
526
- timestamp: "2025-01-15T10:00:00.123456Z",
527
- context: { test: true },
528
- feedback: {},
529
- evaluations: [
530
- {
531
- decision: "approve",
532
- weight: 0.9,
533
- reason: "Test",
534
- evaluator_name: "TestEvaluator",
535
- metadata: {}
536
- }
537
- ],
538
- decision: "approve",
539
- confidence: 1.0,
540
- scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
541
- agent_version: "0.1.0"
542
- # deterministic_hash is missing
543
- }
544
-
545
- # Should not raise error, just creates new hash during replay
546
- expect do
547
- DecisionAgent::Replay.run(audit_payload, strict: false)
548
- end.not_to raise_error
549
-
550
- result = DecisionAgent::Replay.run(audit_payload, strict: false)
551
- expect(result.decision).to eq("approve")
552
- expect(result.audit_payload[:deterministic_hash]).to be_a(String)
553
- end
554
-
555
- it "handles invalid deterministic_hash gracefully" do
556
- audit_payload = {
557
- timestamp: "2025-01-15T10:00:00.123456Z",
558
- context: { test: true },
559
- feedback: {},
560
- evaluations: [
561
- {
562
- decision: "approve",
563
- weight: 0.9,
564
- reason: "Test",
565
- evaluator_name: "TestEvaluator",
566
- metadata: {}
567
- }
568
- ],
569
- decision: "approve",
570
- confidence: 1.0,
571
- scoring_strategy: "DecisionAgent::Scoring::WeightedAverage",
572
- agent_version: "0.1.0",
573
- deterministic_hash: "corrupted_invalid_hash_12345"
574
- }
575
-
576
- # Should replay successfully, generating new hash
577
- result = DecisionAgent::Replay.run(audit_payload, strict: false)
578
- expect(result.decision).to eq("approve")
579
- # New hash should be different from corrupted one
580
- expect(result.audit_payload[:deterministic_hash]).not_to eq("corrupted_invalid_hash_12345")
581
- end
582
-
583
- it "validates required fields before replay" do
584
- # Missing context
585
- expect do
586
- DecisionAgent::Replay.run({ decision: "test", confidence: 0.5, evaluations: [] }, strict: true)
587
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /context/)
588
-
589
- # Missing evaluations
590
- expect do
591
- DecisionAgent::Replay.run({ context: {}, decision: "test", confidence: 0.5 }, strict: true)
592
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /evaluations/)
593
-
594
- # Missing decision
595
- expect do
596
- DecisionAgent::Replay.run({ context: {}, evaluations: [], confidence: 0.5 }, strict: true)
597
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /decision/)
598
-
599
- # Missing confidence
600
- expect do
601
- DecisionAgent::Replay.run({ context: {}, evaluations: [], decision: "test" }, strict: true)
602
- end.to raise_error(DecisionAgent::InvalidRuleDslError, /confidence/)
603
- end
604
-
605
- it "handles evaluation with invalid weight" do
606
- audit_payload = {
607
- timestamp: "2025-01-15T10:00:00.123456Z",
608
- context: { test: true },
609
- feedback: {},
610
- evaluations: [
611
- {
612
- decision: "approve",
613
- weight: 2.5, # Weight > 1.0, invalid
614
- reason: "Test",
615
- evaluator_name: "TestEvaluator",
616
- metadata: {}
617
- }
618
- ],
619
- decision: "approve",
620
- confidence: 1.0,
621
- scoring_strategy: "DecisionAgent::Scoring::WeightedAverage"
622
- }
623
-
624
- # Invalid weight (> 1.0) should raise error when creating Evaluation
625
- expect do
626
- DecisionAgent::Replay.run(audit_payload, strict: false)
627
- end.to raise_error(DecisionAgent::InvalidWeightError)
628
- end
629
-
630
- it "handles completely empty audit payload" do
631
- expect do
632
- DecisionAgent::Replay.run({}, strict: false)
633
- end.to raise_error(DecisionAgent::InvalidRuleDslError)
634
- end
635
-
636
- it "handles nil audit payload" do
637
- expect do
638
- DecisionAgent::Replay.run(nil, strict: false)
639
- end.to raise_error
640
- end
641
- end
642
-
643
- describe "scoring strategy class rename scenarios" do
644
- it "handles renamed scoring strategy class in non-strict mode" do
645
- audit_payload = {
646
- timestamp: "2025-01-15T10:00:00.123456Z",
647
- context: { test: true },
648
- feedback: {},
649
- evaluations: [
650
- {
651
- decision: "approve",
652
- weight: 0.9,
653
- reason: "Test",
654
- evaluator_name: "TestEvaluator",
655
- metadata: {}
656
- }
657
- ],
658
- decision: "approve",
659
- confidence: 0.9,
660
- scoring_strategy: "DecisionAgent::Scoring::OldStrategyName", # Renamed or deleted
661
- agent_version: "0.1.0"
662
- }
663
-
664
- # Should fall back to default strategy (WeightedAverage)
665
- expect do
666
- DecisionAgent::Replay.run(audit_payload, strict: false)
667
- end.not_to raise_error
668
-
669
- result = DecisionAgent::Replay.run(audit_payload, strict: false)
670
- expect(result.decision).to eq("approve")
671
- end
672
-
673
- it "handles custom scoring strategy not in current codebase" do
674
- audit_payload = {
675
- timestamp: "2025-01-15T10:00:00.123456Z",
676
- context: { test: true },
677
- feedback: {},
678
- evaluations: [
679
- {
680
- decision: "approve",
681
- weight: 0.85,
682
- reason: "Test",
683
- evaluator_name: "TestEvaluator",
684
- metadata: {}
685
- }
686
- ],
687
- decision: "approve",
688
- confidence: 0.85,
689
- scoring_strategy: "MyCompany::CustomMLBasedScoringStrategy", # Custom strategy
690
- agent_version: "0.1.0"
691
- }
692
-
693
- # Should use fallback strategy
694
- result = DecisionAgent::Replay.run(audit_payload, strict: false)
695
- expect(result).not_to be_nil
696
- expect(result.decision).to eq("approve")
697
- end
698
- end
699
- end