decision_agent 0.3.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +234 -14
  3. data/lib/decision_agent/ab_testing/ab_test.rb +5 -1
  4. data/lib/decision_agent/ab_testing/ab_test_assignment.rb +2 -0
  5. data/lib/decision_agent/ab_testing/ab_test_manager.rb +2 -0
  6. data/lib/decision_agent/ab_testing/ab_testing_agent.rb +2 -0
  7. data/lib/decision_agent/ab_testing/storage/activerecord_adapter.rb +2 -13
  8. data/lib/decision_agent/ab_testing/storage/adapter.rb +2 -0
  9. data/lib/decision_agent/ab_testing/storage/memory_adapter.rb +2 -0
  10. data/lib/decision_agent/agent.rb +78 -9
  11. data/lib/decision_agent/audit/adapter.rb +2 -0
  12. data/lib/decision_agent/audit/logger_adapter.rb +2 -0
  13. data/lib/decision_agent/audit/null_adapter.rb +2 -0
  14. data/lib/decision_agent/auth/access_audit_logger.rb +2 -0
  15. data/lib/decision_agent/auth/authenticator.rb +2 -0
  16. data/lib/decision_agent/auth/password_reset_manager.rb +2 -0
  17. data/lib/decision_agent/auth/password_reset_token.rb +2 -0
  18. data/lib/decision_agent/auth/permission.rb +2 -0
  19. data/lib/decision_agent/auth/permission_checker.rb +2 -0
  20. data/lib/decision_agent/auth/rbac_adapter.rb +2 -0
  21. data/lib/decision_agent/auth/rbac_config.rb +2 -0
  22. data/lib/decision_agent/auth/role.rb +2 -0
  23. data/lib/decision_agent/auth/session.rb +2 -0
  24. data/lib/decision_agent/auth/session_manager.rb +2 -0
  25. data/lib/decision_agent/auth/user.rb +2 -0
  26. data/lib/decision_agent/context.rb +14 -0
  27. data/lib/decision_agent/decision.rb +113 -4
  28. data/lib/decision_agent/dmn/adapter.rb +2 -0
  29. data/lib/decision_agent/dmn/cache.rb +2 -2
  30. data/lib/decision_agent/dmn/decision_graph.rb +7 -7
  31. data/lib/decision_agent/dmn/decision_tree.rb +16 -8
  32. data/lib/decision_agent/dmn/errors.rb +2 -0
  33. data/lib/decision_agent/dmn/exporter.rb +2 -0
  34. data/lib/decision_agent/dmn/feel/evaluator.rb +130 -114
  35. data/lib/decision_agent/dmn/feel/functions.rb +2 -0
  36. data/lib/decision_agent/dmn/feel/parser.rb +2 -0
  37. data/lib/decision_agent/dmn/feel/simple_parser.rb +98 -77
  38. data/lib/decision_agent/dmn/feel/transformer.rb +56 -102
  39. data/lib/decision_agent/dmn/feel/types.rb +2 -0
  40. data/lib/decision_agent/dmn/importer.rb +2 -0
  41. data/lib/decision_agent/dmn/model.rb +2 -4
  42. data/lib/decision_agent/dmn/parser.rb +2 -0
  43. data/lib/decision_agent/dmn/testing.rb +3 -2
  44. data/lib/decision_agent/dmn/validator.rb +5 -3
  45. data/lib/decision_agent/dmn/visualizer.rb +7 -6
  46. data/lib/decision_agent/dsl/condition_evaluator.rb +242 -1375
  47. data/lib/decision_agent/dsl/helpers/cache_helpers.rb +82 -0
  48. data/lib/decision_agent/dsl/helpers/comparison_helpers.rb +98 -0
  49. data/lib/decision_agent/dsl/helpers/date_helpers.rb +91 -0
  50. data/lib/decision_agent/dsl/helpers/geospatial_helpers.rb +85 -0
  51. data/lib/decision_agent/dsl/helpers/operator_evaluation_helpers.rb +160 -0
  52. data/lib/decision_agent/dsl/helpers/parameter_parsing_helpers.rb +206 -0
  53. data/lib/decision_agent/dsl/helpers/template_helpers.rb +39 -0
  54. data/lib/decision_agent/dsl/helpers/utility_helpers.rb +45 -0
  55. data/lib/decision_agent/dsl/operators/base.rb +70 -0
  56. data/lib/decision_agent/dsl/operators/basic_comparison_operators.rb +80 -0
  57. data/lib/decision_agent/dsl/operators/collection_operators.rb +60 -0
  58. data/lib/decision_agent/dsl/operators/date_arithmetic_operators.rb +206 -0
  59. data/lib/decision_agent/dsl/operators/date_time_operators.rb +47 -0
  60. data/lib/decision_agent/dsl/operators/duration_operators.rb +149 -0
  61. data/lib/decision_agent/dsl/operators/financial_operators.rb +237 -0
  62. data/lib/decision_agent/dsl/operators/geospatial_operators.rb +106 -0
  63. data/lib/decision_agent/dsl/operators/mathematical_operators.rb +234 -0
  64. data/lib/decision_agent/dsl/operators/moving_window_operators.rb +135 -0
  65. data/lib/decision_agent/dsl/operators/numeric_operators.rb +120 -0
  66. data/lib/decision_agent/dsl/operators/rate_operators.rb +65 -0
  67. data/lib/decision_agent/dsl/operators/statistical_aggregations.rb +187 -0
  68. data/lib/decision_agent/dsl/operators/string_aggregations.rb +84 -0
  69. data/lib/decision_agent/dsl/operators/string_operators.rb +72 -0
  70. data/lib/decision_agent/dsl/operators/time_component_operators.rb +72 -0
  71. data/lib/decision_agent/dsl/rule_parser.rb +2 -0
  72. data/lib/decision_agent/dsl/schema_validator.rb +37 -14
  73. data/lib/decision_agent/errors.rb +2 -0
  74. data/lib/decision_agent/evaluation.rb +14 -2
  75. data/lib/decision_agent/evaluators/base.rb +2 -0
  76. data/lib/decision_agent/evaluators/dmn_evaluator.rb +108 -19
  77. data/lib/decision_agent/evaluators/json_rule_evaluator.rb +56 -11
  78. data/lib/decision_agent/evaluators/static_evaluator.rb +2 -0
  79. data/lib/decision_agent/explainability/condition_trace.rb +85 -0
  80. data/lib/decision_agent/explainability/explainability_result.rb +50 -0
  81. data/lib/decision_agent/explainability/rule_trace.rb +41 -0
  82. data/lib/decision_agent/explainability/trace_collector.rb +26 -0
  83. data/lib/decision_agent/monitoring/alert_manager.rb +7 -16
  84. data/lib/decision_agent/monitoring/dashboard_server.rb +383 -250
  85. data/lib/decision_agent/monitoring/metrics_collector.rb +2 -0
  86. data/lib/decision_agent/monitoring/monitored_agent.rb +2 -0
  87. data/lib/decision_agent/monitoring/prometheus_exporter.rb +3 -1
  88. data/lib/decision_agent/replay/replay.rb +4 -1
  89. data/lib/decision_agent/scoring/base.rb +2 -0
  90. data/lib/decision_agent/scoring/consensus.rb +2 -0
  91. data/lib/decision_agent/scoring/max_weight.rb +2 -0
  92. data/lib/decision_agent/scoring/threshold.rb +2 -0
  93. data/lib/decision_agent/scoring/weighted_average.rb +2 -0
  94. data/lib/decision_agent/simulation/errors.rb +20 -0
  95. data/lib/decision_agent/simulation/impact_analyzer.rb +500 -0
  96. data/lib/decision_agent/simulation/monte_carlo_simulator.rb +638 -0
  97. data/lib/decision_agent/simulation/replay_engine.rb +488 -0
  98. data/lib/decision_agent/simulation/scenario_engine.rb +320 -0
  99. data/lib/decision_agent/simulation/scenario_library.rb +165 -0
  100. data/lib/decision_agent/simulation/shadow_test_engine.rb +274 -0
  101. data/lib/decision_agent/simulation/what_if_analyzer.rb +1008 -0
  102. data/lib/decision_agent/simulation.rb +19 -0
  103. data/lib/decision_agent/testing/batch_test_importer.rb +6 -2
  104. data/lib/decision_agent/testing/batch_test_runner.rb +5 -2
  105. data/lib/decision_agent/testing/test_coverage_analyzer.rb +2 -0
  106. data/lib/decision_agent/testing/test_result_comparator.rb +2 -0
  107. data/lib/decision_agent/testing/test_scenario.rb +2 -0
  108. data/lib/decision_agent/version.rb +3 -1
  109. data/lib/decision_agent/versioning/activerecord_adapter.rb +108 -43
  110. data/lib/decision_agent/versioning/adapter.rb +9 -0
  111. data/lib/decision_agent/versioning/file_storage_adapter.rb +19 -6
  112. data/lib/decision_agent/versioning/version_manager.rb +9 -0
  113. data/lib/decision_agent/web/dmn_editor/serialization.rb +74 -0
  114. data/lib/decision_agent/web/dmn_editor/xml_builder.rb +107 -0
  115. data/lib/decision_agent/web/dmn_editor.rb +8 -67
  116. data/lib/decision_agent/web/middleware/auth_middleware.rb +2 -0
  117. data/lib/decision_agent/web/middleware/permission_middleware.rb +3 -1
  118. data/lib/decision_agent/web/public/app.js +186 -26
  119. data/lib/decision_agent/web/public/batch_testing.html +80 -6
  120. data/lib/decision_agent/web/public/dmn-editor.html +2 -2
  121. data/lib/decision_agent/web/public/dmn-editor.js +74 -8
  122. data/lib/decision_agent/web/public/index.html +69 -3
  123. data/lib/decision_agent/web/public/login.html +1 -1
  124. data/lib/decision_agent/web/public/sample_batch.csv +11 -0
  125. data/lib/decision_agent/web/public/sample_impact.csv +11 -0
  126. data/lib/decision_agent/web/public/sample_replay.csv +11 -0
  127. data/lib/decision_agent/web/public/sample_rules.json +118 -0
  128. data/lib/decision_agent/web/public/sample_shadow.csv +11 -0
  129. data/lib/decision_agent/web/public/sample_whatif.csv +11 -0
  130. data/lib/decision_agent/web/public/simulation.html +146 -0
  131. data/lib/decision_agent/web/public/simulation_impact.html +495 -0
  132. data/lib/decision_agent/web/public/simulation_replay.html +547 -0
  133. data/lib/decision_agent/web/public/simulation_shadow.html +561 -0
  134. data/lib/decision_agent/web/public/simulation_whatif.html +549 -0
  135. data/lib/decision_agent/web/public/styles.css +65 -0
  136. data/lib/decision_agent/web/public/users.html +1 -1
  137. data/lib/decision_agent/web/rack_helpers.rb +106 -0
  138. data/lib/decision_agent/web/rack_request_helpers.rb +196 -0
  139. data/lib/decision_agent/web/server.rb +2126 -1374
  140. data/lib/decision_agent.rb +19 -1
  141. data/lib/generators/decision_agent/install/install_generator.rb +2 -0
  142. data/lib/generators/decision_agent/install/templates/ab_test_assignment_model.rb +2 -0
  143. data/lib/generators/decision_agent/install/templates/ab_test_model.rb +2 -0
  144. data/lib/generators/decision_agent/install/templates/ab_testing_migration.rb +2 -0
  145. data/lib/generators/decision_agent/install/templates/migration.rb +2 -0
  146. data/lib/generators/decision_agent/install/templates/rule.rb +2 -0
  147. data/lib/generators/decision_agent/install/templates/rule_version.rb +2 -0
  148. metadata +103 -89
  149. data/spec/ab_testing/ab_test_assignment_spec.rb +0 -253
  150. data/spec/ab_testing/ab_test_manager_spec.rb +0 -612
  151. data/spec/ab_testing/ab_test_spec.rb +0 -270
  152. data/spec/ab_testing/ab_testing_agent_spec.rb +0 -655
  153. data/spec/ab_testing/storage/adapter_spec.rb +0 -64
  154. data/spec/ab_testing/storage/memory_adapter_spec.rb +0 -485
  155. data/spec/activerecord_thread_safety_spec.rb +0 -553
  156. data/spec/advanced_operators_spec.rb +0 -3150
  157. data/spec/agent_spec.rb +0 -289
  158. data/spec/api_contract_spec.rb +0 -430
  159. data/spec/audit_adapters_spec.rb +0 -92
  160. data/spec/auth/access_audit_logger_spec.rb +0 -394
  161. data/spec/auth/authenticator_spec.rb +0 -112
  162. data/spec/auth/password_reset_spec.rb +0 -294
  163. data/spec/auth/permission_checker_spec.rb +0 -207
  164. data/spec/auth/permission_spec.rb +0 -73
  165. data/spec/auth/rbac_adapter_spec.rb +0 -778
  166. data/spec/auth/rbac_config_spec.rb +0 -82
  167. data/spec/auth/role_spec.rb +0 -51
  168. data/spec/auth/session_manager_spec.rb +0 -172
  169. data/spec/auth/session_spec.rb +0 -112
  170. data/spec/auth/user_spec.rb +0 -130
  171. data/spec/comprehensive_edge_cases_spec.rb +0 -1777
  172. data/spec/context_spec.rb +0 -127
  173. data/spec/decision_agent_spec.rb +0 -96
  174. data/spec/decision_spec.rb +0 -423
  175. data/spec/dmn/decision_graph_spec.rb +0 -282
  176. data/spec/dmn/decision_tree_spec.rb +0 -203
  177. data/spec/dmn/feel/errors_spec.rb +0 -18
  178. data/spec/dmn/feel/functions_spec.rb +0 -400
  179. data/spec/dmn/feel/simple_parser_spec.rb +0 -274
  180. data/spec/dmn/feel/types_spec.rb +0 -176
  181. data/spec/dmn/feel_parser_spec.rb +0 -489
  182. data/spec/dmn/hit_policy_spec.rb +0 -202
  183. data/spec/dmn/integration_spec.rb +0 -226
  184. data/spec/dsl/condition_evaluator_spec.rb +0 -774
  185. data/spec/dsl_validation_spec.rb +0 -648
  186. data/spec/edge_cases_spec.rb +0 -353
  187. data/spec/evaluation_spec.rb +0 -364
  188. data/spec/evaluation_validator_spec.rb +0 -165
  189. data/spec/examples/feedback_aware_evaluator_spec.rb +0 -460
  190. data/spec/examples.txt +0 -1909
  191. data/spec/fixtures/dmn/complex_decision.dmn +0 -81
  192. data/spec/fixtures/dmn/invalid_structure.dmn +0 -31
  193. data/spec/fixtures/dmn/simple_decision.dmn +0 -40
  194. data/spec/issue_verification_spec.rb +0 -759
  195. data/spec/json_rule_evaluator_spec.rb +0 -587
  196. data/spec/monitoring/alert_manager_spec.rb +0 -378
  197. data/spec/monitoring/metrics_collector_spec.rb +0 -501
  198. data/spec/monitoring/monitored_agent_spec.rb +0 -225
  199. data/spec/monitoring/prometheus_exporter_spec.rb +0 -242
  200. data/spec/monitoring/storage/activerecord_adapter_spec.rb +0 -498
  201. data/spec/monitoring/storage/base_adapter_spec.rb +0 -61
  202. data/spec/monitoring/storage/memory_adapter_spec.rb +0 -247
  203. data/spec/performance_optimizations_spec.rb +0 -493
  204. data/spec/replay_edge_cases_spec.rb +0 -699
  205. data/spec/replay_spec.rb +0 -210
  206. data/spec/rfc8785_canonicalization_spec.rb +0 -215
  207. data/spec/scoring_spec.rb +0 -225
  208. data/spec/spec_helper.rb +0 -60
  209. data/spec/testing/batch_test_importer_spec.rb +0 -693
  210. data/spec/testing/batch_test_runner_spec.rb +0 -307
  211. data/spec/testing/test_coverage_analyzer_spec.rb +0 -292
  212. data/spec/testing/test_result_comparator_spec.rb +0 -392
  213. data/spec/testing/test_scenario_spec.rb +0 -113
  214. data/spec/thread_safety_spec.rb +0 -490
  215. data/spec/thread_safety_spec.rb.broken +0 -878
  216. data/spec/versioning/adapter_spec.rb +0 -156
  217. data/spec/versioning_spec.rb +0 -1030
  218. data/spec/web/middleware/auth_middleware_spec.rb +0 -133
  219. data/spec/web/middleware/permission_middleware_spec.rb +0 -247
  220. data/spec/web_ui_rack_spec.rb +0 -2134
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "monitor"
2
4
 
3
5
  module DecisionAgent
@@ -6,7 +8,7 @@ module DecisionAgent
6
8
  class PrometheusExporter
7
9
  include MonitorMixin
8
10
 
9
- CONTENT_TYPE = "text/plain; version=0.0.4".freeze
11
+ CONTENT_TYPE = "text/plain; version=0.0.4"
10
12
 
11
13
  def initialize(metrics_collector:, namespace: "decision_agent")
12
14
  super()
@@ -1,9 +1,12 @@
1
- require "digest"
1
+ # frozen_string_literal: true
2
+
2
3
  require "json"
3
4
 
4
5
  module DecisionAgent
5
6
  module Replay
6
7
  def self.run(audit_payload, strict: true)
8
+ raise ArgumentError, "audit_payload cannot be nil" if audit_payload.nil?
9
+
7
10
  validate_payload!(audit_payload)
8
11
 
9
12
  context = Context.new(audit_payload[:context] || audit_payload["context"])
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module DecisionAgent
2
4
  module Scoring
3
5
  class Base
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module DecisionAgent
2
4
  module Scoring
3
5
  class Consensus < Base
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module DecisionAgent
2
4
  module Scoring
3
5
  class MaxWeight < Base
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module DecisionAgent
2
4
  module Scoring
3
5
  class Threshold < Base
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module DecisionAgent
2
4
  module Scoring
3
5
  class WeightedAverage < Base
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module DecisionAgent
  module Simulation
    # Root of the simulation error hierarchy; every error below inherits
    # from it, so rescuing SimulationError catches all of them.
    SimulationError = Class.new(StandardError)

    # Raised when scenario execution fails.
    ScenarioExecutionError = Class.new(SimulationError)

    # Raised when historical data is invalid.
    InvalidHistoricalDataError = Class.new(SimulationError)

    # Raised when version comparison fails.
    VersionComparisonError = Class.new(SimulationError)

    # Raised when shadow test configuration is invalid.
    InvalidShadowTestError = Class.new(SimulationError)
  end
end
@@ -0,0 +1,500 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "errors"
4
+
5
module DecisionAgent
  module Simulation
    # Analyzer for quantifying rule change impact.
    #
    # Runs a baseline and a proposed rule version against the same test
    # contexts and reports decision changes, confidence shifts, timing
    # differences and (optionally) a risk score.
    # rubocop:disable Metrics/ClassLength
    class ImpactAnalyzer
      attr_reader :version_manager

      # @param version_manager [Object, nil] object used to look up versions via
      #   +get_version(version_id:)+; a Versioning::VersionManager is created
      #   when none is given
      def initialize(version_manager: nil)
        @version_manager = version_manager || Versioning::VersionManager.new
      end

      # Analyze impact of a proposed rule change
      # @param baseline_version [String, Integer, Hash] Baseline rule version
      # @param proposed_version [String, Integer, Hash] Proposed rule version
      # @param test_data [Array<Hash>] Test contexts to evaluate
      # @param options [Hash] Analysis options
      #   - :parallel [Boolean] Use parallel execution (default: true)
      #   - :thread_count [Integer] Number of threads (default: 4)
      #   - :calculate_risk [Boolean] Calculate risk score (default: true)
      # @return [Hash] Impact analysis report
      # @raise [VersionComparisonError] when a version cannot be resolved or
      #   evaluators cannot be built from it
      def analyze(baseline_version:, proposed_version:, test_data:, options: {})
        options = {
          parallel: true,
          thread_count: 4,
          calculate_risk: true
        }.merge(options)

        baseline_agent = build_agent_from_version(baseline_version)
        proposed_agent = build_agent_from_version(proposed_version)

        # Execute both versions on test data
        results = execute_comparison(test_data, baseline_agent, proposed_agent, options)

        # Build impact report
        build_impact_report(results, options)
      end

      # Calculate risk score for a rule change
      # @param results [Array<Hash>] Comparison results
      # @return [Float] Risk score between 0.0 (low risk) and 1.0 (high risk)
      def calculate_risk_score(results)
        return 0.0 if results.empty?

        total = results.size
        decision_changes = results.count { |r| r[:decision_changed] }
        large_confidence_shifts = results.count { |r| (r[:confidence_delta] || 0).abs > 0.2 }
        rejections_increased = count_rejection_increases(results)

        # Risk factors
        change_rate = decision_changes.to_f / total
        confidence_volatility = large_confidence_shifts.to_f / total
        rejection_risk = rejections_increased.to_f / total

        # Weighted risk score
        risk_score = (
          (change_rate * 0.4) +
          (confidence_volatility * 0.3) +
          (rejection_risk * 0.3)
        )

        [risk_score, 1.0].min # Cap at 1.0
      end

      private

      # Builds an Agent whose evaluators come from the given version.
      # @param version [String, Integer, Hash] version id or version hash
      # @return [Agent]
      def build_agent_from_version(version)
        version_hash = resolve_version(version)
        evaluators = build_evaluators_from_version(version_hash)
        Agent.new(
          evaluators: evaluators,
          scoring_strategy: Scoring::WeightedAverage.new,
          audit_adapter: Audit::NullAdapter.new
        )
      end

      # Turns a version reference into version data.
      # Strings and Integers are looked up through the version manager;
      # Hashes are returned unchanged.
      # @raise [VersionComparisonError] when the lookup returns nothing or the
      #   argument is of an unsupported class
      def resolve_version(version)
        case version
        when String, Integer
          version_data = @version_manager.get_version(version_id: version)
          raise VersionComparisonError, "Version not found: #{version}" unless version_data

          version_data
        when Hash
          version
        else
          raise VersionComparisonError, "Invalid version format: #{version.class}"
        end
      end

      # Builds evaluators from a version hash's content.
      # Both symbol and string keys are accepted for "content", "evaluators"
      # and "rules", so content loaded from JSON behaves like symbol-keyed
      # content.
      # @param version [Hash] version data
      # @return [Array] evaluator instances
      # @raise [VersionComparisonError] when content is missing or has neither
      #   evaluators nor rules
      def build_evaluators_from_version(version)
        content = version[:content] || version["content"]
        raise VersionComparisonError, "Version has no content" unless content

        evaluator_configs = content.is_a?(Hash) && (content[:evaluators] || content["evaluators"])

        if evaluator_configs
          build_evaluators_from_config(evaluator_configs)
        elsif content.is_a?(Hash) && (content[:rules] || content["rules"])
          [Evaluators::JsonRuleEvaluator.new(rules_json: content)]
        else
          raise VersionComparisonError, "Cannot build evaluators from version content"
        end
      end

      # Instantiates one evaluator per config entry.
      # @param configs [Array<Hash>, Hash] evaluator config(s) with a "type" of
      #   "json_rule" or "dmn" (symbol or string keys)
      # @return [Array] evaluator instances
      # @raise [VersionComparisonError] for an unknown evaluator type
      def build_evaluators_from_config(configs)
        # Array(hash) would explode a single config into key/value pairs.
        config_list = configs.is_a?(Hash) ? [configs] : Array(configs)

        config_list.map do |config|
          type = config[:type] || config["type"]

          case type.to_s
          when "json_rule"
            Evaluators::JsonRuleEvaluator.new(rules_json: config[:rules] || config["rules"])
          when "dmn"
            model = config[:model] || config["model"]
            decision_id = config[:decision_id] || config["decision_id"]
            Evaluators::DmnEvaluator.new(model: model, decision_id: decision_id)
          else
            raise VersionComparisonError, "Unknown evaluator type: #{type}"
          end
        end
      end

      # Runs every test context through both agents.
      # Uses worker threads when options[:parallel] is truthy and there is more
      # than one context; otherwise runs sequentially in input order.
      # @return [Array<Hash>] one comparison result per context
      def execute_comparison(test_data, baseline_agent, proposed_agent, options)
        results = []
        mutex = Mutex.new

        if options[:parallel] && test_data.size > 1
          execute_parallel(test_data, baseline_agent, proposed_agent, options, mutex) do |result|
            # The block is invoked from worker threads, so guard the shared array.
            mutex.synchronize { results << result }
          end
        else
          test_data.each do |context|
            result = execute_single_comparison(context, baseline_agent, proposed_agent)
            results << result
          end
        end

        results
      end

      # Drains the test contexts with a pool of worker threads, yielding each
      # comparison result to the caller's block (from the worker thread).
      # Results are yielded in completion order, not input order.
      # @param options [Hash] reads :thread_count
      def execute_parallel(test_data, baseline_agent, proposed_agent, options, _mutex)
        # At least one worker, otherwise nothing would be processed at all;
        # never more workers than contexts.
        requested = options[:thread_count].to_i
        thread_count = [[requested, 1].max, test_data.size].min
        queue = Queue.new
        test_data.each { |c| queue << c }

        threads = Array.new(thread_count) do
          Thread.new do
            loop do
              # Non-blocking pop raises ThreadError once the queue is empty;
              # that (not a falsy context) is the signal to stop.
              context = begin
                queue.pop(true)
              rescue ThreadError
                break
              end

              yield execute_single_comparison(context, baseline_agent, proposed_agent)
            end
          end
        end

        threads.each(&:join)
      end

      # Evaluates one context with both agents and builds its result row.
      # @param context [Context, Object] wrapped in a Context unless it already is one
      # @return [Hash] comparison result
      def execute_single_comparison(context, baseline_agent, proposed_agent)
        ctx = context.is_a?(Context) ? context : Context.new(context)

        baseline_metrics = measure_decision_metrics(ctx, baseline_agent, :baseline)
        proposed_metrics = measure_decision_metrics(ctx, proposed_agent, :proposed)
        delta_metrics = calculate_decision_delta(baseline_metrics, proposed_metrics)

        build_comparison_result(ctx, baseline_metrics, proposed_metrics, delta_metrics)
      end

      # Runs agent.decide and records how long it took.
      # A NoEvaluationsError is treated as "no decision" rather than a failure.
      # @return [Hash] :decision (nil when no evaluations), :duration_ms,
      #   :evaluations_count
      def measure_decision_metrics(context, agent, _label)
        started_at = monotonic_ms
        begin
          decision = agent.decide(context: context)
          duration_ms = monotonic_ms - started_at
          evaluations_count = decision.evaluations&.size || 0
          { decision: decision, duration_ms: duration_ms, evaluations_count: evaluations_count }
        rescue NoEvaluationsError
          duration_ms = monotonic_ms - started_at
          { decision: nil, duration_ms: duration_ms, evaluations_count: 0 }
        end
      end

      # Current monotonic clock reading in milliseconds. Unlike Time.now it is
      # not affected by wall-clock adjustments, so differences are never negative.
      # @return [Float]
      def monotonic_ms
        Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_millisecond)
      end

      # Compares the two measurements of one context.
      # A decision that exists on only one side counts as changed, with the
      # full confidence of the existing side as the (signed) delta.
      # @return [Hash] :decision_changed, :confidence_delta,
      #   :performance_delta_ms, :performance_delta_percent
      def calculate_decision_delta(baseline_metrics, proposed_metrics)
        baseline_decision = baseline_metrics[:decision]
        proposed_decision = proposed_metrics[:decision]

        decision_changed, confidence_delta =
          if baseline_decision.nil? && proposed_decision.nil?
            [false, 0]
          elsif baseline_decision.nil?
            [true, proposed_decision.confidence]
          elsif proposed_decision.nil?
            [true, -baseline_decision.confidence]
          else
            [
              baseline_decision.decision != proposed_decision.decision,
              proposed_decision.confidence - baseline_decision.confidence
            ]
          end

        baseline_duration = baseline_metrics[:duration_ms]
        proposed_duration = proposed_metrics[:duration_ms]
        performance_delta_ms = proposed_duration - baseline_duration
        # Guard against dividing by a zero-length baseline measurement.
        performance_delta_percent =
          baseline_duration.positive? ? (performance_delta_ms / baseline_duration * 100) : 0

        {
          decision_changed: decision_changed,
          confidence_delta: confidence_delta,
          performance_delta_ms: performance_delta_ms,
          performance_delta_percent: performance_delta_percent
        }
      end

      # Flattens the measurements and deltas of one context into a result row.
      # Missing decisions are reported as nil with a confidence of 0.
      # @return [Hash] comparison result
      def build_comparison_result(context, baseline_metrics, proposed_metrics, delta_metrics)
        baseline_decision = baseline_metrics[:decision]
        proposed_decision = proposed_metrics[:decision]

        {
          context: context.to_h,
          baseline_decision: baseline_decision&.decision,
          baseline_confidence: baseline_decision&.confidence || 0,
          baseline_duration_ms: baseline_metrics[:duration_ms],
          baseline_evaluations_count: baseline_metrics[:evaluations_count],
          proposed_decision: proposed_decision&.decision,
          proposed_confidence: proposed_decision&.confidence || 0,
          proposed_duration_ms: proposed_metrics[:duration_ms],
          proposed_evaluations_count: proposed_metrics[:evaluations_count],
          decision_changed: delta_metrics[:decision_changed],
          confidence_delta: delta_metrics[:confidence_delta],
          confidence_shift_magnitude: delta_metrics[:confidence_delta].abs,
          performance_delta_ms: delta_metrics[:performance_delta_ms],
          performance_delta_percent: delta_metrics[:performance_delta_percent]
        }
      end

      # Assembles the full report from the per-context results.
      # @param options [Hash] reads :calculate_risk
      # @return [Hash] impact report
      def build_impact_report(results, options)
        report = build_base_report(results)
        report[:confidence_impact] = build_confidence_impact(results)
        report[:rule_execution_frequency] = build_rule_frequency(results)
        report[:performance_impact] = calculate_performance_impact(results)
        add_risk_analysis(report, results, options)
        report
      end

      # Counts, change rate and per-decision distributions, plus the raw results.
      # @return [Hash]
      def build_base_report(results)
        total = results.size
        decision_changes = results.count { |r| r[:decision_changed] }
        baseline_distribution = results.group_by { |r| r[:baseline_decision] }.transform_values(&:count)
        proposed_distribution = results.group_by { |r| r[:proposed_decision] }.transform_values(&:count)

        {
          total_contexts: total,
          decision_changes: decision_changes,
          change_rate: total.positive? ? (decision_changes.to_f / total) : 0,
          decision_distribution: {
            baseline: baseline_distribution,
            proposed: proposed_distribution
          },
          results: results
        }
      end

      # Summarizes confidence deltas across all results.
      # @return [Hash] :average_delta, :max_shift, :positive_shifts, :negative_shifts
      def build_confidence_impact(results)
        confidence_deltas = results.map { |r| r[:confidence_delta] }.compact
        # to_f keeps Integer deltas from being truncated by integer division.
        avg_confidence_delta =
          confidence_deltas.empty? ? 0 : confidence_deltas.sum.to_f / confidence_deltas.size
        max_confidence_shift = confidence_deltas.map(&:abs).max || 0

        {
          average_delta: avg_confidence_delta,
          max_shift: max_confidence_shift,
          positive_shifts: confidence_deltas.count(&:positive?),
          negative_shifts: confidence_deltas.count(&:negative?)
        }
      end

      # @return [Hash] decision frequencies for the baseline and proposed sides
      def build_rule_frequency(results)
        {
          baseline: calculate_rule_frequency(results, :baseline_decision),
          proposed: calculate_rule_frequency(results, :proposed_decision)
        }
      end

      # Adds :risk_score and :risk_level to the report when options[:calculate_risk]
      # is truthy; otherwise leaves the report untouched.
      def add_risk_analysis(report, results, options)
        return unless options[:calculate_risk]

        report[:risk_score] = calculate_risk_score(results)
        report[:risk_level] = categorize_risk(report[:risk_score])
      end

      # Share of results per decision value (each value between 0.0 and 1.0).
      def calculate_rule_frequency(results, decision_key)
        # Approximate rule frequency from decision distribution
        # In a real implementation, this would track which rules fired
        results.group_by { |r| r[decision_key] }.transform_values { |v| v.size.to_f / results.size }
      end

      # Counts results whose baseline decision text contains "approve"/"accept"
      # and whose proposed decision text contains "reject"/"deny"
      # (case-insensitive substring match).
      def count_rejection_increases(results)
        results.count do |r|
          baseline = r[:baseline_decision].to_s.downcase
          proposed = r[:proposed_decision].to_s.downcase
          (baseline.include?("approve") || baseline.include?("accept")) &&
            (proposed.include?("reject") || proposed.include?("deny"))
        end
      end

      # Maps a risk score to a label.
      # @return [String] "low" (< 0.2), "medium" (< 0.5), "high" (< 0.8),
      #   otherwise "critical"
      def categorize_risk(risk_score)
        case risk_score
        when 0.0...0.2
          "low"
        when 0.2...0.5
          "medium"
        when 0.5...0.8
          "high"
        else
          "critical"
        end
      end

      # Calculate performance impact metrics
      # @param results [Array<Hash>] Comparison results with performance data
      # @return [Hash] Performance impact metrics (empty when there are no results)
      def calculate_performance_impact(results)
        return {} if results.empty?

        metrics = extract_performance_metrics(results)
        latency_stats = calculate_latency_statistics(metrics)
        throughput_stats = calculate_throughput_statistics(latency_stats)
        complexity_stats = calculate_complexity_statistics(metrics)
        performance_deltas = calculate_performance_deltas(metrics, latency_stats, throughput_stats)

        build_performance_impact_hash(latency_stats, throughput_stats, complexity_stats, performance_deltas)
      end

      # Collects the per-result timing and evaluation-count columns, dropping nils.
      # @return [Hash{Symbol => Array<Numeric>}]
      def extract_performance_metrics(results)
        {
          baseline_durations: results.map { |r| r[:baseline_duration_ms] }.compact,
          proposed_durations: results.map { |r| r[:proposed_duration_ms] }.compact,
          performance_deltas: results.map { |r| r[:performance_delta_ms] }.compact,
          performance_delta_percents: results.map { |r| r[:performance_delta_percent] }.compact,
          baseline_evaluations: results.map { |r| r[:baseline_evaluations_count] }.compact,
          proposed_evaluations: results.map { |r| r[:proposed_evaluations_count] }.compact
        }
      end

      # Average/min/max duration for each side; 0 where no durations exist.
      def calculate_latency_statistics(metrics)
        baseline_durations = metrics[:baseline_durations]
        proposed_durations = metrics[:proposed_durations]

        {
          baseline_avg: calculate_average(baseline_durations),
          baseline_min: baseline_durations.min || 0,
          baseline_max: baseline_durations.max || 0,
          proposed_avg: calculate_average(proposed_durations),
          proposed_min: proposed_durations.min || 0,
          proposed_max: proposed_durations.max || 0
        }
      end

      # Decisions per second derived from the average latency in milliseconds
      # (1000 / avg); 0 when the average is not positive.
      def calculate_throughput_statistics(latency_stats)
        baseline_throughput = latency_stats[:baseline_avg].positive? ? (1000.0 / latency_stats[:baseline_avg]) : 0
        proposed_throughput = latency_stats[:proposed_avg].positive? ? (1000.0 / latency_stats[:proposed_avg]) : 0

        {
          baseline: baseline_throughput,
          proposed: proposed_throughput
        }
      end

      # Average number of evaluations per decision for each side and their difference.
      def calculate_complexity_statistics(metrics)
        baseline_avg = calculate_average(metrics[:baseline_evaluations], as_float: true)
        proposed_avg = calculate_average(metrics[:proposed_evaluations], as_float: true)

        {
          baseline_avg: baseline_avg,
          proposed_avg: proposed_avg,
          delta: proposed_avg - baseline_avg
        }
      end

      # Average latency deltas plus the relative throughput change.
      def calculate_performance_deltas(metrics, _latency_stats, throughput_stats)
        avg_delta_ms = calculate_average(metrics[:performance_deltas])
        avg_delta_percent = calculate_average(metrics[:performance_delta_percents])
        baseline_throughput = throughput_stats[:baseline]
        proposed_throughput = throughput_stats[:proposed]
        throughput_delta_percent =
          if baseline_throughput.positive?
            (proposed_throughput - baseline_throughput) / baseline_throughput * 100
          else
            0
          end

        {
          avg_delta_ms: avg_delta_ms,
          avg_delta_percent: avg_delta_percent,
          throughput_delta_percent: throughput_delta_percent
        }
      end

      # Arithmetic mean of the values; 0 for an empty list.
      # @param as_float [Boolean] force float division (needed for Integer values)
      def calculate_average(values, as_float: false)
        return 0 if values.empty?

        sum = as_float ? values.sum.to_f : values.sum
        sum / values.size
      end

      # Combines the individual statistics into the :performance_impact report section.
      def build_performance_impact_hash(latency_stats, throughput_stats, complexity_stats, performance_deltas)
        {
          latency: build_latency_impact(latency_stats, performance_deltas),
          throughput: build_throughput_impact(throughput_stats, performance_deltas),
          rule_complexity: build_complexity_impact(complexity_stats),
          impact_level: categorize_performance_impact(performance_deltas[:avg_delta_percent]),
          summary: build_performance_summary(
            performance_deltas[:avg_delta_percent],
            performance_deltas[:throughput_delta_percent],
            complexity_stats[:delta]
          )
        }
      end

      # Rounded latency figures (ms to 4 decimals, percent to 2 decimals).
      def build_latency_impact(latency_stats, performance_deltas)
        {
          baseline: {
            average_ms: latency_stats[:baseline_avg].round(4),
            min_ms: latency_stats[:baseline_min].round(4),
            max_ms: latency_stats[:baseline_max].round(4)
          },
          proposed: {
            average_ms: latency_stats[:proposed_avg].round(4),
            min_ms: latency_stats[:proposed_min].round(4),
            max_ms: latency_stats[:proposed_max].round(4)
          },
          delta_ms: performance_deltas[:avg_delta_ms].round(4),
          delta_percent: performance_deltas[:avg_delta_percent].round(2)
        }
      end

      # Rounded throughput figures (2 decimals).
      def build_throughput_impact(throughput_stats, performance_deltas)
        {
          baseline_decisions_per_second: throughput_stats[:baseline].round(2),
          proposed_decisions_per_second: throughput_stats[:proposed].round(2),
          delta_percent: performance_deltas[:throughput_delta_percent].round(2)
        }
      end

      # Rounded evaluation-count figures (2 decimals).
      def build_complexity_impact(complexity_stats)
        {
          baseline_avg_evaluations: complexity_stats[:baseline_avg].round(2),
          proposed_avg_evaluations: complexity_stats[:proposed_avg].round(2),
          evaluations_delta: complexity_stats[:delta].round(2)
        }
      end

      # Categorize performance impact level
      # @param delta_percent [Float] Performance delta percentage
      # @return [String] Impact level: "improvement", "neutral", "minor_degradation",
      #   "moderate_degradation", "significant_degradation"
      def categorize_performance_impact(delta_percent)
        case delta_percent
        when -Float::INFINITY...-5.0
          "improvement"
        when -5.0...5.0
          "neutral"
        when 5.0...15.0
          "minor_degradation"
        when 15.0...30.0
          "moderate_degradation"
        else
          "significant_degradation"
        end
      end

      # Build human-readable performance summary
      # @param latency_delta_percent [Float] Latency delta percentage
      # @param throughput_delta_percent [Float] Throughput delta percentage
      # @param evaluations_delta [Float] Evaluations delta
      # @return [String] Summary text
      def build_performance_summary(latency_delta_percent, throughput_delta_percent, evaluations_delta)
        parts = []

        if latency_delta_percent.abs > 5.0
          direction = latency_delta_percent.positive? ? "slower" : "faster"
          parts << "Average latency is #{latency_delta_percent.abs.round(2)}% #{direction}"
        end

        if throughput_delta_percent.abs > 5.0
          direction = throughput_delta_percent.positive? ? "higher" : "lower"
          parts << "Throughput is #{throughput_delta_percent.abs.round(2)}% #{direction}"
        end

        if evaluations_delta.abs > 0.5
          direction = evaluations_delta.positive? ? "more" : "fewer"
          parts << "Average #{direction} #{evaluations_delta.abs.round(2)} rule evaluations per decision"
        end

        if parts.empty?
          "Performance impact is minimal (<5% change)"
        else
          "#{parts.join('. ')}."
        end
      end
      # rubocop:enable Metrics/ClassLength
    end
  end
end