decision_agent 0.3.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. checksums.yaml +4 -4
  2. data/README.md +272 -7
  3. data/lib/decision_agent/agent.rb +72 -1
  4. data/lib/decision_agent/context.rb +1 -0
  5. data/lib/decision_agent/data_enrichment/cache/memory_adapter.rb +86 -0
  6. data/lib/decision_agent/data_enrichment/cache_adapter.rb +49 -0
  7. data/lib/decision_agent/data_enrichment/circuit_breaker.rb +135 -0
  8. data/lib/decision_agent/data_enrichment/client.rb +220 -0
  9. data/lib/decision_agent/data_enrichment/config.rb +78 -0
  10. data/lib/decision_agent/data_enrichment/errors.rb +36 -0
  11. data/lib/decision_agent/decision.rb +102 -2
  12. data/lib/decision_agent/dmn/feel/evaluator.rb +28 -6
  13. data/lib/decision_agent/dsl/condition_evaluator.rb +982 -839
  14. data/lib/decision_agent/dsl/schema_validator.rb +51 -13
  15. data/lib/decision_agent/evaluators/dmn_evaluator.rb +106 -19
  16. data/lib/decision_agent/evaluators/json_rule_evaluator.rb +69 -9
  17. data/lib/decision_agent/explainability/condition_trace.rb +83 -0
  18. data/lib/decision_agent/explainability/explainability_result.rb +52 -0
  19. data/lib/decision_agent/explainability/rule_trace.rb +39 -0
  20. data/lib/decision_agent/explainability/trace_collector.rb +24 -0
  21. data/lib/decision_agent/monitoring/alert_manager.rb +5 -1
  22. data/lib/decision_agent/simulation/errors.rb +18 -0
  23. data/lib/decision_agent/simulation/impact_analyzer.rb +498 -0
  24. data/lib/decision_agent/simulation/monte_carlo_simulator.rb +635 -0
  25. data/lib/decision_agent/simulation/replay_engine.rb +486 -0
  26. data/lib/decision_agent/simulation/scenario_engine.rb +318 -0
  27. data/lib/decision_agent/simulation/scenario_library.rb +163 -0
  28. data/lib/decision_agent/simulation/shadow_test_engine.rb +287 -0
  29. data/lib/decision_agent/simulation/what_if_analyzer.rb +1002 -0
  30. data/lib/decision_agent/simulation.rb +17 -0
  31. data/lib/decision_agent/version.rb +1 -1
  32. data/lib/decision_agent/versioning/activerecord_adapter.rb +23 -8
  33. data/lib/decision_agent/web/public/app.js +119 -0
  34. data/lib/decision_agent/web/public/index.html +49 -0
  35. data/lib/decision_agent/web/public/simulation.html +130 -0
  36. data/lib/decision_agent/web/public/simulation_impact.html +478 -0
  37. data/lib/decision_agent/web/public/simulation_replay.html +551 -0
  38. data/lib/decision_agent/web/public/simulation_shadow.html +546 -0
  39. data/lib/decision_agent/web/public/simulation_whatif.html +532 -0
  40. data/lib/decision_agent/web/public/styles.css +65 -0
  41. data/lib/decision_agent/web/server.rb +594 -23
  42. data/lib/decision_agent.rb +60 -2
  43. metadata +53 -73
  44. data/spec/ab_testing/ab_test_assignment_spec.rb +0 -253
  45. data/spec/ab_testing/ab_test_manager_spec.rb +0 -612
  46. data/spec/ab_testing/ab_test_spec.rb +0 -270
  47. data/spec/ab_testing/ab_testing_agent_spec.rb +0 -655
  48. data/spec/ab_testing/storage/adapter_spec.rb +0 -64
  49. data/spec/ab_testing/storage/memory_adapter_spec.rb +0 -485
  50. data/spec/activerecord_thread_safety_spec.rb +0 -553
  51. data/spec/advanced_operators_spec.rb +0 -3150
  52. data/spec/agent_spec.rb +0 -289
  53. data/spec/api_contract_spec.rb +0 -430
  54. data/spec/audit_adapters_spec.rb +0 -92
  55. data/spec/auth/access_audit_logger_spec.rb +0 -394
  56. data/spec/auth/authenticator_spec.rb +0 -112
  57. data/spec/auth/password_reset_spec.rb +0 -294
  58. data/spec/auth/permission_checker_spec.rb +0 -207
  59. data/spec/auth/permission_spec.rb +0 -73
  60. data/spec/auth/rbac_adapter_spec.rb +0 -778
  61. data/spec/auth/rbac_config_spec.rb +0 -82
  62. data/spec/auth/role_spec.rb +0 -51
  63. data/spec/auth/session_manager_spec.rb +0 -172
  64. data/spec/auth/session_spec.rb +0 -112
  65. data/spec/auth/user_spec.rb +0 -130
  66. data/spec/comprehensive_edge_cases_spec.rb +0 -1777
  67. data/spec/context_spec.rb +0 -127
  68. data/spec/decision_agent_spec.rb +0 -96
  69. data/spec/decision_spec.rb +0 -423
  70. data/spec/dmn/decision_graph_spec.rb +0 -282
  71. data/spec/dmn/decision_tree_spec.rb +0 -203
  72. data/spec/dmn/feel/errors_spec.rb +0 -18
  73. data/spec/dmn/feel/functions_spec.rb +0 -400
  74. data/spec/dmn/feel/simple_parser_spec.rb +0 -274
  75. data/spec/dmn/feel/types_spec.rb +0 -176
  76. data/spec/dmn/feel_parser_spec.rb +0 -489
  77. data/spec/dmn/hit_policy_spec.rb +0 -202
  78. data/spec/dmn/integration_spec.rb +0 -226
  79. data/spec/dsl/condition_evaluator_spec.rb +0 -774
  80. data/spec/dsl_validation_spec.rb +0 -648
  81. data/spec/edge_cases_spec.rb +0 -353
  82. data/spec/evaluation_spec.rb +0 -364
  83. data/spec/evaluation_validator_spec.rb +0 -165
  84. data/spec/examples/feedback_aware_evaluator_spec.rb +0 -460
  85. data/spec/examples.txt +0 -1909
  86. data/spec/fixtures/dmn/complex_decision.dmn +0 -81
  87. data/spec/fixtures/dmn/invalid_structure.dmn +0 -31
  88. data/spec/fixtures/dmn/simple_decision.dmn +0 -40
  89. data/spec/issue_verification_spec.rb +0 -759
  90. data/spec/json_rule_evaluator_spec.rb +0 -587
  91. data/spec/monitoring/alert_manager_spec.rb +0 -378
  92. data/spec/monitoring/metrics_collector_spec.rb +0 -501
  93. data/spec/monitoring/monitored_agent_spec.rb +0 -225
  94. data/spec/monitoring/prometheus_exporter_spec.rb +0 -242
  95. data/spec/monitoring/storage/activerecord_adapter_spec.rb +0 -498
  96. data/spec/monitoring/storage/base_adapter_spec.rb +0 -61
  97. data/spec/monitoring/storage/memory_adapter_spec.rb +0 -247
  98. data/spec/performance_optimizations_spec.rb +0 -493
  99. data/spec/replay_edge_cases_spec.rb +0 -699
  100. data/spec/replay_spec.rb +0 -210
  101. data/spec/rfc8785_canonicalization_spec.rb +0 -215
  102. data/spec/scoring_spec.rb +0 -225
  103. data/spec/spec_helper.rb +0 -60
  104. data/spec/testing/batch_test_importer_spec.rb +0 -693
  105. data/spec/testing/batch_test_runner_spec.rb +0 -307
  106. data/spec/testing/test_coverage_analyzer_spec.rb +0 -292
  107. data/spec/testing/test_result_comparator_spec.rb +0 -392
  108. data/spec/testing/test_scenario_spec.rb +0 -113
  109. data/spec/thread_safety_spec.rb +0 -490
  110. data/spec/thread_safety_spec.rb.broken +0 -878
  111. data/spec/versioning/adapter_spec.rb +0 -156
  112. data/spec/versioning_spec.rb +0 -1030
  113. data/spec/web/middleware/auth_middleware_spec.rb +0 -133
  114. data/spec/web/middleware/permission_middleware_spec.rb +0 -247
  115. data/spec/web_ui_rack_spec.rb +0 -2134
@@ -0,0 +1,486 @@
1
+ require "csv"
2
+ require "json"
3
+ require_relative "errors"
4
+
5
+ # Conditionally require ActiveRecord if available
6
+ begin
7
+ require "active_record"
8
+ rescue LoadError
9
+ # ActiveRecord not available - database queries will raise an error
10
+ end
11
+
12
module DecisionAgent
  module Simulation
    # Engine for replaying historical decisions and backtesting rule changes.
    #
    # Historical contexts can come from a CSV/JSON file, an in-memory array, or
    # a database query (ActiveRecord required). Each context is run through a
    # "replay" agent and, optionally, a "baseline" agent; the per-context
    # outcomes are then aggregated into a comparison report.
    # rubocop:disable Metrics/ClassLength
    class ReplayEngine
      # Options applied by #replay when the caller does not override them.
      DEFAULT_OPTIONS = {
        parallel: true,
        thread_count: 4,
        progress_callback: nil
      }.freeze

      # Bookkeeping columns dropped from database rows when their value is nil.
      METADATA_COLUMNS = %i[id created_at updated_at].freeze

      # Whole-string match for plain integers/decimals. Anchored with \A/\z
      # (not ^/$) so multi-line CSV fields are never mistaken for numbers.
      NUMERIC_PATTERN = /\A-?\d+(\.\d+)?\z/

      attr_reader :agent, :version_manager

      # @param agent [Agent] Agent used when no rule version is given; its
      #   scoring strategy and evaluators are also reused for version-built agents
      # @param version_manager [Object, nil] Version lookup service; a new
      #   Versioning::VersionManager is created when omitted
      def initialize(agent:, version_manager: nil)
        @agent = agent
        @version_manager = version_manager || Versioning::VersionManager.new
      end

      # Replay historical decisions with a specific rule version
      # @param historical_data [String, Array<Hash>, Hash] Path to CSV/JSON file, array of context hashes, or database query config
      #   Database config format: { database: { connection: {...}, query: "SELECT ..." } }
      #   or { database: { connection: {...}, table: "table_name", where: {...} } }
      # @param rule_version [String, Integer, Hash, nil] Version ID, version hash, or nil to use current agent
      # @param compare_with [String, Integer, Hash, nil] Optional baseline version to compare against
      # @param options [Hash] Execution options
      #   - :parallel [Boolean] Use parallel execution (default: true)
      #   - :thread_count [Integer] Number of threads (default: 4; values below 1 are treated as 1)
      #   - :progress_callback [Proc] Called with completed:, total:, percentage: after each context
      # @return [Hash] Replay results with comparison data; :results is in input order
      # @raise [InvalidHistoricalDataError] when the historical data cannot be loaded
      # @raise [VersionComparisonError] when a version cannot be resolved
      def replay(historical_data:, rule_version: nil, compare_with: nil, options: {})
        contexts = load_historical_data(historical_data)
        options = DEFAULT_OPTIONS.merge(options)

        replay_agent = rule_version ? build_agent_from_version(rule_version) : @agent
        baseline_agent = build_agent_from_version(compare_with) if compare_with

        results = execute_replay(contexts, replay_agent, baseline_agent, options)
        build_comparison_report(results, baseline_agent)
      end

      # Backtest a rule change against historical data
      # @param historical_data [String, Array<Hash>, Hash] Historical context data (file path, array, or database config)
      # @param proposed_version [String, Integer, Hash] Proposed rule version
      # @param baseline_version [String, Integer, Hash, nil] Baseline version (default: active version
      #   of the proposed version's rule_id, when it has one)
      # @param options [Hash] Execution options (see #replay)
      # @return [Hash] Backtest results with impact analysis
      def backtest(historical_data:, proposed_version:, baseline_version: nil, options: {})
        baseline_version ||= get_active_version_for_rule(proposed_version)
        replay(
          historical_data: historical_data,
          rule_version: proposed_version,
          compare_with: baseline_version,
          options: options
        )
      end

      private

      # Dispatches on the type of the historical data source.
      # Arrays are returned as-is (no copy, no validation of their elements).
      def load_historical_data(data)
        case data
        when String
          load_from_file(data)
        when Array
          data
        when Hash
          unless data.key?(:database) || data.key?("database")
            raise InvalidHistoricalDataError, "Historical data Hash must contain :database key for database queries"
          end

          load_database(data[:database] || data["database"])
        else
          raise InvalidHistoricalDataError, "Historical data must be a file path (String), array of contexts, or database query config (Hash)"
        end
      end

      # Chooses the loader from the file extension (case-insensitive).
      def load_from_file(file_path)
        case File.extname(file_path).downcase
        when ".csv"
          load_csv(file_path)
        when ".json"
          load_json(file_path)
        else
          raise InvalidHistoricalDataError, "Unsupported file format. Use CSV or JSON"
        end
      end

      # Reads a CSV file with a header row into an array of symbol-keyed hashes.
      # Values that are entirely numeric are converted to Integer/Float.
      def load_csv(file_path)
        CSV.foreach(file_path, headers: true, header_converters: :symbol).map do |row|
          row.to_h.transform_values { |value| coerce_numeric(value) }
        end
      rescue StandardError => e
        raise InvalidHistoricalDataError, "Failed to load CSV: #{e.message}"
      end

      # Converts a purely numeric String to Integer or Float; anything else is
      # returned untouched.
      def coerce_numeric(value)
        return value unless value.is_a?(String) && value.match?(NUMERIC_PATTERN)

        value.include?(".") ? value.to_f : value.to_i
      end

      # Reads a JSON file; a top-level non-array value is wrapped in an array.
      def load_json(file_path)
        data = JSON.parse(File.read(file_path), symbolize_names: true)
        data.is_a?(Array) ? data : [data]
      rescue StandardError => e
        raise InvalidHistoricalDataError, "Failed to load JSON: #{e.message}"
      end

      # Loads contexts from a database described by +config+
      # (:connection plus either :query or :table/:where; Symbol or String keys).
      # @raise [InvalidHistoricalDataError] for missing ActiveRecord, bad config, or query failure
      def load_database(config)
        # This check must stay outside the rescue below: a rescue clause that
        # names ActiveRecord::ActiveRecordError raises NameError while being
        # evaluated if ActiveRecord was never loaded, masking this message.
        unless defined?(ActiveRecord)
          raise InvalidHistoricalDataError, "ActiveRecord is required for database queries. Add 'activerecord' to your Gemfile."
        end

        begin
          config = {} unless config.is_a?(Hash)
          connection_config = config[:connection] || config["connection"]
          query = config[:query] || config["query"]
          table = config[:table] || config["table"]
          where_clause = config[:where] || config["where"]

          raise InvalidHistoricalDataError, "Database config must include :connection" unless connection_config
          raise InvalidHistoricalDataError, "Database config must include :query or :table" unless query || table

          connection = establish_database_connection(connection_config)
          execute_database_query(connection, query: query, table: table, where: where_clause)
        rescue ActiveRecord::ActiveRecordError => e
          raise InvalidHistoricalDataError, "Database query failed: #{e.message}"
        rescue StandardError => e
          # Errors raised above are re-wrapped here; the missing query/table
          # message is passed through unprefixed, all others get a prefix.
          raise InvalidHistoricalDataError, "Database config must include :query or :table" if e.message.include?("query or :table")

          raise InvalidHistoricalDataError, "Failed to load from database: #{e.message}"
        end
      end

      # Returns a database connection for +config+.
      # A String (any value) currently resolves to the default ActiveRecord
      # connection; a Hash is passed to establish_connection on a freshly
      # created abstract ActiveRecord class.
      def establish_database_connection(config)
        case config
        when String
          ActiveRecord::Base.connection
        when Hash
          # The class is registered under a generated constant name because the
          # original implementation notes an "Anonymous class is not allowed" error otherwise.
          # NOTE(review): every call defines a new constant and calls
          # establish_connection again; nothing here removes them afterwards.
          class_name = "DecisionAgentReplayConnection#{object_id}#{Thread.current.object_id}#{Time.now.to_f.to_s.delete('^0-9')}"

          DecisionAgent.const_set(:ReplayConnections, Module.new) unless defined?(DecisionAgent::ReplayConnections)

          connection_class = Class.new(ActiveRecord::Base) do
            self.abstract_class = true
          end

          DecisionAgent::ReplayConnections.const_set(class_name, connection_class)
          connection_class.establish_connection(config)
          connection_class.connection
        else
          raise InvalidHistoricalDataError, "Connection config must be a Hash or String"
        end
      rescue LoadError => e
        raise InvalidHistoricalDataError, "Failed to establish database connection: #{e.message}"
      rescue ActiveRecord::ActiveRecordError => e
        raise InvalidHistoricalDataError, "Database connection failed: #{e.message}"
      end

      # Runs either the caller-supplied SQL or a SELECT built from table/where.
      # NOTE(review): +query+ is executed verbatim — callers must not build it
      # from untrusted input.
      def execute_database_query(connection, query: nil, table: nil, where: nil)
        sql = query || (table && build_table_query(connection, table, where))
        raise InvalidHistoricalDataError, "Database config must include :query or :table" unless sql

        convert_query_results_to_contexts(connection.select_all(sql))
      end

      # Builds "SELECT * FROM <table> [WHERE k = v AND ...]" using the
      # connection's own quoting for the table, column names and values.
      def build_table_query(connection, table, where)
        sql = "SELECT * FROM #{connection.quote_table_name(table)}"
        return sql unless where.is_a?(Hash) && !where.empty?

        conditions = where.map do |key, value|
          "#{connection.quote_column_name(key.to_s)} = #{connection.quote(value)}"
        end
        "#{sql} WHERE #{conditions.join(' AND ')}"
      end

      # Picks a converter based on the shape of the query result.
      def convert_query_results_to_contexts(results)
        if results.respond_to?(:columns) && results.respond_to?(:rows)
          convert_activerecord_results(results)
        elsif results.is_a?(Array)
          convert_array_results(results)
        elsif results.respond_to?(:each)
          convert_enumerable_results(results)
        else
          raise InvalidHistoricalDataError, "Unexpected query result format: #{results.class}"
        end
      end

      # Converts a columns/rows result into symbol-keyed contexts, dropping
      # rows that end up empty.
      def convert_activerecord_results(results)
        columns = results.columns.map(&:to_sym)
        results.rows.map { |row| build_context_from_row(row, columns) }.reject(&:empty?)
      end

      # Pairs each column with the value at the same position in +row+.
      # Iterating by position keeps this linear and makes duplicate column
      # names read their own cell (the last occurrence wins).
      def build_context_from_row(row, columns)
        columns.each_with_index.with_object({}) do |(column, index), context|
          raw_value = row[index]
          next if skip_metadata_field?(column, raw_value)

          context[column] = parse_json_value(raw_value)
        end
      end

      # True for id/created_at/updated_at columns whose value is nil.
      def skip_metadata_field?(column, value)
        value.nil? && METADATA_COLUMNS.include?(column)
      end

      # Parses Strings that look like a JSON object/array; returns the input
      # unchanged for any other value or when parsing fails.
      def parse_json_value(value)
        return value unless value.is_a?(String) && value.start_with?("{", "[")

        JSON.parse(value, symbolize_names: true)
      rescue JSON::ParserError
        value
      end

      # Converts an array of row-like objects into cleaned, non-empty contexts.
      def convert_array_results(results)
        results.each_with_object([]) do |row, contexts|
          context = clean_context(normalize_row_to_hash(row))
          contexts << context unless context.empty?
        end
      end

      # Same conversion as #convert_array_results for any other enumerable.
      def convert_enumerable_results(results)
        convert_array_results(results)
      end

      # Returns +row+ as a symbol-keyed Hash; rows that cannot be converted
      # with #to_h become an empty Hash.
      def normalize_row_to_hash(row)
        return {} unless row.is_a?(Hash) || row.respond_to?(:to_h)

        row.to_h.transform_keys(&:to_sym)
      end

      # Drops nil-valued id/created_at/updated_at entries.
      def clean_context(context)
        context.reject { |key, value| skip_metadata_field?(key, value) }
      end

      # Builds a throwaway agent for +version+ that reuses the primary agent's
      # scoring strategy and uses a null audit adapter.
      def build_agent_from_version(version)
        version_hash = resolve_version(version)
        Agent.new(
          evaluators: build_evaluators_from_version(version_hash),
          scoring_strategy: @agent.scoring_strategy,
          audit_adapter: Audit::NullAdapter.new
        )
      end

      # Looks up String/Integer version ids via the version manager; Hashes are
      # taken to already be version data.
      # @raise [VersionComparisonError] for unknown ids or unsupported types
      def resolve_version(version)
        case version
        when String, Integer
          version_data = @version_manager.get_version(version_id: version)
          raise VersionComparisonError, "Version not found: #{version}" unless version_data

          version_data
        when Hash
          version
        else
          raise VersionComparisonError, "Invalid version format: #{version.class}"
        end
      end

      # Derives evaluators from a version's content. Falls back to the primary
      # agent's evaluators when the content is missing, not a Hash, or has
      # neither an evaluators nor a rules entry (Symbol or String keys).
      def build_evaluators_from_version(version)
        content = version[:content] || version["content"]
        return @agent.evaluators unless content.is_a?(Hash)

        evaluator_configs = content[:evaluators] || content["evaluators"]
        return build_evaluators_from_config(evaluator_configs) if evaluator_configs
        return [Evaluators::JsonRuleEvaluator.new(rules_json: content)] if content[:rules] || content["rules"]

        @agent.evaluators
      end

      # Instantiates one evaluator per config entry.
      # @param configs [Hash, Array<Hash>] a single config or a list of them
      # @raise [VersionComparisonError] for an unrecognised type
      def build_evaluators_from_config(configs)
        # Array(hash) would explode a Hash into [key, value] pairs.
        configs = [configs] if configs.is_a?(Hash)

        Array(configs).map do |config|
          type = config[:type] || config["type"]
          case type.to_s
          when "json_rule"
            Evaluators::JsonRuleEvaluator.new(rules_json: config[:rules] || config["rules"])
          when "dmn"
            model = config[:model] || config["model"]
            decision_id = config[:decision_id] || config["decision_id"]
            Evaluators::DmnEvaluator.new(model: model, decision_id: decision_id)
          else
            raise VersionComparisonError, "Unknown evaluator type: #{type}"
          end
        end
      end

      # Returns the active version for the proposed version's rule_id, or nil
      # when the proposed version carries no rule_id.
      def get_active_version_for_rule(proposed_version)
        version_hash = resolve_version(proposed_version)
        rule_id = version_hash[:rule_id] || version_hash["rule_id"]
        return nil unless rule_id

        @version_manager.get_active_version(rule_id: rule_id)
      end

      # Runs every context through the agents, sequentially or on worker
      # threads, invoking the progress callback after each one.
      # @return [Array<Hash>] per-context results, in the order of +contexts+
      def execute_replay(contexts, replay_agent, baseline_agent, options)
        total = contexts.size
        completed = 0
        report_progress = lambda do
          completed += 1
          options[:progress_callback]&.call(
            completed: completed,
            total: total,
            percentage: (completed.to_f / total * 100).round(2)
          )
        end

        return run_sequentially(contexts, replay_agent, baseline_agent, report_progress) unless options[:parallel] && total > 1

        results = Array.new(total)
        mutex = Mutex.new
        execute_parallel(contexts, replay_agent, baseline_agent, options, mutex) do |result, index|
          # Workers finish in arbitrary order; slot each result by input index
          # and keep the shared counter/callback under the lock.
          mutex.synchronize do
            results[index] = result
            report_progress.call
          end
        end
        results
      end

      # Single-threaded counterpart of #execute_parallel.
      def run_sequentially(contexts, replay_agent, baseline_agent, report_progress)
        contexts.map do |context|
          result = execute_single_replay(context, replay_agent, baseline_agent)
          report_progress.call
          result
        end
      end

      # Fans the contexts out to worker threads and yields (result, index) for
      # each one from the worker that produced it. The caller's block is
      # responsible for its own synchronisation.
      def execute_parallel(contexts, replay_agent, baseline_agent, options, _mutex)
        requested = options[:thread_count].to_i
        requested = 1 unless requested.positive? # 0/nil would otherwise start no workers
        thread_count = [requested, contexts.size].min

        queue = Queue.new
        contexts.each_with_index { |context, index| queue << [context, index] }
        queue.close # pop returns nil once the closed queue is drained

        workers = Array.new(thread_count) do
          Thread.new do
            while (job = queue.pop)
              context, index = job
              yield execute_single_replay(context, replay_agent, baseline_agent), index
            end
          end
        end

        workers.each(&:join)
      end

      # Evaluates one context with the replay agent and, if present, the
      # baseline agent.
      # @return [Hash] decisions, confidences, :changed and :confidence_delta;
      #   includes :error when the replay agent raised NoEvaluationsError
      def execute_single_replay(context, replay_agent, baseline_agent)
        ctx = context.is_a?(Context) ? context : Context.new(context)

        begin
          replay_decision = replay_agent.decide(context: ctx)
        rescue NoEvaluationsError
          return {
            context: ctx.to_h,
            replay_decision: nil,
            replay_confidence: 0.0,
            baseline_decision: nil,
            baseline_confidence: 0.0,
            changed: false,
            confidence_delta: nil,
            error: "No evaluators returned a decision"
          }
        end

        baseline_decision = begin
          baseline_agent&.decide(context: ctx)
        rescue NoEvaluationsError
          nil
        end

        {
          context: ctx.to_h,
          replay_decision: replay_decision.decision,
          replay_confidence: replay_decision.confidence,
          baseline_decision: baseline_decision&.decision,
          baseline_confidence: baseline_decision&.confidence,
          # NOTE(review): with no baseline the left side is nil, so any non-nil
          # replay decision is reported as changed — confirm that is intended.
          changed: baseline_decision&.decision != replay_decision.decision,
          confidence_delta: baseline_decision ? (replay_decision.confidence - baseline_decision.confidence) : nil
        }
      end

      # Aggregates per-context results. Counts and rates are computed over
      # results without :error; :total_decisions counts every result.
      def build_comparison_report(results, baseline_agent)
        valid_results = results.reject { |result| result[:error] }
        changed = valid_results.count { |result| result[:changed] }

        deltas = valid_results.map { |result| result[:confidence_delta] }.compact
        # fdiv avoids truncating the average when the deltas are Integers.
        average_delta = deltas.empty? ? 0 : deltas.sum.fdiv(deltas.size)

        {
          total_decisions: results.size,
          changed_decisions: changed,
          unchanged_decisions: valid_results.size - changed,
          change_rate: valid_results.empty? ? 0 : changed.to_f / valid_results.size,
          average_confidence_delta: average_delta,
          decision_distribution: count_by(valid_results, :replay_decision),
          baseline_distribution: count_by(valid_results.select { |result| result[:baseline_decision] }, :baseline_decision),
          results: results,
          has_baseline: !baseline_agent.nil?,
          errors: results.size - valid_results.size
        }
      end

      # Counts results per distinct value stored under +key+.
      def count_by(results, key)
        results.group_by { |result| result[key] }.transform_values(&:size)
      end
      # rubocop:enable Metrics/ClassLength
    end
  end
end