decision_agent 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/lib/decision_agent/ab_testing/ab_test.rb +197 -0
  3. data/lib/decision_agent/ab_testing/ab_test_assignment.rb +76 -0
  4. data/lib/decision_agent/ab_testing/ab_test_manager.rb +317 -0
  5. data/lib/decision_agent/ab_testing/ab_testing_agent.rb +152 -0
  6. data/lib/decision_agent/ab_testing/storage/activerecord_adapter.rb +155 -0
  7. data/lib/decision_agent/ab_testing/storage/adapter.rb +67 -0
  8. data/lib/decision_agent/ab_testing/storage/memory_adapter.rb +116 -0
  9. data/lib/decision_agent/monitoring/metrics_collector.rb +148 -3
  10. data/lib/decision_agent/monitoring/storage/activerecord_adapter.rb +253 -0
  11. data/lib/decision_agent/monitoring/storage/base_adapter.rb +90 -0
  12. data/lib/decision_agent/monitoring/storage/memory_adapter.rb +222 -0
  13. data/lib/decision_agent/version.rb +1 -1
  14. data/lib/decision_agent.rb +7 -0
  15. data/lib/generators/decision_agent/install/install_generator.rb +37 -0
  16. data/lib/generators/decision_agent/install/templates/ab_test_assignment_model.rb +45 -0
  17. data/lib/generators/decision_agent/install/templates/ab_test_model.rb +54 -0
  18. data/lib/generators/decision_agent/install/templates/ab_testing_migration.rb +43 -0
  19. data/lib/generators/decision_agent/install/templates/ab_testing_tasks.rake +189 -0
  20. data/lib/generators/decision_agent/install/templates/decision_agent_tasks.rake +114 -0
  21. data/lib/generators/decision_agent/install/templates/decision_log.rb +57 -0
  22. data/lib/generators/decision_agent/install/templates/error_metric.rb +53 -0
  23. data/lib/generators/decision_agent/install/templates/evaluation_metric.rb +43 -0
  24. data/lib/generators/decision_agent/install/templates/monitoring_migration.rb +109 -0
  25. data/lib/generators/decision_agent/install/templates/performance_metric.rb +76 -0
  26. data/spec/ab_testing/ab_test_manager_spec.rb +330 -0
  27. data/spec/ab_testing/ab_test_spec.rb +270 -0
  28. data/spec/examples.txt +612 -548
  29. data/spec/issue_verification_spec.rb +95 -21
  30. data/spec/monitoring/metrics_collector_spec.rb +2 -2
  31. data/spec/monitoring/monitored_agent_spec.rb +1 -1
  32. data/spec/monitoring/prometheus_exporter_spec.rb +1 -1
  33. data/spec/monitoring/storage/activerecord_adapter_spec.rb +346 -0
  34. data/spec/monitoring/storage/memory_adapter_spec.rb +247 -0
  35. metadata +26 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6c8a831b21d5bb62dbb4948827cc0b4bc9bcf021b52f04a2d9141d1339287274
4
- data.tar.gz: 97d80cab6513385dc9e9b4bddb7555da1e8629488ce358319aa0842e74cf41e1
3
+ metadata.gz: fac7efbaf0c8afd76053d21e611eca64f5880d5b6434720fd90ae34549f6244d
4
+ data.tar.gz: ecc22d0a9bb19053fd103b06f2c1be768db432b6d9ed715eac8adddcbd6f68dc
5
5
  SHA512:
6
- metadata.gz: dcfa8f3cc0a5931a1a163b97caf7586c17eddfee38850b52b832fa1a94d720342868c6823158597d0dafa14bf7df82b9d9b394fdffac3c74d06a556eca2b5c18
7
- data.tar.gz: f183fc1b8c021e589571d0d9a97c3b2679fec3b529d2e73a76a4b7752a9792dcba77f4914de876c5448711416179dcbd0ba35b515fe1c4f2a74bcab27c1a873e
6
+ metadata.gz: b7a0be6ad95512697b6e49a8145668cde64256051db57a39cf925edbd93886d7772cb93026a9af7a9eaea2e9968756ec628ca94d0ebc5a0d1aef0c409eeae97d
7
+ data.tar.gz: 06777a3c4155b1adff552f712a05a0d4bd2694c6f8af631e36611528c050cf1ea711268230dba6d1a64522e127c0c312b7221fd5d36bdc56f69009b0a98c17e4
@@ -0,0 +1,197 @@
1
+ module DecisionAgent
2
+ module ABTesting
3
+ # Represents an A/B test configuration for comparing rule versions
4
class ABTest
  # Every status a test may hold.
  VALID_STATUSES = %w[scheduled running completed cancelled].freeze

  # Traffic distribution applied when no :traffic_split option is given.
  DEFAULT_TRAFFIC_SPLIT = { champion: 90, challenger: 10 }.freeze

  attr_reader :id, :name, :champion_version_id, :challenger_version_id,
              :traffic_split, :start_date, :end_date, :status

  # Builds a test and validates it immediately.
  #
  # @param name [String] Name of the A/B test
  # @param champion_version_id [String, Integer] ID of the current/champion version
  # @param challenger_version_id [String, Integer] ID of the new/challenger version
  # @param options [Hash] Optional configuration
  # @option options [Hash, Array] :traffic_split Traffic distribution, either a Hash with
  #   champion/challenger keys (symbol or string) or a two-element Array
  #   (default: { champion: 90, challenger: 10 })
  # @option options [Time] :start_date When the test starts (defaults to now, UTC)
  # @option options [Time] :end_date When the test ends (optional)
  # @option options [String] :status Test status: running, completed, cancelled, scheduled
  #   (defaults to "scheduled")
  # @option options [String, Integer] :id Optional ID (for persistence)
  # @raise [ValidationError] if any attribute is missing or inconsistent
  def initialize(name:, champion_version_id:, challenger_version_id:, **options)
    @id = options[:id]
    @name = name
    @champion_version_id = champion_version_id
    @challenger_version_id = challenger_version_id
    @traffic_split = normalize_traffic_split(options[:traffic_split] || DEFAULT_TRAFFIC_SPLIT)
    @start_date = options[:start_date] || Time.now.utc
    @end_date = options[:end_date]
    @status = options[:status] || "scheduled"

    validate!
  end

  # Picks a variant according to the traffic split.
  #
  # With a user_id the choice is derived from a SHA-256 digest of "<test id>:<user id>",
  # so the same user always lands in the same bucket for this test; without one the
  # bucket is random.
  #
  # NOTE(review): Digest is not required in this file - confirm it is loaded elsewhere.
  #
  # @param user_id [String, nil] Optional user identifier for consistent assignment
  # @return [Symbol] :champion or :challenger
  # @raise [TestNotRunningError] if the test is not currently running
  def assign_variant(user_id: nil)
    raise TestNotRunningError, "Test '#{@name}' is not running (status: #{@status})" unless running?

    bucket =
      if user_id
        Digest::SHA256.hexdigest("#{@id}:#{user_id}").to_i(16) % 100
      else
        rand(100)
      end

    # Buckets 0...champion% go to the champion, the remainder to the challenger.
    bucket < @traffic_split[:champion] ? :champion : :challenger
  end

  # Maps a variant to the rule version it stands for.
  #
  # @param variant [Symbol] :champion or :challenger
  # @return [String, Integer] The version ID
  # @raise [ArgumentError] for any other variant
  def version_for_variant(variant)
    case variant
    when :champion then @champion_version_id
    when :challenger then @challenger_version_id
    else
      raise ArgumentError, "Invalid variant: #{variant}. Must be :champion or :challenger"
    end
  end

  # True when the status is "running" and the current time lies inside the
  # start/end window.
  #
  # @return [Boolean]
  def running?
    return false unless @status == "running"

    now = Time.now.utc
    return false if @start_date && now < @start_date
    return false if @end_date && now > @end_date

    true
  end

  # True when the status is "scheduled" and the start date is still in the future.
  #
  # @return [Boolean] always a strict true/false
  def scheduled?
    @status == "scheduled" && !@start_date.nil? && Time.now.utc < @start_date
  end

  # True when the status is "completed" or the end date has passed.
  #
  # @return [Boolean] always a strict true/false
  def completed?
    @status == "completed" || (!@end_date.nil? && Time.now.utc > @end_date)
  end

  # Moves a scheduled test to "running". A missing or future start date is
  # pulled to the current time.
  #
  # @raise [InvalidStatusTransitionError] unless the test is scheduled
  def start!
    raise InvalidStatusTransitionError, "Cannot start test from status: #{@status}" unless can_start?

    @status = "running"
    now = Time.now.utc
    @start_date = now if @start_date.nil? || @start_date > now
  end

  # Moves a running test to "completed" and stamps the end date with the current time.
  #
  # @raise [InvalidStatusTransitionError] unless the test is running
  def complete!
    raise InvalidStatusTransitionError, "Cannot complete test from status: #{@status}" unless can_complete?

    @status = "completed"
    @end_date = Time.now.utc
  end

  # Marks the test as cancelled.
  #
  # @raise [InvalidStatusTransitionError] if the test is already completed
  def cancel!
    raise InvalidStatusTransitionError, "Cannot cancel test from status: #{@status}" if @status == "completed"

    @status = "cancelled"
  end

  # Convert to hash representation
  #
  # @return [Hash]
  def to_h
    {
      id: @id,
      name: @name,
      champion_version_id: @champion_version_id,
      challenger_version_id: @challenger_version_id,
      traffic_split: @traffic_split,
      start_date: @start_date,
      end_date: @end_date,
      status: @status
    }
  end

  private

  # Runs every attribute check; the first failure raises ValidationError.
  def validate!
    # to_s keeps non-String names (e.g. Symbols) from crashing on #strip.
    raise ValidationError, "Test name is required" if @name.nil? || @name.to_s.strip.empty?
    raise ValidationError, "Champion version ID is required" if @champion_version_id.nil?
    raise ValidationError, "Challenger version ID is required" if @challenger_version_id.nil?
    raise ValidationError, "Champion and challenger must be different versions" if @champion_version_id == @challenger_version_id

    validate_traffic_split!
    validate_dates!
    validate_status!
  end

  # The split must carry both keys, sum to 100 and contain no negative share.
  def validate_traffic_split!
    raise ValidationError, "Traffic split must be a Hash" unless @traffic_split.is_a?(Hash)

    unless @traffic_split.key?(:champion) && @traffic_split.key?(:challenger)
      raise ValidationError,
            "Traffic split must have :champion and :challenger keys"
    end

    total = @traffic_split[:champion] + @traffic_split[:challenger]
    raise ValidationError, "Traffic split must sum to 100, got #{total}" unless total == 100

    raise ValidationError, "Traffic percentages must be non-negative" if @traffic_split.values.any?(&:negative?)
  end

  # When both dates are present the end must be strictly later than the start.
  def validate_dates!
    return unless @start_date && @end_date

    raise ValidationError, "End date must be after start date" if @end_date <= @start_date
  end

  def validate_status!
    return if VALID_STATUSES.include?(@status)

    raise ValidationError, "Invalid status: #{@status}. Must be one of: #{VALID_STATUSES.join(', ')}"
  end

  # Converts the accepted input shapes into { champion: Integer, challenger: Integer }.
  # A Hash may use symbol or string keys (a missing share falls back to 50);
  # an Array is read positionally as [champion, challenger].
  def normalize_traffic_split(split)
    case split
    when Hash
      {
        champion: (split[:champion] || split["champion"] || 50).to_i,
        challenger: (split[:challenger] || split["challenger"] || 50).to_i
      }
    when Array
      { champion: split[0].to_i, challenger: split[1].to_i }
    else
      raise ValidationError, "Traffic split must be a Hash or Array"
    end
  end

  # Only scheduled tests may be started.
  def can_start?
    @status == "scheduled"
  end

  # Only running tests may be completed.
  def can_complete?
    @status == "running"
  end
end
192
+
193
+ # Custom errors
194
# Raised by ABTest#assign_variant when the test is not currently running.
class TestNotRunningError < StandardError
end
195
# Raised by ABTest#start!, #complete! and #cancel! when the current status does not allow the change.
class InvalidStatusTransitionError < StandardError
end
196
+ end
197
+ end
@@ -0,0 +1,76 @@
1
+ module DecisionAgent
2
+ module ABTesting
3
+ # Tracks individual assignments of users/requests to A/B test variants
4
class ABTestAssignment
  # The only variants an assignment may reference.
  VARIANTS = %i[champion challenger].freeze

  attr_reader :id, :ab_test_id, :user_id, :variant, :version_id,
              :timestamp, :decision_result, :confidence, :context

  # Builds an assignment and validates it immediately.
  #
  # @param ab_test_id [String, Integer] The A/B test ID
  # @param variant [Symbol] :champion or :challenger
  # @param version_id [String, Integer] The rule version ID that was used
  # @param options [Hash] Optional configuration
  # @option options [String] :user_id User identifier (optional)
  # @option options [Time] :timestamp When the assignment occurred (defaults to now, UTC)
  # @option options [String] :decision_result The decision outcome
  # @option options [Float] :confidence Confidence score of the decision (0..1)
  # @option options [Hash] :context Additional context for the decision (defaults to {})
  # @option options [String, Integer] :id Optional ID (for persistence)
  # @raise [ValidationError] if a required value is missing or out of range
  def initialize(ab_test_id:, variant:, version_id:, **options)
    @id = options[:id]
    @ab_test_id = ab_test_id
    @user_id = options[:user_id]
    @variant = variant
    @version_id = version_id
    @timestamp = options[:timestamp] || Time.now.utc
    @decision_result = options[:decision_result]
    @confidence = options[:confidence]
    @context = options[:context] || {}

    validate!
  end

  # Stores the outcome of the decision made for this assignment.
  #
  # The confidence is checked against the same 0..1 rule the constructor
  # applies, and nothing is modified when the check fails.
  #
  # @param decision [String] The decision result
  # @param confidence [Float, nil] The confidence score
  # @raise [ValidationError] if confidence is not a number between 0 and 1
  def record_decision(decision, confidence)
    validate_confidence!(confidence)

    @decision_result = decision
    @confidence = confidence
  end

  # Convert to hash representation
  #
  # @return [Hash]
  def to_h
    {
      id: @id,
      ab_test_id: @ab_test_id,
      user_id: @user_id,
      variant: @variant,
      version_id: @version_id,
      timestamp: @timestamp,
      decision_result: @decision_result,
      confidence: @confidence,
      context: @context
    }
  end

  private

  # Checks required attributes, the variant name and the confidence range.
  def validate!
    raise ValidationError, "AB test ID is required" if @ab_test_id.nil?
    raise ValidationError, "Variant is required" if @variant.nil?
    raise ValidationError, "Version ID is required" if @version_id.nil?

    raise ValidationError, "Variant must be :champion or :challenger, got: #{@variant}" unless VARIANTS.include?(@variant)

    validate_confidence!(@confidence)
  end

  # A confidence is optional; when present it must be a number within 0..1.
  # Non-numeric values are rejected here rather than failing later with NoMethodError.
  def validate_confidence!(value)
    return unless value
    return if value.is_a?(Numeric) && value >= 0 && value <= 1

    raise ValidationError, "Confidence must be between 0 and 1, got: #{value}"
  end
end
75
+ end
76
+ end
@@ -0,0 +1,317 @@
1
+ require "monitor"
2
+
3
+ module DecisionAgent
4
+ module ABTesting
5
+ # Manages A/B tests and provides high-level orchestration
6
class ABTestManager
  include MonitorMixin

  # Seconds for which the list of running tests is served from the cache.
  ACTIVE_TESTS_CACHE_TTL = 60

  # Smallest per-variant sample for which significance is evaluated.
  MIN_SAMPLE_SIZE = 30

  attr_reader :storage_adapter, :version_manager

  # @param storage_adapter [Storage::Adapter] Storage adapter for persistence
  #   (defaults to the in-memory adapter)
  # @param version_manager [Versioning::VersionManager] Version manager for rule versions
  #   (defaults to a new Versioning::VersionManager)
  def initialize(storage_adapter: nil, version_manager: nil)
    super() # initialises the monitor provided by MonitorMixin
    @storage_adapter = storage_adapter || default_storage_adapter
    @version_manager = version_manager || Versioning::VersionManager.new
    @active_tests_cache = {}
  end

  # Create a new A/B test
  #
  # The test is created as "scheduled" when start_date lies in the future and
  # as "running" otherwise.
  #
  # @param name [String] Name of the test
  # @param champion_version_id [String, Integer] ID of the champion version
  # @param challenger_version_id [String, Integer] ID of the challenger version
  # @param traffic_split [Hash] Traffic distribution
  # @param start_date [Time, nil] When to start the test
  # @param end_date [Time, nil] When to end the test
  # @return [ABTest] The created test, as returned by the storage adapter
  # @raise [VersionNotFoundError] if either version is unknown to the version manager
  def create_test(name:, champion_version_id:, challenger_version_id:, traffic_split: { champion: 90, challenger: 10 }, start_date: nil,
                  end_date: nil)
    synchronize do
      validate_version_exists!(champion_version_id, "champion")
      validate_version_exists!(challenger_version_id, "challenger")

      now = Time.now.utc
      starts_later = !start_date.nil? && start_date > now

      test = ABTest.new(
        name: name,
        champion_version_id: champion_version_id,
        challenger_version_id: challenger_version_id,
        traffic_split: traffic_split,
        start_date: start_date || now,
        end_date: end_date,
        status: starts_later ? "scheduled" : "running"
      )

      saved_test = @storage_adapter.save_test(test)
      invalidate_cache!
      saved_test
    end
  end

  # Get an A/B test by ID
  #
  # @param test_id [String, Integer] The test ID
  # @return [ABTest, nil] The test or nil if not found
  def get_test(test_id)
    @storage_adapter.get_test(test_id)
  end

  # Get all active A/B tests. The list is cached for ACTIVE_TESTS_CACHE_TTL
  # seconds and dropped whenever this manager creates or transitions a test.
  #
  # @return [Array<ABTest>] Array of active tests
  def active_tests
    synchronize do
      return @active_tests_cache[:tests] if cache_valid?

      tests = @storage_adapter.list_tests(status: "running")
      @active_tests_cache = { tests: tests, timestamp: Time.now.utc }
      tests
    end
  end

  # Assign a variant for a request and persist the assignment.
  #
  # @param test_id [String, Integer] The A/B test ID
  # @param user_id [String, nil] Optional user identifier for consistent assignment
  # @return [Hash] Assignment details { test_id:, variant:, version_id:, assignment_id: }
  # @raise [TestNotFoundError] if no test has the given ID
  def assign_variant(test_id:, user_id: nil)
    test = fetch_test!(test_id)

    variant = test.assign_variant(user_id: user_id)
    version_id = test.version_for_variant(variant)

    assignment = ABTestAssignment.new(
      ab_test_id: test_id,
      user_id: user_id,
      variant: variant,
      version_id: version_id
    )
    saved_assignment = @storage_adapter.save_assignment(assignment)

    {
      test_id: test_id,
      variant: variant,
      version_id: version_id,
      assignment_id: saved_assignment.id
    }
  end

  # Record the decision result for an assignment
  #
  # @param assignment_id [String, Integer] The assignment ID
  # @param decision [String] The decision result
  # @param confidence [Float] The confidence score
  def record_decision(assignment_id:, decision:, confidence:)
    @storage_adapter.update_assignment(assignment_id, decision_result: decision, confidence: confidence)
  end

  # Get results comparison for an A/B test
  #
  # @param test_id [String, Integer] The test ID
  # @return [Hash] Comparison statistics
  # @raise [TestNotFoundError] if no test has the given ID
  def get_results(test_id)
    test = fetch_test!(test_id)
    assignments = @storage_adapter.get_assignments(test_id)

    champion_assignments = assignments.select { |a| a.variant == :champion }
    challenger_assignments = assignments.select { |a| a.variant == :challenger }

    {
      test: test.to_h,
      champion: calculate_variant_stats(champion_assignments, "Champion"),
      challenger: calculate_variant_stats(challenger_assignments, "Challenger"),
      comparison: compare_variants(champion_assignments, challenger_assignments),
      total_assignments: assignments.size,
      timestamp: Time.now.utc
    }
  end

  # Start a scheduled test
  #
  # @param test_id [String, Integer] The test ID
  # @raise [TestNotFoundError] if no test has the given ID
  def start_test(test_id)
    synchronize do
      test = fetch_test!(test_id)

      test.start!
      @storage_adapter.update_test(test_id, status: "running", start_date: test.start_date)
      invalidate_cache!
    end
  end

  # Complete a running test
  #
  # @param test_id [String, Integer] The test ID
  # @raise [TestNotFoundError] if no test has the given ID
  def complete_test(test_id)
    synchronize do
      test = fetch_test!(test_id)

      test.complete!
      @storage_adapter.update_test(test_id, status: "completed", end_date: test.end_date)
      invalidate_cache!
    end
  end

  # Cancel a test
  #
  # @param test_id [String, Integer] The test ID
  # @raise [TestNotFoundError] if no test has the given ID
  def cancel_test(test_id)
    synchronize do
      test = fetch_test!(test_id)

      test.cancel!
      @storage_adapter.update_test(test_id, status: "cancelled")
      invalidate_cache!
    end
  end

  # List all tests with optional filtering
  #
  # @param status [String, nil] Filter by status
  # @param limit [Integer, nil] Limit results
  # @return [Array<ABTest>] Array of tests
  def list_tests(status: nil, limit: nil)
    @storage_adapter.list_tests(status: status, limit: limit)
  end

  private

  # Loaded lazily so the in-memory adapter is only required when no adapter is injected.
  def default_storage_adapter
    require_relative "storage/memory_adapter"
    Storage::MemoryAdapter.new
  end

  # Looks a test up and raises TestNotFoundError when storage returns nothing.
  def fetch_test!(test_id)
    test = get_test(test_id)
    raise TestNotFoundError, "Test not found: #{test_id}" unless test

    test
  end

  def validate_version_exists!(version_id, label)
    return if @version_manager.get_version(version_id: version_id)

    raise VersionNotFoundError, "#{label.capitalize} version not found: #{version_id}"
  end

  def cache_valid?
    cached_at = @active_tests_cache[:timestamp]
    return false unless cached_at

    Time.now.utc - cached_at < ACTIVE_TESTS_CACHE_TTL
  end

  def invalidate_cache!
    @active_tests_cache = {}
  end

  # Arithmetic mean of a non-empty list, always as a Float.
  def mean(values)
    values.sum / values.size.to_f
  end

  # Summarises one variant. Assignments without a decision only count towards
  # :total_assignments; decisions recorded without a confidence count towards
  # :decisions_recorded and the distribution but are left out of the
  # confidence figures instead of breaking the sum.
  def calculate_variant_stats(assignments, label)
    with_decisions = assignments.select(&:decision_result)

    if with_decisions.empty?
      return {
        label: label,
        total_assignments: assignments.size,
        decisions_recorded: 0,
        avg_confidence: nil,
        decision_distribution: {}
      }
    end

    confidences = with_decisions.map(&:confidence).compact
    decision_counts = with_decisions.group_by(&:decision_result).transform_values(&:size)

    {
      label: label,
      total_assignments: assignments.size,
      decisions_recorded: with_decisions.size,
      avg_confidence: confidences.empty? ? nil : mean(confidences).round(4),
      min_confidence: confidences.min&.round(4),
      max_confidence: confidences.max&.round(4),
      decision_distribution: decision_counts
    }
  end

  # Compares the average confidence of both variants. Reports
  # "insufficient_data" when either side has no decision carrying a confidence.
  def compare_variants(champion_assignments, challenger_assignments)
    champion_confidences = champion_assignments.select(&:decision_result).map(&:confidence).compact
    challenger_confidences = challenger_assignments.select(&:decision_result).map(&:confidence).compact

    return { statistical_significance: "insufficient_data" } if champion_confidences.empty? || challenger_confidences.empty?

    champion_avg = mean(champion_confidences)
    challenger_avg = mean(challenger_confidences)
    improvement = percentage_improvement(champion_avg, challenger_avg)

    # Calculate statistical significance using Welch's t-test approximation
    sig_result = calculate_statistical_significance(champion_confidences, challenger_confidences)

    {
      champion_avg_confidence: champion_avg.round(4),
      challenger_avg_confidence: challenger_avg.round(4),
      improvement_percentage: improvement,
      winner: determine_winner(champion_avg, challenger_avg, sig_result[:significant]),
      statistical_significance: sig_result[:significant] ? "significant" : "not_significant",
      confidence_level: sig_result[:confidence_level],
      recommendation: generate_recommendation(improvement, sig_result[:significant])
    }
  end

  # Relative change of the challenger average over the champion average, in
  # percent. A zero champion average cannot be divided by: equal averages then
  # count as 0.0 (instead of NaN) and any difference as +/-Infinity.
  def percentage_improvement(champion_avg, challenger_avg)
    delta = challenger_avg - champion_avg

    if champion_avg.zero?
      return 0.0 if delta.zero?

      return delta.positive? ? Float::INFINITY : -Float::INFINITY
    end

    (delta / champion_avg * 100).round(2)
  end

  # Welch's t-statistic compared against fixed two-sided critical values
  # (1.645 / 1.96 / 2.576). Samples smaller than MIN_SAMPLE_SIZE are never
  # reported as significant.
  #
  # Simplified p-value approximation (for demonstration);
  # in production, use a proper statistical library.
  def calculate_statistical_significance(sample1, sample2)
    n1 = sample1.size
    n2 = sample2.size

    return { significant: false, confidence_level: 0 } if n1 < MIN_SAMPLE_SIZE || n2 < MIN_SAMPLE_SIZE

    mean1 = mean(sample1)
    mean2 = mean(sample2)

    var1 = sample1.map { |x| (x - mean1)**2 }.sum / (n1 - 1).to_f
    var2 = sample2.map { |x| (x - mean2)**2 }.sum / (n2 - 1).to_f

    t_stat = welch_t_statistic(mean1 - mean2, Math.sqrt((var1 / n1) + (var2 / n2)))
    t_stat_abs = t_stat.abs

    confidence_level = if t_stat_abs > 2.576
                         0.99 # 99% confidence
                       elsif t_stat_abs > 1.96
                         0.95 # 95% confidence
                       elsif t_stat_abs > 1.645
                         0.90 # 90% confidence
                       else
                         0.0
                       end

    {
      significant: confidence_level >= 0.95,
      confidence_level: confidence_level,
      t_statistic: t_stat.finite? ? t_stat.round(4) : t_stat
    }
  end

  # Divides the mean difference by the standard error. With a zero standard
  # error (both samples constant) identical means give 0.0 rather than NaN,
  # and differing means give +/-Infinity.
  def welch_t_statistic(mean_diff, standard_error)
    return mean_diff / standard_error unless standard_error.zero?
    return 0.0 if mean_diff.zero?

    mean_diff.positive? ? Float::INFINITY : -Float::INFINITY
  end

  def determine_winner(champion_avg, challenger_avg, significant)
    return "inconclusive" unless significant

    challenger_avg > champion_avg ? "challenger" : "champion"
  end

  # Turns the improvement percentage into a human-readable recommendation;
  # without significance the advice is always to keep testing.
  def generate_recommendation(improvement, significant)
    return "Continue testing - not enough data for statistical significance" unless significant

    if improvement > 5
      "Strong evidence to promote challenger"
    elsif improvement.positive?
      "Moderate evidence to promote challenger"
    elsif improvement > -5
      "Results are similar - consider other factors"
    else
      "Keep champion - challenger performs worse"
    end
  end
end
312
+
313
+ # Custom errors
314
# Raised by ABTestManager when the storage adapter has no test for the requested ID.
class TestNotFoundError < StandardError
end
315
# Raised by ABTestManager#create_test when the version manager returns no version for an ID.
class VersionNotFoundError < StandardError
end
316
+ end
317
+ end