decision_agent 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/decision_agent/ab_testing/ab_test.rb +197 -0
- data/lib/decision_agent/ab_testing/ab_test_assignment.rb +76 -0
- data/lib/decision_agent/ab_testing/ab_test_manager.rb +317 -0
- data/lib/decision_agent/ab_testing/ab_testing_agent.rb +152 -0
- data/lib/decision_agent/ab_testing/storage/activerecord_adapter.rb +155 -0
- data/lib/decision_agent/ab_testing/storage/adapter.rb +67 -0
- data/lib/decision_agent/ab_testing/storage/memory_adapter.rb +116 -0
- data/lib/decision_agent/monitoring/metrics_collector.rb +148 -3
- data/lib/decision_agent/monitoring/storage/activerecord_adapter.rb +253 -0
- data/lib/decision_agent/monitoring/storage/base_adapter.rb +90 -0
- data/lib/decision_agent/monitoring/storage/memory_adapter.rb +222 -0
- data/lib/decision_agent/version.rb +1 -1
- data/lib/decision_agent.rb +7 -0
- data/lib/generators/decision_agent/install/install_generator.rb +37 -0
- data/lib/generators/decision_agent/install/templates/ab_test_assignment_model.rb +45 -0
- data/lib/generators/decision_agent/install/templates/ab_test_model.rb +54 -0
- data/lib/generators/decision_agent/install/templates/ab_testing_migration.rb +43 -0
- data/lib/generators/decision_agent/install/templates/ab_testing_tasks.rake +189 -0
- data/lib/generators/decision_agent/install/templates/decision_agent_tasks.rake +114 -0
- data/lib/generators/decision_agent/install/templates/decision_log.rb +57 -0
- data/lib/generators/decision_agent/install/templates/error_metric.rb +53 -0
- data/lib/generators/decision_agent/install/templates/evaluation_metric.rb +43 -0
- data/lib/generators/decision_agent/install/templates/monitoring_migration.rb +109 -0
- data/lib/generators/decision_agent/install/templates/performance_metric.rb +76 -0
- data/spec/ab_testing/ab_test_manager_spec.rb +330 -0
- data/spec/ab_testing/ab_test_spec.rb +270 -0
- data/spec/examples.txt +612 -548
- data/spec/issue_verification_spec.rb +95 -21
- data/spec/monitoring/metrics_collector_spec.rb +2 -2
- data/spec/monitoring/monitored_agent_spec.rb +1 -1
- data/spec/monitoring/prometheus_exporter_spec.rb +1 -1
- data/spec/monitoring/storage/activerecord_adapter_spec.rb +346 -0
- data/spec/monitoring/storage/memory_adapter_spec.rb +247 -0
- metadata +26 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: fac7efbaf0c8afd76053d21e611eca64f5880d5b6434720fd90ae34549f6244d
+  data.tar.gz: ecc22d0a9bb19053fd103b06f2c1be768db432b6d9ed715eac8adddcbd6f68dc
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b7a0be6ad95512697b6e49a8145668cde64256051db57a39cf925edbd93886d7772cb93026a9af7a9eaea2e9968756ec628ca94d0ebc5a0d1aef0c409eeae97d
+  data.tar.gz: 06777a3c4155b1adff552f712a05a0d4bd2694c6f8af631e36611528c050cf1ea711268230dba6d1a64522e127c0c312b7221fd5d36bdc56f69009b0a98c17e4
data/lib/decision_agent/ab_testing/ab_test.rb
ADDED
@@ -0,0 +1,197 @@
module DecisionAgent
  module ABTesting
    # Represents an A/B test configuration for comparing rule versions
    class ABTest
      attr_reader :id, :name, :champion_version_id, :challenger_version_id,
                  :traffic_split, :start_date, :end_date, :status

      # @param name [String] Name of the A/B test
      # @param champion_version_id [String, Integer] ID of the current/champion version
      # @param challenger_version_id [String, Integer] ID of the new/challenger version
      # @param options [Hash] Optional configuration
      # @option options [Hash] :traffic_split Traffic distribution (default: { champion: 90, challenger: 10 })
      # @option options [Time] :start_date When the test starts (defaults to now)
      # @option options [Time] :end_date When the test ends (optional)
      # @option options [String] :status Test status: running, completed, cancelled, scheduled
      # @option options [String, Integer] :id Optional ID (for persistence)
      def initialize(
        name:,
        champion_version_id:,
        challenger_version_id:,
        **options
      )
        @id = options[:id]
        @name = name
        @champion_version_id = champion_version_id
        @challenger_version_id = challenger_version_id
        @traffic_split = normalize_traffic_split(options[:traffic_split] || { champion: 90, challenger: 10 })
        @start_date = options[:start_date] || Time.now.utc
        @end_date = options[:end_date]
        @status = options[:status] || "scheduled"

        validate!
      end

      # Assign a variant based on traffic split
      # Uses consistent hashing to ensure same user gets same variant
      # @param user_id [String, nil] Optional user identifier for consistent assignment
      # @return [Symbol] :champion or :challenger
      def assign_variant(user_id: nil)
        raise TestNotRunningError, "Test '#{@name}' is not running (status: #{@status})" unless running?

        if user_id
          # Consistent hashing: same user always gets same variant
          hash_value = Digest::SHA256.hexdigest("#{@id}:#{user_id}").to_i(16)
          percentage = hash_value % 100
        else
          # Random assignment
          percentage = rand(100)
        end

        percentage < @traffic_split[:champion] ? :champion : :challenger
      end

      # Get the version ID for the assigned variant
      # @param variant [Symbol] :champion or :challenger
      # @return [String, Integer] The version ID
      def version_for_variant(variant)
        case variant
        when :champion
          @champion_version_id
        when :challenger
          @challenger_version_id
        else
          raise ArgumentError, "Invalid variant: #{variant}. Must be :champion or :challenger"
        end
      end

      # Check if test is currently running
      # @return [Boolean]
      def running?
        return false unless @status == "running"
        return false if @start_date && Time.now.utc < @start_date
        return false if @end_date && Time.now.utc > @end_date

        true
      end

      # Check if test is scheduled to start
      # @return [Boolean]
      def scheduled?
        @status == "scheduled" && @start_date && Time.now.utc < @start_date
      end

      # Check if test is completed
      # @return [Boolean]
      def completed?
        @status == "completed" || (@end_date && Time.now.utc > @end_date)
      end

      # Start the test
      def start!
        raise InvalidStatusTransitionError, "Cannot start test from status: #{@status}" unless can_start?

        @status = "running"
        @start_date = Time.now.utc if @start_date.nil? || @start_date > Time.now.utc
      end

      # Complete the test
      def complete!
        raise InvalidStatusTransitionError, "Cannot complete test from status: #{@status}" unless can_complete?

        @status = "completed"
        @end_date = Time.now.utc
      end

      # Cancel the test
      def cancel!
        raise InvalidStatusTransitionError, "Cannot cancel test from status: #{@status}" if @status == "completed"

        @status = "cancelled"
      end

      # Convert to hash representation
      # @return [Hash]
      def to_h
        {
          id: @id,
          name: @name,
          champion_version_id: @champion_version_id,
          challenger_version_id: @challenger_version_id,
          traffic_split: @traffic_split,
          start_date: @start_date,
          end_date: @end_date,
          status: @status
        }
      end

      private

      def validate!
        raise ValidationError, "Test name is required" if @name.nil? || @name.strip.empty?
        raise ValidationError, "Champion version ID is required" if @champion_version_id.nil?
        raise ValidationError, "Challenger version ID is required" if @challenger_version_id.nil?
        raise ValidationError, "Champion and challenger must be different versions" if @champion_version_id == @challenger_version_id

        validate_traffic_split!
        validate_dates!
        validate_status!
      end

      def validate_traffic_split!
        raise ValidationError, "Traffic split must be a Hash" unless @traffic_split.is_a?(Hash)

        unless @traffic_split.key?(:champion) && @traffic_split.key?(:challenger)
          raise ValidationError,
                "Traffic split must have :champion and :challenger keys"
        end

        total = @traffic_split[:champion] + @traffic_split[:challenger]
        raise ValidationError, "Traffic split must sum to 100, got #{total}" unless total == 100

        raise ValidationError, "Traffic percentages must be non-negative" if @traffic_split.values.any?(&:negative?)
      end

      def validate_dates!
        return unless @start_date && @end_date

        raise ValidationError, "End date must be after start date" if @end_date <= @start_date
      end

      def validate_status!
        valid_statuses = %w[scheduled running completed cancelled]
        return if valid_statuses.include?(@status)

        raise ValidationError, "Invalid status: #{@status}. Must be one of: #{valid_statuses.join(', ')}"
      end

      def normalize_traffic_split(split)
        case split
        when Hash
          # Handle both string and symbol keys
          {
            champion: (split[:champion] || split["champion"] || 50).to_i,
            challenger: (split[:challenger] || split["challenger"] || 50).to_i
          }
        when Array
          # Handle array format [90, 10]
          { champion: split[0].to_i, challenger: split[1].to_i }
        else
          raise ValidationError, "Traffic split must be a Hash or Array"
        end
      end

      def can_start?
        %w[scheduled].include?(@status)
      end

      def can_complete?
        %w[running].include?(@status)
      end
    end

    # Custom errors
    class TestNotRunningError < StandardError; end
    class InvalidStatusTransitionError < StandardError; end
  end
end
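The variant assignment above is deterministic per user: `Digest::SHA256.hexdigest("#{@id}:#{user_id}")` is reduced modulo 100 and compared against the champion's share, so a given user keeps the same variant for the life of the test, while anonymous requests fall back to `rand(100)`. A minimal usage sketch follows; the require lines, test name, and version IDs are illustrative assumptions, not values taken from the gem:

```ruby
require "decision_agent" # assumes the gem's top-level file loads the ABTesting classes
require "digest"         # ABTest#assign_variant relies on Digest::SHA256

test = DecisionAgent::ABTesting::ABTest.new(
  id: "checkout-rules-test",          # hypothetical test ID
  name: "Checkout rules v2 rollout",
  champion_version_id: 41,            # hypothetical version IDs
  challenger_version_id: 42,
  traffic_split: { champion: 80, challenger: 20 },
  status: "running"
)

variant = test.assign_variant(user_id: "user-123") # same user_id => same variant
test.version_for_variant(variant)                  # => 41 or 42
```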
data/lib/decision_agent/ab_testing/ab_test_assignment.rb
ADDED
@@ -0,0 +1,76 @@
module DecisionAgent
  module ABTesting
    # Tracks individual assignments of users/requests to A/B test variants
    class ABTestAssignment
      attr_reader :id, :ab_test_id, :user_id, :variant, :version_id,
                  :timestamp, :decision_result, :confidence, :context

      # @param ab_test_id [String, Integer] The A/B test ID
      # @param variant [Symbol] :champion or :challenger
      # @param version_id [String, Integer] The rule version ID that was used
      # @param options [Hash] Optional configuration
      # @option options [String] :user_id User identifier (optional)
      # @option options [Time] :timestamp When the assignment occurred
      # @option options [String] :decision_result The decision outcome
      # @option options [Float] :confidence Confidence score of the decision
      # @option options [Hash] :context Additional context for the decision
      # @option options [String, Integer] :id Optional ID (for persistence)
      def initialize(
        ab_test_id:,
        variant:,
        version_id:,
        **options
      )
        @id = options[:id]
        @ab_test_id = ab_test_id
        @user_id = options[:user_id]
        @variant = variant
        @version_id = version_id
        @timestamp = options[:timestamp] || Time.now.utc
        @decision_result = options[:decision_result]
        @confidence = options[:confidence]
        @context = options[:context] || {}

        validate!
      end

      # Update the assignment with decision results
      # @param decision [String] The decision result
      # @param confidence [Float] The confidence score
      def record_decision(decision, confidence)
        @decision_result = decision
        @confidence = confidence
      end

      # Convert to hash representation
      # @return [Hash]
      def to_h
        {
          id: @id,
          ab_test_id: @ab_test_id,
          user_id: @user_id,
          variant: @variant,
          version_id: @version_id,
          timestamp: @timestamp,
          decision_result: @decision_result,
          confidence: @confidence,
          context: @context
        }
      end

      private

      def validate!
        raise ValidationError, "AB test ID is required" if @ab_test_id.nil?
        raise ValidationError, "Variant is required" if @variant.nil?
        raise ValidationError, "Version ID is required" if @version_id.nil?

        raise ValidationError, "Variant must be :champion or :challenger, got: #{@variant}" unless %i[champion challenger].include?(@variant)

        return unless @confidence && (@confidence.negative? || @confidence > 1)

        raise ValidationError, "Confidence must be between 0 and 1, got: #{@confidence}"
      end
    end
  end
end
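A short sketch of the assignment lifecycle, with illustrative values: the record is created when a variant is chosen, and the outcome is attached after the assigned rule version has been evaluated. Note that the 0..1 confidence range is validated only in the constructor, not in `record_decision`.

```ruby
assignment = DecisionAgent::ABTesting::ABTestAssignment.new(
  ab_test_id: "checkout-rules-test", # hypothetical test ID
  variant: :challenger,
  version_id: 42,                    # hypothetical version ID
  user_id: "user-123"
)

# Attach the decision outcome once the challenger version has been evaluated.
assignment.record_decision("approve", 0.87)
assignment.to_h[:decision_result] # => "approve"
```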
data/lib/decision_agent/ab_testing/ab_test_manager.rb
ADDED
@@ -0,0 +1,317 @@
require "monitor"

module DecisionAgent
  module ABTesting
    # Manages A/B tests and provides high-level orchestration
    class ABTestManager
      include MonitorMixin

      attr_reader :storage_adapter, :version_manager

      # @param storage_adapter [Storage::Adapter] Storage adapter for persistence
      # @param version_manager [Versioning::VersionManager] Version manager for rule versions
      def initialize(storage_adapter: nil, version_manager: nil)
        super()
        @storage_adapter = storage_adapter || default_storage_adapter
        @version_manager = version_manager || Versioning::VersionManager.new
        @active_tests_cache = {}
      end

      # Create a new A/B test
      # @param name [String] Name of the test
      # @param champion_version_id [String, Integer] ID of the champion version
      # @param challenger_version_id [String, Integer] ID of the challenger version
      # @param traffic_split [Hash] Traffic distribution
      # @param start_date [Time, nil] When to start the test
      # @param end_date [Time, nil] When to end the test
      # @return [ABTest] The created test
      def create_test(name:, champion_version_id:, challenger_version_id:, traffic_split: { champion: 90, challenger: 10 }, start_date: nil,
                      end_date: nil)
        synchronize do
          # Validate that both versions exist
          validate_version_exists!(champion_version_id, "champion")
          validate_version_exists!(challenger_version_id, "challenger")

          test = ABTest.new(
            name: name,
            champion_version_id: champion_version_id,
            challenger_version_id: challenger_version_id,
            traffic_split: traffic_split,
            start_date: start_date || Time.now.utc,
            end_date: end_date,
            status: start_date && start_date > Time.now.utc ? "scheduled" : "running"
          )

          saved_test = @storage_adapter.save_test(test)
          invalidate_cache!
          saved_test
        end
      end

      # Get an A/B test by ID
      # @param test_id [String, Integer] The test ID
      # @return [ABTest, nil] The test or nil if not found
      def get_test(test_id)
        @storage_adapter.get_test(test_id)
      end

      # Get all active A/B tests
      # @return [Array<ABTest>] Array of active tests
      def active_tests
        synchronize do
          return @active_tests_cache[:tests] if cache_valid?

          tests = @storage_adapter.list_tests(status: "running")
          @active_tests_cache = { tests: tests, timestamp: Time.now.utc }
          tests
        end
      end

      # Assign a variant for a request
      # @param test_id [String, Integer] The A/B test ID
      # @param user_id [String, nil] Optional user identifier for consistent assignment
      # @return [Hash] Assignment details { test_id:, variant:, version_id: }
      def assign_variant(test_id:, user_id: nil)
        test = get_test(test_id)
        raise TestNotFoundError, "Test not found: #{test_id}" unless test

        variant = test.assign_variant(user_id: user_id)
        version_id = test.version_for_variant(variant)

        assignment = ABTestAssignment.new(
          ab_test_id: test_id,
          user_id: user_id,
          variant: variant,
          version_id: version_id
        )

        saved_assignment = @storage_adapter.save_assignment(assignment)

        {
          test_id: test_id,
          variant: variant,
          version_id: version_id,
          assignment_id: saved_assignment.id
        }
      end

      # Record the decision result for an assignment
      # @param assignment_id [String, Integer] The assignment ID
      # @param decision [String] The decision result
      # @param confidence [Float] The confidence score
      def record_decision(assignment_id:, decision:, confidence:)
        @storage_adapter.update_assignment(assignment_id, decision_result: decision, confidence: confidence)
      end

      # Get results comparison for an A/B test
      # @param test_id [String, Integer] The test ID
      # @return [Hash] Comparison statistics
      def get_results(test_id)
        test = get_test(test_id)
        raise TestNotFoundError, "Test not found: #{test_id}" unless test

        assignments = @storage_adapter.get_assignments(test_id)

        champion_assignments = assignments.select { |a| a.variant == :champion }
        challenger_assignments = assignments.select { |a| a.variant == :challenger }

        {
          test: test.to_h,
          champion: calculate_variant_stats(champion_assignments, "Champion"),
          challenger: calculate_variant_stats(challenger_assignments, "Challenger"),
          comparison: compare_variants(champion_assignments, challenger_assignments),
          total_assignments: assignments.size,
          timestamp: Time.now.utc
        }
      end

      # Start a scheduled test
      # @param test_id [String, Integer] The test ID
      def start_test(test_id)
        synchronize do
          test = get_test(test_id)
          raise TestNotFoundError, "Test not found: #{test_id}" unless test

          test.start!
          @storage_adapter.update_test(test_id, status: "running", start_date: test.start_date)
          invalidate_cache!
        end
      end

      # Complete a running test
      # @param test_id [String, Integer] The test ID
      def complete_test(test_id)
        synchronize do
          test = get_test(test_id)
          raise TestNotFoundError, "Test not found: #{test_id}" unless test

          test.complete!
          @storage_adapter.update_test(test_id, status: "completed", end_date: test.end_date)
          invalidate_cache!
        end
      end

      # Cancel a test
      # @param test_id [String, Integer] The test ID
      def cancel_test(test_id)
        synchronize do
          test = get_test(test_id)
          raise TestNotFoundError, "Test not found: #{test_id}" unless test

          test.cancel!
          @storage_adapter.update_test(test_id, status: "cancelled")
          invalidate_cache!
        end
      end

      # List all tests with optional filtering
      # @param status [String, nil] Filter by status
      # @param limit [Integer, nil] Limit results
      # @return [Array<ABTest>] Array of tests
      def list_tests(status: nil, limit: nil)
        @storage_adapter.list_tests(status: status, limit: limit)
      end

      private

      def default_storage_adapter
        # Use in-memory adapter by default
        require_relative "storage/memory_adapter"
        Storage::MemoryAdapter.new
      end

      def validate_version_exists!(version_id, label)
        version = @version_manager.get_version(version_id: version_id)
        return if version

        raise VersionNotFoundError, "#{label.capitalize} version not found: #{version_id}"
      end

      def cache_valid?
        return false unless @active_tests_cache[:timestamp]

        # Cache is valid for 60 seconds
        Time.now.utc - @active_tests_cache[:timestamp] < 60
      end

      def invalidate_cache!
        @active_tests_cache = {}
      end

      def calculate_variant_stats(assignments, label)
        with_decisions = assignments.select(&:decision_result)

        if with_decisions.empty?
          return {
            label: label,
            total_assignments: assignments.size,
            decisions_recorded: 0,
            avg_confidence: nil,
            decision_distribution: {}
          }
        end

        confidences = with_decisions.map(&:confidence)
        decision_counts = with_decisions.group_by(&:decision_result).transform_values(&:size)

        {
          label: label,
          total_assignments: assignments.size,
          decisions_recorded: with_decisions.size,
          avg_confidence: (confidences.sum / confidences.size.to_f).round(4),
          min_confidence: confidences.min&.round(4),
          max_confidence: confidences.max&.round(4),
          decision_distribution: decision_counts
        }
      end

      def compare_variants(champion_assignments, challenger_assignments)
        champion_with_decisions = champion_assignments.select(&:decision_result)
        challenger_with_decisions = challenger_assignments.select(&:decision_result)

        return { statistical_significance: "insufficient_data" } if champion_with_decisions.empty? || challenger_with_decisions.empty?

        champion_confidences = champion_with_decisions.map(&:confidence)
        challenger_confidences = challenger_with_decisions.map(&:confidence)

        champion_avg = champion_confidences.sum / champion_confidences.size.to_f
        challenger_avg = challenger_confidences.sum / challenger_confidences.size.to_f

        improvement = ((challenger_avg - champion_avg) / champion_avg * 100).round(2)

        # Calculate statistical significance using Welch's t-test approximation
        sig_result = calculate_statistical_significance(champion_confidences, challenger_confidences)

        {
          champion_avg_confidence: champion_avg.round(4),
          challenger_avg_confidence: challenger_avg.round(4),
          improvement_percentage: improvement,
          winner: determine_winner(champion_avg, challenger_avg, sig_result[:significant]),
          statistical_significance: sig_result[:significant] ? "significant" : "not_significant",
          confidence_level: sig_result[:confidence_level],
          recommendation: generate_recommendation(improvement, sig_result[:significant])
        }
      end

      def calculate_statistical_significance(sample1, sample2)
        n1 = sample1.size
        n2 = sample2.size

        return { significant: false, confidence_level: 0 } if n1 < 30 || n2 < 30

        mean1 = sample1.sum / n1.to_f
        mean2 = sample2.sum / n2.to_f

        var1 = sample1.map { |x| (x - mean1)**2 }.sum / (n1 - 1).to_f
        var2 = sample2.map { |x| (x - mean2)**2 }.sum / (n2 - 1).to_f

        # Welch's t-statistic
        t_stat = (mean1 - mean2) / Math.sqrt((var1 / n1) + (var2 / n2))

        # Simplified p-value approximation (for demonstration)
        # In production, use a proper statistical library
        t_stat_abs = t_stat.abs

        confidence_level = if t_stat_abs > 2.576
                             0.99 # 99% confidence
                           elsif t_stat_abs > 1.96
                             0.95 # 95% confidence
                           elsif t_stat_abs > 1.645
                             0.90 # 90% confidence
                           else
                             0.0
                           end

        {
          significant: confidence_level >= 0.95,
          confidence_level: confidence_level,
          t_statistic: t_stat.round(4)
        }
      end

      def determine_winner(champion_avg, challenger_avg, significant)
        return "inconclusive" unless significant

        challenger_avg > champion_avg ? "challenger" : "champion"
      end

      def generate_recommendation(improvement, significant)
        if !significant
          "Continue testing - not enough data for statistical significance"
        elsif improvement > 5
          "Strong evidence to promote challenger"
        elsif improvement.positive?
          "Moderate evidence to promote challenger"
        elsif improvement > -5
          "Results are similar - consider other factors"
        else
          "Keep champion - challenger performs worse"
        end
      end
    end

    # Custom errors
    class TestNotFoundError < StandardError; end
    class VersionNotFoundError < StandardError; end
  end
end
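Taken together, the manager drives the whole loop: create a test, assign a variant per request, record outcomes, then compare variants. Significance is a Welch's t-statistic checked against the z thresholds 1.645, 1.96, and 2.576 (the code's 90/95/99% levels); it needs at least 30 recorded decisions per variant, and only ≥95% confidence counts as significant. A hedged end-to-end sketch follows; it assumes versions 41 and 42 were already registered with the version manager (whose API is not part of this diff) and uses the default in-memory storage adapter:

```ruby
manager = DecisionAgent::ABTesting::ABTestManager.new # defaults to Storage::MemoryAdapter

# Raises VersionNotFoundError unless both versions exist in the version manager.
test = manager.create_test(
  name: "Checkout rules v2 rollout",
  champion_version_id: 41,  # hypothetical, pre-registered version IDs
  challenger_version_id: 42,
  traffic_split: { champion: 80, challenger: 20 }
)

# Per request: pick a variant, evaluate that rule version, record the outcome.
assignment = manager.assign_variant(test_id: test.id, user_id: "user-123")
manager.record_decision(
  assignment_id: assignment[:assignment_id],
  decision: "approve",    # illustrative outcome
  confidence: 0.87
)

results = manager.get_results(test.id)
results[:comparison][:recommendation]
# => e.g. "Continue testing - not enough data for statistical significance"
```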