classic_bandit 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.devcontainer/devcontainer.json +22 -0
- data/.rspec +3 -0
- data/.rubocop.yml +16 -0
- data/CHANGELOG.md +9 -0
- data/LICENSE.txt +21 -0
- data/README.md +142 -0
- data/Rakefile +12 -0
- data/lib/classic_bandit/arm.rb +31 -0
- data/lib/classic_bandit/arm_updatable.rb +33 -0
- data/lib/classic_bandit/epsilon_greedy.rb +49 -0
- data/lib/classic_bandit/softmax.rb +63 -0
- data/lib/classic_bandit/thompson_sampling.rb +62 -0
- data/lib/classic_bandit/ucb1.rb +50 -0
- data/lib/classic_bandit/version.rb +5 -0
- data/lib/classic_bandit.rb +13 -0
- data/sig/classic_bandit.rbs +4 -0
- metadata +64 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA256:
  metadata.gz: 9ce78adca73218db90a909daec3ccbb457417620d78ae2566c0a32ac7024a313
  data.tar.gz: 9b6debd514866747242bd00de9db4d97d1fd95e5a03166e52c96b812daa3d7f6
SHA512:
  metadata.gz: 80cc50641016e81853f766645d1fe9baf3c7bf4b09c8bc9303ce083dac1bec574d78432dd174953785e3dd37032e34c2248d22f47acd1c08c02078f02ad6f8a7
  data.tar.gz: e5b534ad79cc2a91a95b617c315335cb8c399c7241308750d868b4eb13e60016382765ff1e83447495676414e1213a6b7212d31e9b18f2859063d5f491f80b25
data/.devcontainer/devcontainer.json
ADDED
@@ -0,0 +1,22 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/ruby
{
  "name": "Ruby",
  // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
  "image": "mcr.microsoft.com/devcontainers/ruby:1-3.3-bullseye"

  // Features to add to the dev container. More info: https://containers.dev/features.
  // "features": {},

  // Use 'forwardPorts' to make a list of ports inside the container available locally.
  // "forwardPorts": [],

  // Use 'postCreateCommand' to run commands after the container is created.
  // "postCreateCommand": "ruby --version",

  // Configure tool-specific properties.
  // "customizations": {},

  // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
  // "remoteUser": "root"
}
data/.rspec
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,16 @@
AllCops:
  TargetRubyVersion: 3.0

Style/StringLiterals:
  EnforcedStyle: double_quotes

Style/StringLiteralsInInterpolation:
  EnforcedStyle: double_quotes

Metrics/BlockLength:
  Exclude:
    - 'spec/**/*'

Style/Documentation:
  Exclude:
    - '**/*'
data/CHANGELOG.md
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2024 Kohei Tsuyuki

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,142 @@
# ClassicBandit

[](https://github.com/t-chov/classic_bandit/actions/workflows/ci.yml)

A Ruby library for classic (non-contextual) multi-armed bandit algorithms including Thompson Sampling, UCB1, and Epsilon-Greedy.

## Requirements

- Ruby >= 3.0.0

## Installation

Add this line to your application's Gemfile:

```ruby
gem 'classic_bandit'
```

And then execute:

```bash
$ bundle install
```

Or install it yourself as:

```bash
$ gem install classic_bandit
```

## Usage

### A/B Testing Example

```ruby
require 'classic_bandit'

# Initialize banners for A/B testing
arms = [
  ClassicBandit::Arm.new(id: 'banner_a', name: 'Spring Campaign'),
  ClassicBandit::Arm.new(id: 'banner_b', name: 'Summer Campaign')
]

# Choose an algorithm: Epsilon-Greedy with 10% exploration
bandit = ClassicBandit::EpsilonGreedy.new(arms: arms, epsilon: 0.1)

# In your application
selected_arm = bandit.select_arm
# Display the selected banner to the user
show_banner(selected_arm.id)

# Update with the user's response
# 1 for click, 0 for no click
bandit.update(selected_arm, 1)
```

## Available Algorithms

### Epsilon-Greedy

Balances exploration and exploitation with a fixed exploration rate.

```ruby
bandit = ClassicBandit::EpsilonGreedy.new(arms: arms, epsilon: 0.1)
```

- Simple to understand and implement
- Explicitly controls exploration with the ε parameter
- Explores randomly with probability ε, exploits the best arm with probability 1-ε

### UCB1

Upper Confidence Bound algorithm that automatically balances exploration and exploitation.

```ruby
bandit = ClassicBandit::Ucb1.new(arms: arms)
```

- No explicit exploration parameter needed
- Automatically balances exploration and exploitation
- Uses confidence bounds to select arms
- Always tries untested arms first

### Softmax

Temperature-based algorithm that selects arms according to their relative rewards.

```ruby
bandit = ClassicBandit::Softmax.new(
  arms: arms,
  initial_temperature: 1.0,
  k: 0.5
)
```

- Uses the Boltzmann distribution for arm selection
- Higher temperature leads to more exploration
- Temperature decreases over time for better exploitation
- Smooth probability distribution over arms

### Thompson Sampling

Bayesian approach that maintains a probability distribution over each arm's rewards.

```ruby
bandit = ClassicBandit::ThompsonSampling.new(arms: arms)
```

- Naturally balances exploration and exploitation
- Uses the Beta distribution to model uncertainty
- Performs well in practice with no tuning required
- Adapts quickly to reward patterns

### Common Interface

All algorithms share the same interface:

```ruby
# Select an arm
arm = bandit.select_arm

# Update the arm with the observed reward
bandit.update(arm, 1) # Success
bandit.update(arm, 0) # Failure
```

## Development

After checking out the repo, run:

```bash
$ bundle install
$ bundle exec rspec
```

To release a new version:

1. Update the version number in version.rb
2. Create a git tag for the version
3. Push git commits and tags

## License

The gem is available as open source under the terms of the MIT License.
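
Editor's note: the common interface described in the README is enough to run a quick offline comparison of the four strategies. The sketch below is illustrative only and is not part of the package; the 4% and 6% click-through rates and the `build_arms` helper are made up for the example.

```ruby
require "classic_bandit"

TRUE_RATES = { "banner_a" => 0.04, "banner_b" => 0.06 }.freeze

# Each bandit needs its own Arm objects because arms carry mutable counters.
def build_arms
  TRUE_RATES.keys.map { |id| ClassicBandit::Arm.new(id: id) }
end

bandits = {
  "epsilon_greedy" => ClassicBandit::EpsilonGreedy.new(arms: build_arms, epsilon: 0.1),
  "ucb1"           => ClassicBandit::Ucb1.new(arms: build_arms),
  "softmax"        => ClassicBandit::Softmax.new(arms: build_arms, initial_temperature: 1.0, k: 0.5),
  "thompson"       => ClassicBandit::ThompsonSampling.new(arms: build_arms)
}

bandits.each do |name, bandit|
  10_000.times do
    arm = bandit.select_arm
    reward = rand < TRUE_RATES[arm.id] ? 1 : 0 # simulated click
    bandit.update(arm, reward)
  end
  puts format("%-15s total clicks: %d", name, bandit.arms.sum(&:successes))
end
```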
data/Rakefile
ADDED
data/lib/classic_bandit/arm.rb
ADDED
@@ -0,0 +1,31 @@
# frozen_string_literal: true

module ClassicBandit
  # Represents an arm in a multi-armed bandit problem.
  # Each arm maintains its own trial count and success count,
  # which are used by the various bandit algorithms to make decisions.
  #
  # @example Create a new arm
  #   arm = ClassicBandit::Arm.new(id: 1, name: "banner_a")
  #   arm.trials #=> 0
  #   arm.successes #=> 0
  class Arm
    attr_reader :id, :name
    attr_accessor :trials, :successes

    def initialize(id:, name: nil, trials: 0, successes: 0)
      @id = id
      @name = name || id.to_s
      @trials = trials
      @successes = successes
    end

    # Calculate the mean reward (success rate) for this arm
    # @return [Float] Mean reward (0.0 if no trials)
    def mean_reward
      return 0.0 if @trials.zero?

      @successes.to_f / @trials
    end
  end
end
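
Editor's note: a small illustrative snippet (not part of the diff) showing that `Arm` is a plain counter object whose `mean_reward` is simply successes divided by trials, guarded for the zero-trial case; the banner ids are arbitrary.

```ruby
require "classic_bandit"

arm = ClassicBandit::Arm.new(id: "banner_a")
arm.mean_reward #=> 0.0 (no trials yet)

arm.trials = 100
arm.successes = 7
arm.mean_reward #=> 0.07

# Pre-seeded statistics can also be passed at construction time
warm = ClassicBandit::Arm.new(id: "banner_b", trials: 150, successes: 14)
warm.mean_reward #=> 0.09333333333333334
```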
data/lib/classic_bandit/arm_updatable.rb
ADDED
@@ -0,0 +1,33 @@
# frozen_string_literal: true

module ClassicBandit
  # Provides common update functionality for bandit algorithms
  # to update arm statistics with observed rewards.
  #
  # @example Update an arm with a reward
  #   class MyBandit
  #     include ArmUpdatable
  #   end
  #
  #   bandit = MyBandit.new
  #   bandit.update(selected_arm, 1)
  module ArmUpdatable
    # Update the selected arm with the observed reward
    # @param arm [Arm] The arm that was selected
    # @param reward [Integer] The observed reward (0 or 1)
    def update(arm, reward)
      validate_reward!(reward)

      arm.trials += 1
      arm.successes += reward
    end

    private

    def validate_reward!(reward)
      return if [0, 1].include?(reward)

      raise ArgumentError, "reward must be 0 or 1"
    end
  end
end
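
Editor's note: an illustrative sketch (not part of the diff) of the mixin's behaviour through `EpsilonGreedy`, one of the classes that includes it: `update` bumps the arm's counters and rejects anything other than a binary reward.

```ruby
require "classic_bandit"

arms = [ClassicBandit::Arm.new(id: "a"), ClassicBandit::Arm.new(id: "b")]
bandit = ClassicBandit::EpsilonGreedy.new(arms: arms) # any class including ArmUpdatable

arm = bandit.select_arm
bandit.update(arm, 1)
arm.trials    #=> 1
arm.successes #=> 1

begin
  bandit.update(arm, 0.5) # only 0 or 1 is accepted
rescue ArgumentError => e
  e.message #=> "reward must be 0 or 1"
end
```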
data/lib/classic_bandit/epsilon_greedy.rb
ADDED
@@ -0,0 +1,49 @@
# frozen_string_literal: true

module ClassicBandit
  # Implements the Epsilon-Greedy algorithm for multi-armed bandit problems.
  # This algorithm makes a random choice with probability epsilon (exploration)
  # and chooses the arm with the highest mean reward with probability 1-epsilon (exploitation).
  #
  # @example Create and use an epsilon-greedy bandit
  #   arms = [
  #     ClassicBandit::Arm.new(id: 1, name: "banner_a", trials: 100, successes: 10),
  #     ClassicBandit::Arm.new(id: 2, name: "banner_b", trials: 150, successes: 14)
  #   ]
  #   bandit = ClassicBandit::EpsilonGreedy.new(arms: arms, epsilon: 0.1)
  #   selected_arm = bandit.select_arm
  #   bandit.update(selected_arm, 1)
  class EpsilonGreedy
    include ArmUpdatable

    attr_reader :arms, :epsilon

    def initialize(arms:, epsilon: 0.1)
      @arms = arms
      @epsilon = epsilon

      validate_epsilon!
    end

    def select_arm
      # If no arm has been tried yet, select at random
      return @arms.sample if @arms.all? { |arm| arm.trials.zero? }

      if rand < @epsilon
        # Exploration: random selection
        @arms.sample
      else
        # Exploitation: select the arm with the highest mean reward
        @arms.max_by(&:mean_reward)
      end
    end

    private

    def validate_epsilon!
      return if (0..1).cover?(@epsilon)

      raise ArgumentError, "epsilon must be between 0 and 1"
    end
  end
end
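
Editor's note: to make the exploration split concrete, the illustrative sketch below (not part of the package) counts selections for two pre-seeded arms. With `epsilon: 0.1`, the weaker arm is only reachable through the random branch, so it should receive roughly ε/2 ≈ 5% of the picks.

```ruby
require "classic_bandit"

arms = [
  ClassicBandit::Arm.new(id: "a", trials: 1_000, successes: 100), # 10% mean reward
  ClassicBandit::Arm.new(id: "b", trials: 1_000, successes: 50)   #  5% mean reward
]
bandit = ClassicBandit::EpsilonGreedy.new(arms: arms, epsilon: 0.1)

# select_arm alone does not change the counters, so the draws are i.i.d. here.
picks = Hash.new(0)
10_000.times { picks[bandit.select_arm.id] += 1 }

puts picks #=> e.g. {"a"=>9497, "b"=>503}
```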
data/lib/classic_bandit/softmax.rb
ADDED
@@ -0,0 +1,63 @@
# frozen_string_literal: true

module ClassicBandit
  # Implements the Softmax algorithm for multi-armed bandit problems.
  # This algorithm selects arms based on the Boltzmann distribution,
  # with a temperature parameter controlling the exploration-exploitation balance.
  #
  # @example Create and use a Softmax bandit
  #   arms = [
  #     ClassicBandit::Arm.new(id: 1, name: "banner_a"),
  #     ClassicBandit::Arm.new(id: 2, name: "banner_b")
  #   ]
  #   bandit = ClassicBandit::Softmax.new(
  #     arms: arms,
  #     initial_temperature: 1.0,
  #     k: 0.5
  #   )
  class Softmax
    include ArmUpdatable

    attr_reader :arms

    def initialize(arms:, initial_temperature:, k:) # rubocop:disable Naming/MethodParameterName
      @arms = arms
      @initial_temperature = initial_temperature
      @k = k

      validate_parameters!
    end

    def select_arm
      return @arms.sample if @arms.all? { |arm| arm.trials.zero? }

      probabilities = @arms.map { |arm| softmax_score(arm, temperature) }
      cumulative_prob = 0
      random_value = rand

      @arms.each_with_index do |arm, i|
        cumulative_prob += probabilities[i]
        return arm if random_value <= cumulative_prob
      end

      @arms.last
    end

    private

    def softmax_score(arm, temperature)
      exp_values = @arms.map { |a| Math.exp(a.mean_reward / temperature) }
      Math.exp(arm.mean_reward / temperature) / exp_values.sum
    end

    def temperature
      total_trials = @arms.sum(&:trials)
      @initial_temperature / Math.log(@k * total_trials + 2)
    end

    def validate_parameters!
      raise ArgumentError, "initial_temperature must be positive" unless @initial_temperature.positive?
      raise ArgumentError, "k must be positive" unless @k.positive?
    end
  end
end
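
Editor's note: the selection probabilities implemented above follow the Boltzmann rule P(i) = exp(μᵢ/τ) / Σⱼ exp(μⱼ/τ), with the temperature τ decaying as trials accumulate. The sketch below (not part of the package) redoes that arithmetic in plain Ruby for two hypothetical mean rewards.

```ruby
# Mirrors Softmax#temperature and Softmax#softmax_score by hand.
mean_rewards = { "banner_a" => 0.10, "banner_b" => 0.05 }
initial_temperature = 1.0
k = 0.5
total_trials = 2_000

temperature = initial_temperature / Math.log(k * total_trials + 2)
exp_values  = mean_rewards.transform_values { |m| Math.exp(m / temperature) }
sum         = exp_values.values.sum
probs       = exp_values.transform_values { |v| v / sum }

puts temperature.round(3) #=> 0.145
puts probs                #=> roughly {"banner_a"=>0.586, "banner_b"=>0.414}
```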
data/lib/classic_bandit/thompson_sampling.rb
ADDED
@@ -0,0 +1,62 @@
# frozen_string_literal: true

module ClassicBandit
  # Implements the Thompson Sampling algorithm for multi-armed bandit problems.
  # Uses a Beta-Bernoulli conjugate model, sampling from the Beta distribution
  # using Gamma random variables.
  #
  # @example Create and use Thompson Sampling
  #   arms = [
  #     ClassicBandit::Arm.new(id: 1, name: "banner_a"),
  #     ClassicBandit::Arm.new(id: 2, name: "banner_b")
  #   ]
  #   bandit = ClassicBandit::ThompsonSampling.new(arms: arms)
  class ThompsonSampling
    include ArmUpdatable

    attr_reader :arms

    def initialize(arms:)
      @arms = arms
    end

    def select_arm
      return @arms.sample if @arms.all? { |arm| arm.trials.zero? }

      @arms.max_by { |arm| ts_score(arm) }
    end

    private

    def ts_score(arm)
      return 0.0 if arm.trials.zero?
      return 1.0 if arm.successes == arm.trials

      x = gamma_random(arm.successes + 1)
      y = gamma_random(arm.trials - arm.successes + 1)
      x / (x + y)
    end

    def gamma_random(alpha) # rubocop:disable Metrics/AbcSize
      return gamma_random(alpha + 1) * rand**(1.0 / alpha) if alpha < 1

      # Marsaglia-Tsang method
      d = alpha - 1.0 / 3
      c = 1.0 / Math.sqrt(9 * d)

      loop do
        z = normal_random
        v = (1 + c * z)**3
        u = rand

        return d * v if z > -1.0 / c && Math.log(u) < 0.5 * z * z + d * (1 - v + Math.log(v))
      end
    end

    def normal_random
      r = Math.sqrt(-2 * Math.log(rand))
      theta = 2 * Math::PI * rand
      r * Math.cos(theta)
    end
  end
end
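
Editor's note: `ts_score` draws from a Beta(successes + 1, failures + 1) posterior using the identity that X / (X + Y) is Beta(a, b) distributed when X ~ Gamma(a) and Y ~ Gamma(b). The illustrative sketch below (not part of the package) shows the practical effect: an arm with few trials has both a wider and a more optimistically smoothed posterior, so it keeps being selected until more data narrows it down.

```ruby
require "classic_bandit"

arms = [
  ClassicBandit::Arm.new(id: "well_observed", trials: 1_000, successes: 100),
  ClassicBandit::Arm.new(id: "barely_tried",  trials: 10,    successes: 1)
]
bandit = ClassicBandit::ThompsonSampling.new(arms: arms)

# Each call draws one sample per arm from Beta(successes + 1, failures + 1).
# Beta(2, 10) for the barely tried arm is far wider than Beta(101, 901) for
# the well-observed one, so the uncertain arm wins a large share of draws.
picks = Hash.new(0)
10_000.times { picks[bandit.select_arm.id] += 1 }
puts picks #=> e.g. {"barely_tried"=>7400, "well_observed"=>2600}
```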
data/lib/classic_bandit/ucb1.rb
ADDED
@@ -0,0 +1,50 @@
# frozen_string_literal: true

module ClassicBandit
  # Implements the UCB1 (Upper Confidence Bound) algorithm for multi-armed bandit problems.
  # This algorithm selects arms based on their mean rewards plus a confidence term,
  # balancing exploration and exploitation without requiring an explicit epsilon parameter.
  #
  # @example Create and use a UCB1 bandit
  #   arms = [
  #     ClassicBandit::Arm.new(id: 1, name: "banner_a"),
  #     ClassicBandit::Arm.new(id: 2, name: "banner_b")
  #   ]
  #   bandit = ClassicBandit::Ucb1.new(arms: arms)
  #   selected_arm = bandit.select_arm
  #   bandit.update(selected_arm, 1)
  class Ucb1
    include ArmUpdatable

    # @return [Array<Arm>] Available arms for selection
    attr_reader :arms

    # Initialize a new UCB1 bandit
    # @param arms [Array<Arm>] List of arms to choose from
    def initialize(arms:)
      @arms = arms
    end

    # Select an arm using the UCB1 algorithm.
    # Initially tries each arm once, then uses the UCB1 formula for selection.
    # @return [Arm] Selected arm
    def select_arm
      # Use an untried arm if one exists
      untried_arm = @arms.find { |arm| arm.trials.zero? }
      return untried_arm if untried_arm

      total_trials = @arms.sum(&:trials)
      @arms.max_by { |arm| ucb_score(arm, total_trials) }
    end

    private

    def ucb_score(arm, total_trials)
      return Float::INFINITY if arm.trials.zero?

      exploration_term = Math.sqrt(2 * Math.log(total_trials) / arm.trials)

      arm.mean_reward + exploration_term
    end
  end
end
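
Editor's note: the quantity maximised above is the UCB1 index mean_reward + sqrt(2 * ln(total_trials) / trials). The sketch below (not part of the package) computes it by hand for two hypothetical arms and checks that `Ucb1#select_arm` agrees: the arm with fewer trials gets a larger exploration bonus.

```ruby
require "classic_bandit"

arms = [
  ClassicBandit::Arm.new(id: "a", trials: 900, successes: 90), # 10.0% observed
  ClassicBandit::Arm.new(id: "b", trials: 100, successes: 11)  # 11.0% observed
]
total_trials = arms.sum(&:trials) # 1000

scores = arms.map do |arm|
  bonus = Math.sqrt(2 * Math.log(total_trials) / arm.trials)
  [arm.id, (arm.mean_reward + bonus).round(3)]
end.to_h
puts scores #=> {"a"=>0.224, "b"=>0.482}

# The same ranking drives the selection:
ClassicBandit::Ucb1.new(arms: arms).select_arm.id #=> "b"
```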
data/lib/classic_bandit.rb
ADDED
@@ -0,0 +1,13 @@
# frozen_string_literal: true

require "zeitwerk"
require_relative "classic_bandit/version"

module ClassicBandit
  class Error < StandardError; end
  # Your code goes here...
end

loader = Zeitwerk::Loader.for_gem
loader.setup
loader.eager_load
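
Editor's note: the entry point leans on Zeitwerk's file-name-to-constant convention, so a single require is expected to expose every class in this diff; a brief illustrative check (not part of the package):

```ruby
require "classic_bandit"

# Zeitwerk maps lib/classic_bandit/ucb1.rb to ClassicBandit::Ucb1,
# lib/classic_bandit/arm.rb to ClassicBandit::Arm, and so on.
ClassicBandit::VERSION #=> "0.1.0"
ClassicBandit::Ucb1.new(arms: [ClassicBandit::Arm.new(id: "a")]).select_arm.id #=> "a"
```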
metadata
ADDED
@@ -0,0 +1,64 @@
--- !ruby/object:Gem::Specification
name: classic_bandit
version: !ruby/object:Gem::Version
  version: 0.1.0
platform: ruby
authors:
- Kohei Tsuyuki
autorequire:
bindir: exe
cert_chain: []
date: 2024-12-27 00:00:00.000000000 Z
dependencies: []
description: Implementation of classic multi-armed bandit algorithms in Ruby. Supports
  Thompson Sampling, UCB1, and Epsilon-Greedy strategies with a simple, consistent
  API for A/B testing and optimization tasks.
email:
- kotsuyuki@gmail.com
executables: []
extensions: []
extra_rdoc_files: []
files:
- ".devcontainer/devcontainer.json"
- ".rspec"
- ".rubocop.yml"
- CHANGELOG.md
- LICENSE.txt
- README.md
- Rakefile
- lib/classic_bandit.rb
- lib/classic_bandit/arm.rb
- lib/classic_bandit/arm_updatable.rb
- lib/classic_bandit/epsilon_greedy.rb
- lib/classic_bandit/softmax.rb
- lib/classic_bandit/thompson_sampling.rb
- lib/classic_bandit/ucb1.rb
- lib/classic_bandit/version.rb
- sig/classic_bandit.rbs
homepage: https://github.com/t-chov/classic_bandit
licenses:
- MIT
metadata:
  homepage_uri: https://github.com/t-chov/classic_bandit
  source_code_uri: https://github.com/t-chov/classic_bandit
  changelog_uri: https://github.com/t-chov/classic_bandit/blob/main/CHANGELOG.md
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: 3.0.0
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubygems_version: 3.2.33
signing_key:
specification_version: 4
summary: A Ruby library for classic (non-contextual) multi-armed bandit algorithms
test_files: []