classic_bandit 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +5 -1
- data/CLAUDE.md +45 -0
- data/example/beta_random.rb +12 -6
- data/lib/classic_bandit/thompson_sampling.rb +31 -11
- data/lib/classic_bandit/version.rb +1 -1
- metadata +6 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9e7b77e4f21e909a3515df0c5785b2f321ce3cf2977d825f0190adcd9f15a82d
|
|
4
|
+
data.tar.gz: daeb7c6386d836256e0a32c65fc72e68ad02ad2283b4a6e89b8a4eb9eebf35ab
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7ca36345f798cc2e869cba8e0c3e41b33892771227fcd12c85c2d9973d6fb697a79b22366f6f59a7bb041772e1d4688147162b78f0d2530ec9d4dd9da5e45981
|
|
7
|
+
data.tar.gz: 1bea576eeb3b00795b40206279376fd9b1da678cfd9ce5157829d1be8983fcb7fa7f488d91733715d263e772de250eb1a79145790152e99cb771515453db3fbe
|
data/.rubocop.yml
CHANGED
data/CLAUDE.md
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Development Commands
|
|
6
|
+
|
|
7
|
+
- **Run tests**: `bundle exec rspec`
|
|
8
|
+
- **Lint code**: `bundle exec rubocop`
|
|
9
|
+
- **Run all checks**: `bundle exec rake` (runs both tests and linting)
|
|
10
|
+
- **Install dependencies**: `bundle install`
|
|
11
|
+
|
|
12
|
+
The project uses RuboCop for linting with explicit config file reference in Rakefile:15.
|
|
13
|
+
|
|
14
|
+
## Architecture Overview
|
|
15
|
+
|
|
16
|
+
ClassicBandit is a Ruby gem implementing multi-armed bandit algorithms for A/B testing and optimization. The library uses Zeitwerk for autoloading and follows a modular design pattern.
|
|
17
|
+
|
|
18
|
+
### Core Components
|
|
19
|
+
|
|
20
|
+
**Arm (`lib/classic_bandit/arm.rb`)**: Represents a bandit arm with trial/success tracking and mean reward calculation.
|
|
21
|
+
|
|
22
|
+
**ArmUpdatable (`lib/classic_bandit/arm_updatable.rb`)**: Shared module providing `update(arm, reward)` method for all bandit algorithms. Validates rewards are 0 or 1.
|
|
23
|
+
|
|
24
|
+
**Algorithm Implementations**:
|
|
25
|
+
- **EpsilonGreedy**: Simple ε-greedy with exploration/exploitation balance
|
|
26
|
+
- **UCB1**: Upper Confidence Bound without explicit parameters
|
|
27
|
+
- **Softmax**: Temperature-based Boltzmann distribution selection
|
|
28
|
+
- **ThompsonSampling**: Bayesian approach with Beta-Bernoulli model using custom Gamma random number generation
|
|
29
|
+
|
|
30
|
+
### Key Design Patterns
|
|
31
|
+
|
|
32
|
+
All algorithms implement the same interface:
|
|
33
|
+
- `select_arm()` → returns an Arm instance
|
|
34
|
+
- `update(arm, reward)` → updates arm statistics (from ArmUpdatable)
|
|
35
|
+
- Handle untested arms by random selection
|
|
36
|
+
|
|
37
|
+
The Thompson Sampling implementation includes custom statistical functions (`gamma_random`, `normal_random`) using Marsaglia-Tsang method for Gamma distribution sampling.
|
|
38
|
+
|
|
39
|
+
### Testing Structure
|
|
40
|
+
|
|
41
|
+
Tests are organized in `spec/` with individual algorithm specs and a main gem spec. Uses standard RSpec testing framework.
|
|
42
|
+
|
|
43
|
+
### Example Usage
|
|
44
|
+
|
|
45
|
+
The `example/` directory contains simulation scripts demonstrating algorithm comparison with Gnuplot visualization, showing realistic usage patterns with pre-populated arm statistics.
|
data/example/beta_random.rb
CHANGED
|
@@ -2,19 +2,25 @@
|
|
|
2
2
|
|
|
3
3
|
require "gnuplot"
|
|
4
4
|
|
|
5
|
-
def gamma_random(alpha)
|
|
5
|
+
def gamma_random(alpha) # rubocop:disable Metrics/AbcSize
|
|
6
6
|
return gamma_random(alpha + 1) * rand**(1.0 / alpha) if alpha < 1
|
|
7
7
|
|
|
8
|
-
# Marsaglia-Tsang method
|
|
9
8
|
d = alpha - 1.0 / 3
|
|
10
9
|
c = 1.0 / Math.sqrt(9 * d)
|
|
11
10
|
|
|
12
11
|
loop do
|
|
13
|
-
|
|
14
|
-
v = (1 + c *
|
|
12
|
+
x = normal_random
|
|
13
|
+
v = (1 + c * x)**3
|
|
14
|
+
|
|
15
|
+
next if v <= 0
|
|
16
|
+
|
|
15
17
|
u = rand
|
|
16
|
-
|
|
17
|
-
|
|
18
|
+
|
|
19
|
+
# Squeeze test
|
|
20
|
+
return d * v if u < 1 - 0.0331 * x**4
|
|
21
|
+
|
|
22
|
+
# Full test
|
|
23
|
+
return d * v if Math.log(u) < 0.5 * x * x + d - d * v + d * Math.log(v)
|
|
18
24
|
end
|
|
19
25
|
end
|
|
20
26
|
|
|
data/lib/classic_bandit/thompson_sampling.rb
CHANGED
|
@@ -14,26 +14,39 @@ module ClassicBandit
|
|
|
14
14
|
class ThompsonSampling
|
|
15
15
|
include ArmUpdatable
|
|
16
16
|
|
|
17
|
-
attr_reader :arms
|
|
17
|
+
attr_reader :arms, :alpha_prior, :beta_prior
|
|
18
|
+
|
|
19
|
+
# @param arms [Array<Arm>] Array of arms to choose from
|
|
20
|
+
# @param alpha_prior [Float] Prior parameter for successes (default: 1.0)
|
|
21
|
+
# @param beta_prior [Float] Prior parameter for failures (default: 1.0)
|
|
22
|
+
def initialize(arms:, alpha_prior: 1.0, beta_prior: 1.0)
|
|
23
|
+
raise ArgumentError, "alpha_prior must be positive" unless alpha_prior.positive?
|
|
24
|
+
raise ArgumentError, "beta_prior must be positive" unless beta_prior.positive?
|
|
18
25
|
|
|
19
|
-
def initialize(arms:)
|
|
20
26
|
@arms = arms
|
|
27
|
+
@alpha_prior = alpha_prior
|
|
28
|
+
@beta_prior = beta_prior
|
|
21
29
|
end
|
|
22
30
|
|
|
23
31
|
def select_arm
|
|
24
|
-
return @arms.sample if @arms.all? { |arm| arm.trials.zero? }
|
|
25
|
-
|
|
26
32
|
@arms.max_by { |arm| ts_score(arm) }
|
|
27
33
|
end
|
|
28
34
|
|
|
29
35
|
private
|
|
30
36
|
|
|
31
37
|
def ts_score(arm)
|
|
32
|
-
|
|
33
|
-
|
|
38
|
+
alpha = arm.successes + @alpha_prior
|
|
39
|
+
beta = (arm.trials - arm.successes) + @beta_prior
|
|
40
|
+
|
|
41
|
+
beta_sample(alpha, beta)
|
|
42
|
+
end
|
|
34
43
|
|
|
35
|
-
|
|
36
|
-
|
|
44
|
+
def beta_sample(alpha, beta)
|
|
45
|
+
# Beta(1,1) = Uniform(0,1)
|
|
46
|
+
return rand if alpha == 1.0 && beta == 1.0 # rubocop:disable Lint/FloatComparison
|
|
47
|
+
|
|
48
|
+
x = gamma_random(alpha)
|
|
49
|
+
y = gamma_random(beta)
|
|
37
50
|
x / (x + y)
|
|
38
51
|
end
|
|
39
52
|
|
|
@@ -45,11 +58,18 @@ module ClassicBandit
|
|
|
45
58
|
c = 1.0 / Math.sqrt(9 * d)
|
|
46
59
|
|
|
47
60
|
loop do
|
|
48
|
-
|
|
49
|
-
v = (1 + c *
|
|
61
|
+
x = normal_random
|
|
62
|
+
v = (1 + c * x)**3
|
|
63
|
+
|
|
64
|
+
next if v <= 0
|
|
65
|
+
|
|
50
66
|
u = rand
|
|
51
67
|
|
|
52
|
-
|
|
68
|
+
# Squeeze test
|
|
69
|
+
return d * v if u < 1 - 0.0331 * x**4
|
|
70
|
+
|
|
71
|
+
# Full test
|
|
72
|
+
return d * v if Math.log(u) < 0.5 * x * x + d - d * v + d * Math.log(v)
|
|
53
73
|
end
|
|
54
74
|
end
|
|
55
75
|
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: classic_bandit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.1
|
|
4
|
+
version: 0.1.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Kohei Tsuyuki
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2025-11-09 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: Implementation of classic multi-armed bandit algorithms in Ruby. Supports
|
|
14
14
|
Thompson Sampling, UCB1, and Epsilon-Greedy strategies with a simple, consistent
|
|
@@ -23,6 +23,7 @@ files:
|
|
|
23
23
|
- ".rspec"
|
|
24
24
|
- ".rubocop.yml"
|
|
25
25
|
- CHANGELOG.md
|
|
26
|
+
- CLAUDE.md
|
|
26
27
|
- LICENSE.txt
|
|
27
28
|
- README.md
|
|
28
29
|
- Rakefile
|
|
@@ -46,7 +47,7 @@ metadata:
|
|
|
46
47
|
homepage_uri: https://github.com/t-chov/classic_bandit
|
|
47
48
|
source_code_uri: https://github.com/t-chov/classic_bandit
|
|
48
49
|
changelog_uri: https://github.com/t-chov/classic_bandit/blob/main/CHANGELOG.md
|
|
49
|
-
post_install_message:
|
|
50
|
+
post_install_message:
|
|
50
51
|
rdoc_options: []
|
|
51
52
|
require_paths:
|
|
52
53
|
- lib
|
|
@@ -62,7 +63,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
62
63
|
version: '0'
|
|
63
64
|
requirements: []
|
|
64
65
|
rubygems_version: 3.2.33
|
|
65
|
-
signing_key:
|
|
66
|
+
signing_key:
|
|
66
67
|
specification_version: 4
|
|
67
68
|
summary: A Ruby library for classic (non-contextual) multi-armed bandit algorithms
|
|
68
69
|
test_files: []
|