RubyGems - classic_bandit - Versions diffs - 0.1.0 → 0.1.2 - Mend

classic_bandit 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

checksums.yaml +4 -4
data/.rubocop.yml +8 -1
data/CLAUDE.md +45 -0
data/Rakefile +3 -1
data/example/Gemfile +12 -0
data/example/Gemfile.lock +20 -0
data/example/beta_random.rb +87 -0
data/example/simulation.rb +73 -0
data/lib/classic_bandit/thompson_sampling.rb +31 -11
data/lib/classic_bandit/version.rb +1 -1
metadata +10 -5

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 9ce78adca73218db90a909daec3ccbb457417620d78ae2566c0a32ac7024a313
-  data.tar.gz: 9b6debd514866747242bd00de9db4d97d1fd95e5a03166e52c96b812daa3d7f6
+  metadata.gz: 9e7b77e4f21e909a3515df0c5785b2f321ce3cf2977d825f0190adcd9f15a82d
+  data.tar.gz: daeb7c6386d836256e0a32c65fc72e68ad02ad2283b4a6e89b8a4eb9eebf35ab
 SHA512:
-  metadata.gz: 80cc50641016e81853f766645d1fe9baf3c7bf4b09c8bc9303ce083dac1bec574d78432dd174953785e3dd37032e34c2248d22f47acd1c08c02078f02ad6f8a7
-  data.tar.gz: e5b534ad79cc2a91a95b617c315335cb8c399c7241308750d868b4eb13e60016382765ff1e83447495676414e1213a6b7212d31e9b18f2859063d5f491f80b25
+  metadata.gz: 7ca36345f798cc2e869cba8e0c3e41b33892771227fcd12c85c2d9973d6fb697a79b22366f6f59a7bb041772e1d4688147162b78f0d2530ec9d4dd9da5e45981
+  data.tar.gz: 1bea576eeb3b00795b40206279376fd9b1da678cfd9ce5157829d1be8983fcb7fa7f488d91733715d263e772de250eb1a79145790152e99cb771515453db3fbe

data/.rubocop.yml CHANGED Viewed

@@ -1,5 +1,9 @@
 AllCops:
   TargetRubyVersion: 3.0
+  NewCops: disable
+  Include:
+    - 'lib/**/*'
+    - 'spec/**/*'
 Style/StringLiterals:
   EnforcedStyle: double_quotes
@@ -13,4 +17,7 @@ Metrics/BlockLength:
 Style/Documentation:
   Exclude:
-    - '**/*'
+    - '**/*'
+Metrics/MethodLength:
+  Max: 30

data/CLAUDE.md ADDED Viewed

@@ -0,0 +1,45 @@
+# CLAUDE.md
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+## Development Commands
+- **Run tests**: `bundle exec rspec`
+- **Lint code**: `bundle exec rubocop`
+- **Run all checks**: `bundle exec rake` (runs both tests and linting)
+- **Install dependencies**: `bundle install`
+The project uses RuboCop for linting with explicit config file reference in Rakefile:15.
+## Architecture Overview
+ClassicBandit is a Ruby gem implementing multi-armed bandit algorithms for A/B testing and optimization. The library uses Zeitwerk for autoloading and follows a modular design pattern.
+### Core Components
+**Arm (`lib/classic_bandit/arm.rb`)**: Represents a bandit arm with trial/success tracking and mean reward calculation.
+**ArmUpdatable (`lib/classic_bandit/arm_updatable.rb`)**: Shared module providing `update(arm, reward)` method for all bandit algorithms. Validates rewards are 0 or 1.
+**Algorithm Implementations**:
+- **EpsilonGreedy**: Simple ε-greedy with exploration/exploitation balance
+- **UCB1**: Upper Confidence Bound without explicit parameters
+- **Softmax**: Temperature-based Boltzmann distribution selection
+- **ThompsonSampling**: Bayesian approach with Beta-Bernoulli model using custom Gamma random number generation
+### Key Design Patterns
+All algorithms implement the same interface:
+- `select_arm()` → returns an Arm instance
+- `update(arm, reward)` → updates arm statistics (from ArmUpdatable)
+- Handle untested arms by random selection
+The Thompson Sampling implementation includes custom statistical functions (`gamma_random`, `normal_random`) using Marsaglia-Tsang method for Gamma distribution sampling.
+### Testing Structure
+Tests are organized in `spec/` with individual algorithm specs and a main gem spec. Uses standard RSpec testing framework.
+### Example Usage
+The `example/` directory contains simulation scripts demonstrating algorithm comparison with Gnuplot visualization, showing realistic usage patterns with pre-populated arm statistics.

data/Rakefile CHANGED Viewed

@@ -7,6 +7,8 @@ RSpec::Core::RakeTask.new(:spec)
 require "rubocop/rake_task"
-RuboCop::RakeTask.new
+RuboCop::RakeTask.new do |task|
+  task.options = ["--config", ".rubocop.yml"] # 明示的に設定ファイルを指定
+end
 task default: %i[spec rubocop]

data/example/Gemfile ADDED Viewed

@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+source "https://rubygems.org"
+# gem "rails"
+gem "gnuplot", "~> 2.6"
+gem "matrix", "~> 0.4.2"
+gem "zeitwerk", "~> 2.7"
+gem "classic_bandit", "~> 0.1.0"

data/example/Gemfile.lock ADDED Viewed

@@ -0,0 +1,20 @@
+GEM
+  remote: https://rubygems.org/
+  specs:
+    classic_bandit (0.1.0)
+    gnuplot (2.6.2)
+    matrix (0.4.2)
+    zeitwerk (2.7.1)
+PLATFORMS
+  ruby
+  x86_64-linux
+DEPENDENCIES
+  classic_bandit (~> 0.1.0)
+  gnuplot (~> 2.6)
+  matrix (~> 0.4.2)
+  zeitwerk (~> 2.7)
+BUNDLED WITH
+   2.6.2

data/example/beta_random.rb ADDED Viewed

@@ -0,0 +1,87 @@
+# frozen_string_literal: true
+require "gnuplot"
+def gamma_random(alpha) # rubocop:disable Metrics/AbcSize
+  return gamma_random(alpha + 1) * rand**(1.0 / alpha) if alpha < 1
+  d = alpha - 1.0 / 3
+  c = 1.0 / Math.sqrt(9 * d)
+  loop do
+    x = normal_random
+    v = (1 + c * x)**3
+    next if v <= 0
+    u = rand
+    # Squeeze test
+    return d * v if u < 1 - 0.0331 * x**4
+    # Full test
+    return d * v if Math.log(u) < 0.5 * x * x + d - d * v + d * Math.log(v)
+  end
+end
+def normal_random
+  r = Math.sqrt(-2 * Math.log(rand))
+  theta = 2 * Math::PI * rand
+  r * Math.cos(theta)
+end
+def beta_function(alpha, beta)
+  gamma_alpha = Math.lgamma(alpha)[0]
+  gamma_beta = Math.lgamma(beta)[0]
+  gamma_apb = Math.lgamma(alpha + beta)[0]
+  Math.exp(gamma_alpha + gamma_beta - gamma_apb)
+end
+def beta_pdf(x, alpha, beta)
+  return 0 if x <= 0 || x >= 1
+  x**(alpha - 1) * (1 - x)**(beta - 1) / beta_function(alpha, beta)
+end
+data = Array.new(10_000) do
+  x1 = gamma_random(41)
+  x2 = gamma_random(61)
+  x1 / (x1 + x2)
+end
+Gnuplot.open do |gp|
+  Gnuplot::Plot.new(gp) do |plot|
+    plot.title  "Beta distribution histogram"
+    plot.xlabel "Value"
+    plot.ylabel "Frequency"
+    min_val = 0.0
+    max_val = 1.0
+    bin_count = 60.0
+    bin_width = (max_val - min_val) / bin_count
+    plot.xrange "[0:1]"
+    total_count = data.length.to_f
+    plot.set "style data histograms"
+    plot.set "style fill solid 0.5"
+    bins = Hash.new(0)
+    bin_count.to_i.times.each { |i| bins[i * bin_width] = 0 }
+    data.each { |v| bins[(v / bin_width).floor * bin_width] += 1 }
+    bins.transform_values! { |v| v / (total_count * bin_width) }
+    plot.data << Gnuplot::DataSet.new([bins.keys, bins.values]) do |ds|
+      ds.with = "boxes"
+      ds.title = "Empirical"
+    end
+    x_points = (0..100).map { |i| i / 100.0 }
+    y_points = x_points.map { |x| beta_pdf(x, 41, 61) }
+    plot.data << Gnuplot::DataSet.new([x_points, y_points]) do |ds|
+      ds.with = "lines"
+      ds.linewidth = 2
+      ds.title = "Theoretical PDF"
+    end
+  end
+end

data/example/simulation.rb ADDED Viewed

@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+require 'classic_bandit'
+require 'gnuplot'
+bandits = {
+  "UCB1" => ClassicBandit::Ucb1.new(arms: [
+      ClassicBandit::Arm.new(id: 0, trials: 1000, successes: 120),
+      ClassicBandit::Arm.new(id: 1, trials: 1000, successes: 110),
+      ClassicBandit::Arm.new(id: 2, trials: 1000, successes: 100),
+  ]),
+  "Thompson sampling" => ClassicBandit::ThompsonSampling.new(arms: [
+      ClassicBandit::Arm.new(id: 0, trials: 1000, successes: 120),
+      ClassicBandit::Arm.new(id: 1, trials: 1000, successes: 110),
+      ClassicBandit::Arm.new(id: 2, trials: 1000, successes: 100),
+  ])
+}
+arm0_counts = Hash.new(0)
+arm0_probs = {}
+bandits.keys.each { |key| arm0_probs[key] = [] }
+x_values = []
+10000.times.each do |i|
+  bandits.each do |key, bandit|
+    # 最初の500回はランダム
+    if i < 500
+      arm = bandit.arms.sample
+    else
+      arm = bandit.select_arm
+    end
+    reward = rand <= arm.mean_reward ? 1 : 0
+    bandit.update(arm, reward)
+    if arm.id == 0
+      arm0_counts[key] += 1
+    end
+    arm0_prob = arm0_counts[key].to_f / (i + 1)
+    arm0_probs[key] << arm0_prob
+  end
+  x_values << i + 1
+end
+Gnuplot.open do |gp|
+  Gnuplot::Plot.new(gp) do |plot|
+    plot.title  "Bandit Selection Probability"
+    plot.xlabel "Iterations"
+    plot.ylabel "Probability"
+    # y軸の範囲を0-1に設定
+    plot.yrange "[0:1]"
+    # グリッドを表示
+    plot.set "grid"
+    # 線のスタイルを設定
+    plot.set "style line 1 linecolor rgb '#0060ad' linewidth 2"
+    plot.set "style line 2 linecolor rgb '#dd181f' linewidth 2"
+    # 各アルゴリズムのデータをプロット
+    colors = ["#0060ad", "#dd181f"]
+    bandits.each_with_index do |(key, _), index|
+      plot.data << Gnuplot::DataSet.new([x_values, arm0_probs[key]]) do |ds|
+        ds.with = "lines"
+        ds.linewidth = 2
+        ds.linecolor = "rgb '#{colors[index]}'"
+        ds.title = key.to_s
+      end
+    end
+  end
+end

data/lib/classic_bandit/thompson_sampling.rb CHANGED Viewed

@@ -14,26 +14,39 @@ module ClassicBandit
   class ThompsonSampling
     include ArmUpdatable
-    attr_reader :arms
+    attr_reader :arms, :alpha_prior, :beta_prior
+    # @param arms [Array<Arm>] Array of arms to choose from
+    # @param alpha_prior [Float] Prior parameter for successes (default: 1.0)
+    # @param beta_prior [Float] Prior parameter for failures (default: 1.0)
+    def initialize(arms:, alpha_prior: 1.0, beta_prior: 1.0)
+      raise ArgumentError, "alpha_prior must be positive" unless alpha_prior.positive?
+      raise ArgumentError, "beta_prior must be positive" unless beta_prior.positive?
-    def initialize(arms:)
       @arms = arms
+      @alpha_prior = alpha_prior
+      @beta_prior = beta_prior
     end
     def select_arm
-      return @arms.sample if @arms.all? { |arm| arm.trials.zero? }
       @arms.max_by { |arm| ts_score(arm) }
     end
     private
     def ts_score(arm)
-      return 0.0 if arm.trials.zero?
-      return 1.0 if arm.successes == arm.trials
+      alpha = arm.successes + @alpha_prior
+      beta = (arm.trials - arm.successes) + @beta_prior
+      beta_sample(alpha, beta)
+    end
-      x = gamma_random(arm.successes + 1)
-      y = gamma_random(arm.trials - arm.successes + 1)
+    def beta_sample(alpha, beta)
+      # Beta(1,1) = Uniform(0,1)
+      return rand if alpha == 1.0 && beta == 1.0 # rubocop:disable Lint/FloatComparison
+      x = gamma_random(alpha)
+      y = gamma_random(beta)
       x / (x + y)
     end
@@ -45,11 +58,18 @@ module ClassicBandit
       c = 1.0 / Math.sqrt(9 * d)
       loop do
-        z = normal_random
-        v = (1 + c * z)**3
+        x = normal_random
+        v = (1 + c * x)**3
+        next if v <= 0
         u = rand
-        return d * v if z > -1.0 / c && Math.log(u) < 0.5 * z * z + d * (1 - v + Math.log(v))
+        # Squeeze test
+        return d * v if u < 1 - 0.0331 * x**4
+        # Full test
+        return d * v if Math.log(u) < 0.5 * x * x + d - d * v + d * Math.log(v)
       end
     end

data/lib/classic_bandit/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module ClassicBandit
-  VERSION = "0.1.0"
+  VERSION = "0.1.2"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: classic_bandit
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.2
 platform: ruby
 authors:
 - Kohei Tsuyuki
-autorequire:
+autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-12-27 00:00:00.000000000 Z
+date: 2025-11-09 00:00:00.000000000 Z
 dependencies: []
 description: Implementation of classic multi-armed bandit algorithms in Ruby. Supports
   Thompson Sampling, UCB1, and Epsilon-Greedy strategies with a simple, consistent
@@ -23,9 +23,14 @@ files:
 - ".rspec"
 - ".rubocop.yml"
 - CHANGELOG.md
+- CLAUDE.md
 - LICENSE.txt
 - README.md
 - Rakefile
+- example/Gemfile
+- example/Gemfile.lock
+- example/beta_random.rb
+- example/simulation.rb
 - lib/classic_bandit.rb
 - lib/classic_bandit/arm.rb
 - lib/classic_bandit/arm_updatable.rb
@@ -42,7 +47,7 @@ metadata:
   homepage_uri: https://github.com/t-chov/classic_bandit
   source_code_uri: https://github.com/t-chov/classic_bandit
   changelog_uri: https://github.com/t-chov/classic_bandit/blob/main/CHANGELOG.md
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -58,7 +63,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubygems_version: 3.2.33
-signing_key:
+signing_key:
 specification_version: 4
 summary: A Ruby library for classic (non-contextual) multi-armed bandit algorithms
 test_files: []