RubyGems - classic_bandit - Versions diffs - 0.1.1 → 0.1.2 - Mend

classic_bandit 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

checksums.yaml +4 -4
data/.rubocop.yml +5 -1
data/CLAUDE.md +45 -0
data/example/beta_random.rb +12 -6
data/lib/classic_bandit/thompson_sampling.rb +31 -11
data/lib/classic_bandit/version.rb +1 -1
metadata +6 -5

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: adf0483eb642e9d50a1265baeb748665a45209a1bea1ed1e101a134936236faa
-  data.tar.gz: 3f4cff2ed59733a2694af2387f91a0fccfdbe30e26d9879984533815abc7b3fd
+  metadata.gz: 9e7b77e4f21e909a3515df0c5785b2f321ce3cf2977d825f0190adcd9f15a82d
+  data.tar.gz: daeb7c6386d836256e0a32c65fc72e68ad02ad2283b4a6e89b8a4eb9eebf35ab
 SHA512:
-  metadata.gz: f1abb6ea6d2c7aa56648242e318ae6ce02e5047cf13fee0dc01412030716629fef088f44b849bce50a45590f9e284a80230a3a5289b9fa1f4c65267bdaefc838
-  data.tar.gz: 49abf9eba86caefb27b3f4fd067d9aa20865e9bac066baca682a01ee34e3273883cf9113f36ebe6f47376c029377fd69749e3b7bd9ba477468a253ce9df5b422
+  metadata.gz: 7ca36345f798cc2e869cba8e0c3e41b33892771227fcd12c85c2d9973d6fb697a79b22366f6f59a7bb041772e1d4688147162b78f0d2530ec9d4dd9da5e45981
+  data.tar.gz: 1bea576eeb3b00795b40206279376fd9b1da678cfd9ce5157829d1be8983fcb7fa7f488d91733715d263e772de250eb1a79145790152e99cb771515453db3fbe

data/.rubocop.yml CHANGED Viewed

@@ -1,5 +1,6 @@
 AllCops:
   TargetRubyVersion: 3.0
+  NewCops: disable
   Include:
     - 'lib/**/*'
     - 'spec/**/*'
@@ -16,4 +17,7 @@ Metrics/BlockLength:
 Style/Documentation:
   Exclude:
-    - '**/*'
+    - '**/*'
+Metrics/MethodLength:
+  Max: 30

data/CLAUDE.md ADDED Viewed

@@ -0,0 +1,45 @@
+# CLAUDE.md
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+## Development Commands
+- **Run tests**: `bundle exec rspec`
+- **Lint code**: `bundle exec rubocop`
+- **Run all checks**: `bundle exec rake` (runs both tests and linting)
+- **Install dependencies**: `bundle install`
+The project uses RuboCop for linting, with an explicit config file reference at Rakefile:15.
+## Architecture Overview
+ClassicBandit is a Ruby gem implementing multi-armed bandit algorithms for A/B testing and optimization. The library uses Zeitwerk for autoloading and follows a modular design pattern.
+### Core Components
+**Arm (`lib/classic_bandit/arm.rb`)**: Represents a bandit arm with trial/success tracking and mean reward calculation.
+**ArmUpdatable (`lib/classic_bandit/arm_updatable.rb`)**: Shared module providing `update(arm, reward)` method for all bandit algorithms. Validates rewards are 0 or 1.
+**Algorithm Implementations**:
+- **EpsilonGreedy**: Simple ε-greedy with exploration/exploitation balance
+- **UCB1**: Upper Confidence Bound without explicit parameters
+- **Softmax**: Temperature-based Boltzmann distribution selection
+- **ThompsonSampling**: Bayesian approach with Beta-Bernoulli model using custom Gamma random number generation
+### Key Design Patterns
+All algorithms implement the same interface:
+- `select_arm()` → returns an Arm instance
+- `update(arm, reward)` → updates arm statistics (from ArmUpdatable)
+- Handle untested arms by random selection
+The Thompson Sampling implementation includes custom statistical functions (`gamma_random`, `normal_random`) and uses the Marsaglia-Tsang method for Gamma distribution sampling.
+### Testing Structure
+Tests are organized in `spec/`, with individual algorithm specs and a main gem spec. They use the standard RSpec testing framework.
+### Example Usage
+The `example/` directory contains simulation scripts demonstrating algorithm comparison with Gnuplot visualization, showing realistic usage patterns with pre-populated arm statistics.

data/example/beta_random.rb CHANGED Viewed

@@ -2,19 +2,25 @@
 require "gnuplot"
-def gamma_random(alpha)
+def gamma_random(alpha) # rubocop:disable Metrics/AbcSize
   return gamma_random(alpha + 1) * rand**(1.0 / alpha) if alpha < 1
-  # Marsaglia-Tsang method
   d = alpha - 1.0 / 3
   c = 1.0 / Math.sqrt(9 * d)
   loop do
-    z = normal_random
-    v = (1 + c * z)**3
+    x = normal_random
+    v = (1 + c * x)**3
+    next if v <= 0
     u = rand
-    return d * v if z > -1.0 / c && Math.log(u) < 0.5 * z * z + d * (1 - v + Math.log(v))
+    # Squeeze test
+    return d * v if u < 1 - 0.0331 * x**4
+    # Full test
+    return d * v if Math.log(u) < 0.5 * x * x + d - d * v + d * Math.log(v)
   end
 end

data/lib/classic_bandit/thompson_sampling.rb CHANGED Viewed

@@ -14,26 +14,39 @@ module ClassicBandit
   class ThompsonSampling
     include ArmUpdatable
-    attr_reader :arms
+    attr_reader :arms, :alpha_prior, :beta_prior
+    # @param arms [Array<Arm>] Array of arms to choose from
+    # @param alpha_prior [Float] Prior parameter for successes (default: 1.0)
+    # @param beta_prior [Float] Prior parameter for failures (default: 1.0)
+    def initialize(arms:, alpha_prior: 1.0, beta_prior: 1.0)
+      raise ArgumentError, "alpha_prior must be positive" unless alpha_prior.positive?
+      raise ArgumentError, "beta_prior must be positive" unless beta_prior.positive?
-    def initialize(arms:)
       @arms = arms
+      @alpha_prior = alpha_prior
+      @beta_prior = beta_prior
     end
     def select_arm
-      return @arms.sample if @arms.all? { |arm| arm.trials.zero? }
       @arms.max_by { |arm| ts_score(arm) }
     end
     private
     def ts_score(arm)
-      return 0.0 if arm.trials.zero?
-      return 1.0 if arm.successes == arm.trials
+      alpha = arm.successes + @alpha_prior
+      beta = (arm.trials - arm.successes) + @beta_prior
+      beta_sample(alpha, beta)
+    end
-      x = gamma_random(arm.successes + 1)
-      y = gamma_random(arm.trials - arm.successes + 1)
+    def beta_sample(alpha, beta)
+      # Beta(1,1) = Uniform(0,1)
+      return rand if alpha == 1.0 && beta == 1.0 # rubocop:disable Lint/FloatComparison
+      x = gamma_random(alpha)
+      y = gamma_random(beta)
       x / (x + y)
     end
@@ -45,11 +58,18 @@ module ClassicBandit
       c = 1.0 / Math.sqrt(9 * d)
       loop do
-        z = normal_random
-        v = (1 + c * z)**3
+        x = normal_random
+        v = (1 + c * x)**3
+        next if v <= 0
         u = rand
-        return d * v if z > -1.0 / c && Math.log(u) < 0.5 * z * z + d * (1 - v + Math.log(v))
+        # Squeeze test
+        return d * v if u < 1 - 0.0331 * x**4
+        # Full test
+        return d * v if Math.log(u) < 0.5 * x * x + d - d * v + d * Math.log(v)
       end
     end

data/lib/classic_bandit/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module ClassicBandit
-  VERSION = "0.1.1"
+  VERSION = "0.1.2"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: classic_bandit
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - Kohei Tsuyuki
-autorequire:
+autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-12-28 00:00:00.000000000 Z
+date: 2025-11-09 00:00:00.000000000 Z
 dependencies: []
 description: Implementation of classic multi-armed bandit algorithms in Ruby. Supports
   Thompson Sampling, UCB1, and Epsilon-Greedy strategies with a simple, consistent
@@ -23,6 +23,7 @@ files:
 - ".rspec"
 - ".rubocop.yml"
 - CHANGELOG.md
+- CLAUDE.md
 - LICENSE.txt
 - README.md
 - Rakefile
@@ -46,7 +47,7 @@ metadata:
   homepage_uri: https://github.com/t-chov/classic_bandit
   source_code_uri: https://github.com/t-chov/classic_bandit
   changelog_uri: https://github.com/t-chov/classic_bandit/blob/main/CHANGELOG.md
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -62,7 +63,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubygems_version: 3.2.33
-signing_key:
+signing_key:
 specification_version: 4
 summary: A Ruby library for classic (non-contextual) multi-armed bandit algorithms
 test_files: []