banditbungee 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- banditbungee-0.1.0/LICENSE +21 -0
- banditbungee-0.1.0/PKG-INFO +183 -0
- banditbungee-0.1.0/README.md +158 -0
- banditbungee-0.1.0/banditbungee.egg-info/PKG-INFO +183 -0
- banditbungee-0.1.0/banditbungee.egg-info/SOURCES.txt +38 -0
- banditbungee-0.1.0/banditbungee.egg-info/dependency_links.txt +1 -0
- banditbungee-0.1.0/banditbungee.egg-info/requires.txt +6 -0
- banditbungee-0.1.0/banditbungee.egg-info/top_level.txt +1 -0
- banditbungee-0.1.0/bandits/__init__.py +31 -0
- banditbungee-0.1.0/bandits/agents/__init__.py +15 -0
- banditbungee-0.1.0/bandits/agents/base.py +23 -0
- banditbungee-0.1.0/bandits/agents/ducb.py +32 -0
- banditbungee-0.1.0/bandits/agents/local_ducb.py +29 -0
- banditbungee-0.1.0/bandits/agents/sw_ucb.py +31 -0
- banditbungee-0.1.0/bandits/agents/ucb.py +29 -0
- banditbungee-0.1.0/bandits/environments/__init__.py +23 -0
- banditbungee-0.1.0/bandits/environments/base.py +31 -0
- banditbungee-0.1.0/bandits/environments/crossing_cosine_2arm.py +46 -0
- banditbungee-0.1.0/bandits/environments/deterministic_k_piecewise.py +45 -0
- banditbungee-0.1.0/bandits/environments/piecewise_stationary.py +45 -0
- banditbungee-0.1.0/bandits/environments/random_length_brownian.py +68 -0
- banditbungee-0.1.0/bandits/environments/random_length_piecewise.py +62 -0
- banditbungee-0.1.0/bandits/environments/stationary.py +29 -0
- banditbungee-0.1.0/bandits/experiments/__init__.py +1 -0
- banditbungee-0.1.0/bandits/experiments/cosine_demo.py +173 -0
- banditbungee-0.1.0/bandits/experiments/multi_run.py +32 -0
- banditbungee-0.1.0/bandits/experiments/piecewise_demo.py +214 -0
- banditbungee-0.1.0/bandits/experiments/random_switch_sweep.py +170 -0
- banditbungee-0.1.0/bandits/experiments/run.py +47 -0
- banditbungee-0.1.0/bandits/experiments/tune_k_piecewise_sweep.py +260 -0
- banditbungee-0.1.0/bandits/experiments/tune_random_switch_sweep.py +308 -0
- banditbungee-0.1.0/bandits/metrics/__init__.py +1 -0
- banditbungee-0.1.0/bandits/metrics/change_metrics.py +97 -0
- banditbungee-0.1.0/bandits/metrics/summary.py +27 -0
- banditbungee-0.1.0/bandits/plots/__init__.py +1 -0
- banditbungee-0.1.0/bandits/plots/frequency_sweep_plots.py +103 -0
- banditbungee-0.1.0/bandits/plots/piecewise_plots.py +302 -0
- banditbungee-0.1.0/pyproject.toml +38 -0
- banditbungee-0.1.0/setup.cfg +4 -0
- banditbungee-0.1.0/tests/test_smoke.py +76 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Aditya-Ojha
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: banditbungee
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A simulation framework for stationary and non-stationary multi-armed bandit experiments.
|
|
5
|
+
Author: bandito
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Keywords: bandits,simulation,reinforcement-learning,research
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering
|
|
16
|
+
Requires-Python: >=3.8
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: numpy
|
|
20
|
+
Requires-Dist: matplotlib
|
|
21
|
+
Requires-Dist: seaborn
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
24
|
+
Dynamic: license-file
|
|
25
|
+
|
|
26
|
+
# BanditBench
|
|
27
|
+
|
|
28
|
+
A clean, object-oriented simulation framework for stationary and non-stationary multi-armed bandit experiments.
|
|
29
|
+
|
|
30
|
+
This framework was designed with a special focus on comparing different forgetting and exploration mechanisms, such as **Sliding-Window UCB**, **Global Discounted UCB**, and **Local Discounted UCB**. It includes a variety of dynamic environments to test algorithmic adaptation speeds and robustness against changing reward landscapes.
|
|
31
|
+
|
|
32
|
+
## Why BanditBench?
|
|
33
|
+
|
|
34
|
+
BanditBench is not intended to replace large general-purpose bandit libraries. Its goal is to provide a compact, readable, and reproducible framework for studying adaptation in non-stationary stochastic bandit settings.
|
|
35
|
+
|
|
36
|
+
The package focuses on:
|
|
37
|
+
- forgetting mechanisms in UCB-style algorithms
|
|
38
|
+
- sudden shifts, smooth drifts, and crossing reward landscapes
|
|
39
|
+
- pseudo-regret and adaptation delay metrics
|
|
40
|
+
- clean experiment scripts for reproducible comparisons
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Features
|
|
45
|
+
|
|
46
|
+
* **Modular Agents**: Easy-to-extend `Agent` base class. Current implementations include UCB, SW-UCB, and Discounted UCB variants.
|
|
47
|
+
* **Dynamic Environments**: Base `Environment` class enforcing strict regret-tracking capabilities. Supports stationary distributions, piecewise sudden shifts, continuous Brownian drifts, and smooth crossing environments.
|
|
48
|
+
* **Reproducibility**: Strict adherence to seeded NumPy Random Generators (`default_rng`) for clean, deterministic experimentation across multiple runs.
|
|
49
|
+
* **Research-Ready Metrics**: Track instantaneous pseudo-regret, optimal arm selection probabilities, and adaptation delays cleanly across any environment.
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Installation
|
|
54
|
+
|
|
55
|
+
This project uses standard Python packaging, so you can install it with `pip` using any of the options below.
|
|
56
|
+
|
|
57
|
+
**Option 1: Install from a local checkout**
|
|
58
|
+
```bash
|
|
59
|
+
git clone https://github.com/AI-is-fun11/banditbench.git
|
|
60
|
+
cd banditbench
|
|
61
|
+
pip install -e .
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
**Option 2: Install directly from GitHub after the repository is pushed**
|
|
65
|
+
```bash
|
|
66
|
+
pip install git+https://github.com/AI-is-fun11/banditbench.git
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
**Option 2b: Install from PyPI**
|
|
70
|
+
```bash
|
|
71
|
+
pip install banditbungee
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
**Option 3: Install test tooling**
|
|
75
|
+
```bash
|
|
76
|
+
pip install -e ".[dev]"
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Quick Start Example
|
|
82
|
+
|
|
83
|
+
Below is a minimal example of how to import the module, spin up a stationary environment, and have an agent interact with it.
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
import numpy as np
|
|
87
|
+
from bandits.agents.ucb import UCB1
|
|
88
|
+
from bandits.environments.stationary import StationaryBernoulliEnv
|
|
89
|
+
|
|
90
|
+
# 1. Initialize a 3-arm stationary environment
|
|
91
|
+
env = StationaryBernoulliEnv(means=[0.2, 0.5, 0.8], horizon=1000)
|
|
92
|
+
env.reset(seed=42)
|
|
93
|
+
|
|
94
|
+
# 2. Initialize a standard UCB1 agent
|
|
95
|
+
agent = UCB1(n_arms=3, c=2.0)
|
|
96
|
+
|
|
97
|
+
cumulative_regret = 0.0
|
|
98
|
+
|
|
99
|
+
# 3. Run the interaction loop
|
|
100
|
+
for t in range(env.horizon):
|
|
101
|
+
# Agent selects an arm
|
|
102
|
+
chosen_arm = agent.select_arm()
|
|
103
|
+
|
|
104
|
+
# Environment yields a reward
|
|
105
|
+
reward = env.step(chosen_arm)
|
|
106
|
+
|
|
107
|
+
# Agent updates its internal statistics
|
|
108
|
+
agent.update(chosen_arm, reward)
|
|
109
|
+
|
|
110
|
+
# Track pseudo-regret (true best mean - chosen mean)
|
|
111
|
+
instant_regret = env.best_mean() - env.current_means()[chosen_arm]
|
|
112
|
+
cumulative_regret += instant_regret
|
|
113
|
+
|
|
114
|
+
print(f"Final Cumulative Pseudo-Regret: {cumulative_regret:.2f}")
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## Running Experiments
|
|
120
|
+
|
|
121
|
+
The repository also includes ready-to-run experiment scripts for comparing algorithms in non-stationary settings.
|
|
122
|
+
|
|
123
|
+
**Piecewise-stationary benchmark**
|
|
124
|
+
```bash
|
|
125
|
+
python -m bandits.experiments.piecewise_demo
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
This runs repeated simulations for `DiscountedUCB`, `LocalDiscountedUCB`, and `SlidingWindowUCB`, then saves:
|
|
129
|
+
- figures to `figures/piecewise_demo/`
|
|
130
|
+
- summary files to `results/piecewise_demo/`
|
|
131
|
+
|
|
132
|
+
**Crossing cosine benchmark**
|
|
133
|
+
```bash
|
|
134
|
+
python -m bandits.experiments.cosine_demo
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
This generates a smooth two-arm crossing environment and saves:
|
|
138
|
+
- figures to `figures/cosine_demo/`
|
|
139
|
+
- summary files to `results/cosine_demo/`
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## Plotting
|
|
144
|
+
|
|
145
|
+
Plot helpers live in `bandits/plots/` and operate on the summarized output returned by the experiment utilities.
|
|
146
|
+
|
|
147
|
+
Example:
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
from bandits.agents.sw_ucb import SlidingWindowUCB
|
|
151
|
+
from bandits.environments.stationary import StationaryBernoulliEnv
|
|
152
|
+
from bandits.experiments.multi_run import run_many
|
|
153
|
+
from bandits.metrics.summary import summarize_run
|
|
154
|
+
from bandits.plots.piecewise_plots import plot_cumulative_regret
|
|
155
|
+
|
|
156
|
+
raw = run_many(
|
|
157
|
+
agent_factory=lambda: SlidingWindowUCB(n_arms=2, window_size=20),
|
|
158
|
+
env_factory=lambda: StationaryBernoulliEnv(means=[0.4, 0.7], horizon=200),
|
|
159
|
+
seeds=[0, 1, 2, 3, 4],
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
summary = summarize_run(raw)
|
|
163
|
+
plot_cumulative_regret({"SlidingWindowUCB": summary}, out_dir="figures/example")
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
The main plotting functions include:
|
|
167
|
+
- `plot_cumulative_regret`
|
|
168
|
+
- `plot_instantaneous_regret`
|
|
169
|
+
- `plot_optimal_tracking`
|
|
170
|
+
- `plot_environment`
|
|
171
|
+
- `plot_means_path`
|
|
172
|
+
- `plot_change_metric_bars`
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
## Directory Structure
|
|
177
|
+
|
|
178
|
+
* `bandits/agents/`: Bandit algorithm implementations. All agents inherit from `Agent`.
|
|
179
|
+
* `bandits/environments/`: Testbeds for both stationary and non-stationary reward distributions. All environments inherit from `Environment`.
|
|
180
|
+
* `bandits/experiments/`: Configurable scripts to run large-scale sweeps and comparisons.
|
|
181
|
+
* `bandits/metrics/`: Calculation of pseudo-regret, adaptation time, and probability of optimal arm selection.
|
|
182
|
+
* `bandits/plots/`: Visualization tools to compare agent performances seamlessly.
|
|
183
|
+
* `tests/`: Basic smoke tests.
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# BanditBench
|
|
2
|
+
|
|
3
|
+
A clean, object-oriented simulation framework for stationary and non-stationary multi-armed bandit experiments.
|
|
4
|
+
|
|
5
|
+
This framework was designed with a special focus on comparing different forgetting and exploration mechanisms, such as **Sliding-Window UCB**, **Global Discounted UCB**, and **Local Discounted UCB**. It includes a variety of dynamic environments to test algorithmic adaptation speeds and robustness against changing reward landscapes.
|
|
6
|
+
|
|
7
|
+
## Why BanditBench?
|
|
8
|
+
|
|
9
|
+
BanditBench is not intended to replace large general-purpose bandit libraries. Its goal is to provide a compact, readable, and reproducible framework for studying adaptation in non-stationary stochastic bandit settings.
|
|
10
|
+
|
|
11
|
+
The package focuses on:
|
|
12
|
+
- forgetting mechanisms in UCB-style algorithms
|
|
13
|
+
- sudden shifts, smooth drifts, and crossing reward landscapes
|
|
14
|
+
- pseudo-regret and adaptation delay metrics
|
|
15
|
+
- clean experiment scripts for reproducible comparisons
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## Features
|
|
20
|
+
|
|
21
|
+
* **Modular Agents**: Easy-to-extend `Agent` base class. Current implementations include UCB, SW-UCB, and Discounted UCB variants.
|
|
22
|
+
* **Dynamic Environments**: Base `Environment` class enforcing strict regret-tracking capabilities. Supports stationary distributions, piecewise sudden shifts, continuous Brownian drifts, and smooth crossing environments.
|
|
23
|
+
* **Reproducibility**: Strict adherence to seeded NumPy Random Generators (`default_rng`) for clean, deterministic experimentation across multiple runs.
|
|
24
|
+
* **Research-Ready Metrics**: Track instantaneous pseudo-regret, optimal arm selection probabilities, and adaptation delays cleanly across any environment.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
This project uses standard Python packaging, so you can install it with `pip` using any of the options below.
|
|
31
|
+
|
|
32
|
+
**Option 1: Install from a local checkout**
|
|
33
|
+
```bash
|
|
34
|
+
git clone https://github.com/AI-is-fun11/banditbench.git
|
|
35
|
+
cd banditbench
|
|
36
|
+
pip install -e .
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
**Option 2: Install directly from GitHub after the repository is pushed**
|
|
40
|
+
```bash
|
|
41
|
+
pip install git+https://github.com/AI-is-fun11/banditbench.git
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
**Option 2b: Install from PyPI**
|
|
45
|
+
```bash
|
|
46
|
+
pip install banditbungee
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
**Option 3: Install test tooling**
|
|
50
|
+
```bash
|
|
51
|
+
pip install -e ".[dev]"
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Quick Start Example
|
|
57
|
+
|
|
58
|
+
Below is a minimal example of how to import the module, spin up a stationary environment, and have an agent interact with it.
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
import numpy as np
|
|
62
|
+
from bandits.agents.ucb import UCB1
|
|
63
|
+
from bandits.environments.stationary import StationaryBernoulliEnv
|
|
64
|
+
|
|
65
|
+
# 1. Initialize a 3-arm stationary environment
|
|
66
|
+
env = StationaryBernoulliEnv(means=[0.2, 0.5, 0.8], horizon=1000)
|
|
67
|
+
env.reset(seed=42)
|
|
68
|
+
|
|
69
|
+
# 2. Initialize a standard UCB1 agent
|
|
70
|
+
agent = UCB1(n_arms=3, c=2.0)
|
|
71
|
+
|
|
72
|
+
cumulative_regret = 0.0
|
|
73
|
+
|
|
74
|
+
# 3. Run the interaction loop
|
|
75
|
+
for t in range(env.horizon):
|
|
76
|
+
# Agent selects an arm
|
|
77
|
+
chosen_arm = agent.select_arm()
|
|
78
|
+
|
|
79
|
+
# Environment yields a reward
|
|
80
|
+
reward = env.step(chosen_arm)
|
|
81
|
+
|
|
82
|
+
# Agent updates its internal statistics
|
|
83
|
+
agent.update(chosen_arm, reward)
|
|
84
|
+
|
|
85
|
+
# Track pseudo-regret (true best mean - chosen mean)
|
|
86
|
+
instant_regret = env.best_mean() - env.current_means()[chosen_arm]
|
|
87
|
+
cumulative_regret += instant_regret
|
|
88
|
+
|
|
89
|
+
print(f"Final Cumulative Pseudo-Regret: {cumulative_regret:.2f}")
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## Running Experiments
|
|
95
|
+
|
|
96
|
+
The repository also includes ready-to-run experiment scripts for comparing algorithms in non-stationary settings.
|
|
97
|
+
|
|
98
|
+
**Piecewise-stationary benchmark**
|
|
99
|
+
```bash
|
|
100
|
+
python -m bandits.experiments.piecewise_demo
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
This runs repeated simulations for `DiscountedUCB`, `LocalDiscountedUCB`, and `SlidingWindowUCB`, then saves:
|
|
104
|
+
- figures to `figures/piecewise_demo/`
|
|
105
|
+
- summary files to `results/piecewise_demo/`
|
|
106
|
+
|
|
107
|
+
**Crossing cosine benchmark**
|
|
108
|
+
```bash
|
|
109
|
+
python -m bandits.experiments.cosine_demo
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
This generates a smooth two-arm crossing environment and saves:
|
|
113
|
+
- figures to `figures/cosine_demo/`
|
|
114
|
+
- summary files to `results/cosine_demo/`
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
## Plotting
|
|
119
|
+
|
|
120
|
+
Plot helpers live in `bandits/plots/` and operate on the summarized output returned by the experiment utilities.
|
|
121
|
+
|
|
122
|
+
Example:
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from bandits.agents.sw_ucb import SlidingWindowUCB
|
|
126
|
+
from bandits.environments.stationary import StationaryBernoulliEnv
|
|
127
|
+
from bandits.experiments.multi_run import run_many
|
|
128
|
+
from bandits.metrics.summary import summarize_run
|
|
129
|
+
from bandits.plots.piecewise_plots import plot_cumulative_regret
|
|
130
|
+
|
|
131
|
+
raw = run_many(
|
|
132
|
+
agent_factory=lambda: SlidingWindowUCB(n_arms=2, window_size=20),
|
|
133
|
+
env_factory=lambda: StationaryBernoulliEnv(means=[0.4, 0.7], horizon=200),
|
|
134
|
+
seeds=[0, 1, 2, 3, 4],
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
summary = summarize_run(raw)
|
|
138
|
+
plot_cumulative_regret({"SlidingWindowUCB": summary}, out_dir="figures/example")
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
The main plotting functions include:
|
|
142
|
+
- `plot_cumulative_regret`
|
|
143
|
+
- `plot_instantaneous_regret`
|
|
144
|
+
- `plot_optimal_tracking`
|
|
145
|
+
- `plot_environment`
|
|
146
|
+
- `plot_means_path`
|
|
147
|
+
- `plot_change_metric_bars`
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## Directory Structure
|
|
152
|
+
|
|
153
|
+
* `bandits/agents/`: Bandit algorithm implementations. All agents inherit from `Agent`.
|
|
154
|
+
* `bandits/environments/`: Testbeds for both stationary and non-stationary reward distributions. All environments inherit from `Environment`.
|
|
155
|
+
* `bandits/experiments/`: Configurable scripts to run large-scale sweeps and comparisons.
|
|
156
|
+
* `bandits/metrics/`: Calculation of pseudo-regret, adaptation time, and probability of optimal arm selection.
|
|
157
|
+
* `bandits/plots/`: Visualization tools to compare agent performances seamlessly.
|
|
158
|
+
* `tests/`: Basic smoke tests.
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: banditbungee
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A simulation framework for stationary and non-stationary multi-armed bandit experiments.
|
|
5
|
+
Author: bandito
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Keywords: bandits,simulation,reinforcement-learning,research
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering
|
|
16
|
+
Requires-Python: >=3.8
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: numpy
|
|
20
|
+
Requires-Dist: matplotlib
|
|
21
|
+
Requires-Dist: seaborn
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
24
|
+
Dynamic: license-file
|
|
25
|
+
|
|
26
|
+
# BanditBench
|
|
27
|
+
|
|
28
|
+
A clean, object-oriented simulation framework for stationary and non-stationary multi-armed bandit experiments.
|
|
29
|
+
|
|
30
|
+
This framework was designed with a special focus on comparing different forgetting and exploration mechanisms, such as **Sliding-Window UCB**, **Global Discounted UCB**, and **Local Discounted UCB**. It includes a variety of dynamic environments to test algorithmic adaptation speeds and robustness against changing reward landscapes.
|
|
31
|
+
|
|
32
|
+
## Why BanditBench?
|
|
33
|
+
|
|
34
|
+
BanditBench is not intended to replace large general-purpose bandit libraries. Its goal is to provide a compact, readable, and reproducible framework for studying adaptation in non-stationary stochastic bandit settings.
|
|
35
|
+
|
|
36
|
+
The package focuses on:
|
|
37
|
+
- forgetting mechanisms in UCB-style algorithms
|
|
38
|
+
- sudden shifts, smooth drifts, and crossing reward landscapes
|
|
39
|
+
- pseudo-regret and adaptation delay metrics
|
|
40
|
+
- clean experiment scripts for reproducible comparisons
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Features
|
|
45
|
+
|
|
46
|
+
* **Modular Agents**: Easy-to-extend `Agent` base class. Current implementations include UCB, SW-UCB, and Discounted UCB variants.
|
|
47
|
+
* **Dynamic Environments**: Base `Environment` class enforcing strict regret-tracking capabilities. Supports stationary distributions, piecewise sudden shifts, continuous Brownian drifts, and smooth crossing environments.
|
|
48
|
+
* **Reproducibility**: Strict adherence to seeded NumPy Random Generators (`default_rng`) for clean, deterministic experimentation across multiple runs.
|
|
49
|
+
* **Research-Ready Metrics**: Track instantaneous pseudo-regret, optimal arm selection probabilities, and adaptation delays cleanly across any environment.
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Installation
|
|
54
|
+
|
|
55
|
+
This project uses standard Python packaging, so you can install it with `pip` using any of the options below.
|
|
56
|
+
|
|
57
|
+
**Option 1: Install from a local checkout**
|
|
58
|
+
```bash
|
|
59
|
+
git clone https://github.com/AI-is-fun11/banditbench.git
|
|
60
|
+
cd banditbench
|
|
61
|
+
pip install -e .
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
**Option 2: Install directly from GitHub after the repository is pushed**
|
|
65
|
+
```bash
|
|
66
|
+
pip install git+https://github.com/AI-is-fun11/banditbench.git
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
**Option 2b: Install from PyPI**
|
|
70
|
+
```bash
|
|
71
|
+
pip install banditbungee
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
**Option 3: Install test tooling**
|
|
75
|
+
```bash
|
|
76
|
+
pip install -e ".[dev]"
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Quick Start Example
|
|
82
|
+
|
|
83
|
+
Below is a minimal example of how to import the module, spin up a stationary environment, and have an agent interact with it.
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
import numpy as np
|
|
87
|
+
from bandits.agents.ucb import UCB1
|
|
88
|
+
from bandits.environments.stationary import StationaryBernoulliEnv
|
|
89
|
+
|
|
90
|
+
# 1. Initialize a 3-arm stationary environment
|
|
91
|
+
env = StationaryBernoulliEnv(means=[0.2, 0.5, 0.8], horizon=1000)
|
|
92
|
+
env.reset(seed=42)
|
|
93
|
+
|
|
94
|
+
# 2. Initialize a standard UCB1 agent
|
|
95
|
+
agent = UCB1(n_arms=3, c=2.0)
|
|
96
|
+
|
|
97
|
+
cumulative_regret = 0.0
|
|
98
|
+
|
|
99
|
+
# 3. Run the interaction loop
|
|
100
|
+
for t in range(env.horizon):
|
|
101
|
+
# Agent selects an arm
|
|
102
|
+
chosen_arm = agent.select_arm()
|
|
103
|
+
|
|
104
|
+
# Environment yields a reward
|
|
105
|
+
reward = env.step(chosen_arm)
|
|
106
|
+
|
|
107
|
+
# Agent updates its internal statistics
|
|
108
|
+
agent.update(chosen_arm, reward)
|
|
109
|
+
|
|
110
|
+
# Track pseudo-regret (true best mean - chosen mean)
|
|
111
|
+
instant_regret = env.best_mean() - env.current_means()[chosen_arm]
|
|
112
|
+
cumulative_regret += instant_regret
|
|
113
|
+
|
|
114
|
+
print(f"Final Cumulative Pseudo-Regret: {cumulative_regret:.2f}")
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## Running Experiments
|
|
120
|
+
|
|
121
|
+
The repository also includes ready-to-run experiment scripts for comparing algorithms in non-stationary settings.
|
|
122
|
+
|
|
123
|
+
**Piecewise-stationary benchmark**
|
|
124
|
+
```bash
|
|
125
|
+
python -m bandits.experiments.piecewise_demo
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
This runs repeated simulations for `DiscountedUCB`, `LocalDiscountedUCB`, and `SlidingWindowUCB`, then saves:
|
|
129
|
+
- figures to `figures/piecewise_demo/`
|
|
130
|
+
- summary files to `results/piecewise_demo/`
|
|
131
|
+
|
|
132
|
+
**Crossing cosine benchmark**
|
|
133
|
+
```bash
|
|
134
|
+
python -m bandits.experiments.cosine_demo
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
This generates a smooth two-arm crossing environment and saves:
|
|
138
|
+
- figures to `figures/cosine_demo/`
|
|
139
|
+
- summary files to `results/cosine_demo/`
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## Plotting
|
|
144
|
+
|
|
145
|
+
Plot helpers live in `bandits/plots/` and operate on the summarized output returned by the experiment utilities.
|
|
146
|
+
|
|
147
|
+
Example:
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
from bandits.agents.sw_ucb import SlidingWindowUCB
|
|
151
|
+
from bandits.environments.stationary import StationaryBernoulliEnv
|
|
152
|
+
from bandits.experiments.multi_run import run_many
|
|
153
|
+
from bandits.metrics.summary import summarize_run
|
|
154
|
+
from bandits.plots.piecewise_plots import plot_cumulative_regret
|
|
155
|
+
|
|
156
|
+
raw = run_many(
|
|
157
|
+
agent_factory=lambda: SlidingWindowUCB(n_arms=2, window_size=20),
|
|
158
|
+
env_factory=lambda: StationaryBernoulliEnv(means=[0.4, 0.7], horizon=200),
|
|
159
|
+
seeds=[0, 1, 2, 3, 4],
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
summary = summarize_run(raw)
|
|
163
|
+
plot_cumulative_regret({"SlidingWindowUCB": summary}, out_dir="figures/example")
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
The main plotting functions include:
|
|
167
|
+
- `plot_cumulative_regret`
|
|
168
|
+
- `plot_instantaneous_regret`
|
|
169
|
+
- `plot_optimal_tracking`
|
|
170
|
+
- `plot_environment`
|
|
171
|
+
- `plot_means_path`
|
|
172
|
+
- `plot_change_metric_bars`
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
## Directory Structure
|
|
177
|
+
|
|
178
|
+
* `bandits/agents/`: Bandit algorithm implementations. All agents inherit from `Agent`.
|
|
179
|
+
* `bandits/environments/`: Testbeds for both stationary and non-stationary reward distributions. All environments inherit from `Environment`.
|
|
180
|
+
* `bandits/experiments/`: Configurable scripts to run large-scale sweeps and comparisons.
|
|
181
|
+
* `bandits/metrics/`: Calculation of pseudo-regret, adaptation time, and probability of optimal arm selection.
|
|
182
|
+
* `bandits/plots/`: Visualization tools to compare agent performances seamlessly.
|
|
183
|
+
* `tests/`: Basic smoke tests.
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
banditbungee.egg-info/PKG-INFO
|
|
5
|
+
banditbungee.egg-info/SOURCES.txt
|
|
6
|
+
banditbungee.egg-info/dependency_links.txt
|
|
7
|
+
banditbungee.egg-info/requires.txt
|
|
8
|
+
banditbungee.egg-info/top_level.txt
|
|
9
|
+
bandits/__init__.py
|
|
10
|
+
bandits/agents/__init__.py
|
|
11
|
+
bandits/agents/base.py
|
|
12
|
+
bandits/agents/ducb.py
|
|
13
|
+
bandits/agents/local_ducb.py
|
|
14
|
+
bandits/agents/sw_ucb.py
|
|
15
|
+
bandits/agents/ucb.py
|
|
16
|
+
bandits/environments/__init__.py
|
|
17
|
+
bandits/environments/base.py
|
|
18
|
+
bandits/environments/crossing_cosine_2arm.py
|
|
19
|
+
bandits/environments/deterministic_k_piecewise.py
|
|
20
|
+
bandits/environments/piecewise_stationary.py
|
|
21
|
+
bandits/environments/random_length_brownian.py
|
|
22
|
+
bandits/environments/random_length_piecewise.py
|
|
23
|
+
bandits/environments/stationary.py
|
|
24
|
+
bandits/experiments/__init__.py
|
|
25
|
+
bandits/experiments/cosine_demo.py
|
|
26
|
+
bandits/experiments/multi_run.py
|
|
27
|
+
bandits/experiments/piecewise_demo.py
|
|
28
|
+
bandits/experiments/random_switch_sweep.py
|
|
29
|
+
bandits/experiments/run.py
|
|
30
|
+
bandits/experiments/tune_k_piecewise_sweep.py
|
|
31
|
+
bandits/experiments/tune_random_switch_sweep.py
|
|
32
|
+
bandits/metrics/__init__.py
|
|
33
|
+
bandits/metrics/change_metrics.py
|
|
34
|
+
bandits/metrics/summary.py
|
|
35
|
+
bandits/plots/__init__.py
|
|
36
|
+
bandits/plots/frequency_sweep_plots.py
|
|
37
|
+
bandits/plots/piecewise_plots.py
|
|
38
|
+
tests/test_smoke.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
bandits
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""BanditBench public API."""
|
|
2
|
+
|
|
3
|
+
from bandits.agents.ducb import DiscountedUCB
|
|
4
|
+
from bandits.agents.local_ducb import LocalDiscountedUCB
|
|
5
|
+
from bandits.agents.sw_ucb import SlidingWindowUCB
|
|
6
|
+
from bandits.agents.ucb import UCB1
|
|
7
|
+
from bandits.environments.crossing_cosine_2arm import CrossingCosine2ArmEnv
|
|
8
|
+
from bandits.environments.deterministic_k_piecewise import (
|
|
9
|
+
DeterministicKPiecewiseBernoulliEnv,
|
|
10
|
+
)
|
|
11
|
+
from bandits.environments.piecewise_stationary import PiecewiseStationaryBernoulliEnv
|
|
12
|
+
from bandits.environments.random_length_brownian import RandomLengthBrownianEnv
|
|
13
|
+
from bandits.environments.random_length_piecewise import (
|
|
14
|
+
RandomLengthPiecewiseBernoulliEnv,
|
|
15
|
+
)
|
|
16
|
+
from bandits.environments.stationary import StationaryBernoulliEnv
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"CrossingCosine2ArmEnv",
|
|
20
|
+
"DeterministicKPiecewiseBernoulliEnv",
|
|
21
|
+
"DiscountedUCB",
|
|
22
|
+
"LocalDiscountedUCB",
|
|
23
|
+
"PiecewiseStationaryBernoulliEnv",
|
|
24
|
+
"RandomLengthBrownianEnv",
|
|
25
|
+
"RandomLengthPiecewiseBernoulliEnv",
|
|
26
|
+
"SlidingWindowUCB",
|
|
27
|
+
"StationaryBernoulliEnv",
|
|
28
|
+
"UCB1",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Bandit algorithms."""
|
|
2
|
+
|
|
3
|
+
from bandits.agents.base import Agent
|
|
4
|
+
from bandits.agents.ducb import DiscountedUCB
|
|
5
|
+
from bandits.agents.local_ducb import LocalDiscountedUCB
|
|
6
|
+
from bandits.agents.sw_ucb import SlidingWindowUCB
|
|
7
|
+
from bandits.agents.ucb import UCB1
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"Agent",
|
|
11
|
+
"DiscountedUCB",
|
|
12
|
+
"LocalDiscountedUCB",
|
|
13
|
+
"SlidingWindowUCB",
|
|
14
|
+
"UCB1",
|
|
15
|
+
]
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Agent(ABC):
    """Abstract base class shared by all bandit agents.

    Holds the state common to every agent: the number of arms, a display
    name, and a step counter ``t`` that starts at zero. Concrete agents must
    implement ``reset``, ``select_arm`` and ``update``.
    """

    def __init__(self, n_arms: int, name: str | None = None):
        """Initialise the shared agent state.

        Args:
            n_arms: Number of arms the agent chooses between. The value is
                coerced with ``int()``.
            name: Optional label for the agent. When omitted (or empty) the
                concrete class name is used instead.

        Raises:
            ValueError: If ``n_arms`` is smaller than 1.
        """
        n_arms = int(n_arms)
        # Fail fast: an agent without arms has nothing to select, and the
        # error would otherwise only surface later inside a subclass.
        if n_arms < 1:
            raise ValueError(f"n_arms must be a positive integer, got {n_arms}")
        self.n_arms = n_arms
        self.name = name or self.__class__.__name__
        self.t = 0

    @abstractmethod
    def reset(self) -> None:
        """Reset the agent; the exact state cleared is defined by subclasses."""

    @abstractmethod
    def select_arm(self) -> int:
        """Return the index of the arm the agent chooses to play next."""

    @abstractmethod
    def update(self, arm: int, reward: float) -> None:
        """Record the ``reward`` observed after playing ``arm``."""
|
|
23
|
+
|