banditbungee 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. banditbungee-0.1.0/LICENSE +21 -0
  2. banditbungee-0.1.0/PKG-INFO +183 -0
  3. banditbungee-0.1.0/README.md +158 -0
  4. banditbungee-0.1.0/banditbungee.egg-info/PKG-INFO +183 -0
  5. banditbungee-0.1.0/banditbungee.egg-info/SOURCES.txt +38 -0
  6. banditbungee-0.1.0/banditbungee.egg-info/dependency_links.txt +1 -0
  7. banditbungee-0.1.0/banditbungee.egg-info/requires.txt +6 -0
  8. banditbungee-0.1.0/banditbungee.egg-info/top_level.txt +1 -0
  9. banditbungee-0.1.0/bandits/__init__.py +31 -0
  10. banditbungee-0.1.0/bandits/agents/__init__.py +15 -0
  11. banditbungee-0.1.0/bandits/agents/base.py +23 -0
  12. banditbungee-0.1.0/bandits/agents/ducb.py +32 -0
  13. banditbungee-0.1.0/bandits/agents/local_ducb.py +29 -0
  14. banditbungee-0.1.0/bandits/agents/sw_ucb.py +31 -0
  15. banditbungee-0.1.0/bandits/agents/ucb.py +29 -0
  16. banditbungee-0.1.0/bandits/environments/__init__.py +23 -0
  17. banditbungee-0.1.0/bandits/environments/base.py +31 -0
  18. banditbungee-0.1.0/bandits/environments/crossing_cosine_2arm.py +46 -0
  19. banditbungee-0.1.0/bandits/environments/deterministic_k_piecewise.py +45 -0
  20. banditbungee-0.1.0/bandits/environments/piecewise_stationary.py +45 -0
  21. banditbungee-0.1.0/bandits/environments/random_length_brownian.py +68 -0
  22. banditbungee-0.1.0/bandits/environments/random_length_piecewise.py +62 -0
  23. banditbungee-0.1.0/bandits/environments/stationary.py +29 -0
  24. banditbungee-0.1.0/bandits/experiments/__init__.py +1 -0
  25. banditbungee-0.1.0/bandits/experiments/cosine_demo.py +173 -0
  26. banditbungee-0.1.0/bandits/experiments/multi_run.py +32 -0
  27. banditbungee-0.1.0/bandits/experiments/piecewise_demo.py +214 -0
  28. banditbungee-0.1.0/bandits/experiments/random_switch_sweep.py +170 -0
  29. banditbungee-0.1.0/bandits/experiments/run.py +47 -0
  30. banditbungee-0.1.0/bandits/experiments/tune_k_piecewise_sweep.py +260 -0
  31. banditbungee-0.1.0/bandits/experiments/tune_random_switch_sweep.py +308 -0
  32. banditbungee-0.1.0/bandits/metrics/__init__.py +1 -0
  33. banditbungee-0.1.0/bandits/metrics/change_metrics.py +97 -0
  34. banditbungee-0.1.0/bandits/metrics/summary.py +27 -0
  35. banditbungee-0.1.0/bandits/plots/__init__.py +1 -0
  36. banditbungee-0.1.0/bandits/plots/frequency_sweep_plots.py +103 -0
  37. banditbungee-0.1.0/bandits/plots/piecewise_plots.py +302 -0
  38. banditbungee-0.1.0/pyproject.toml +38 -0
  39. banditbungee-0.1.0/setup.cfg +4 -0
  40. banditbungee-0.1.0/tests/test_smoke.py +76 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Aditya-Ojha
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,183 @@
1
+ Metadata-Version: 2.4
2
+ Name: banditbungee
3
+ Version: 0.1.0
4
+ Summary: A simulation framework for stationary and non-stationary multi-armed bandit experiments.
5
+ Author: bandito
6
+ License-Expression: MIT
7
+ Keywords: bandits,simulation,reinforcement-learning,research
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3 :: Only
10
+ Classifier: Programming Language :: Python :: 3.8
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Topic :: Scientific/Engineering
16
+ Requires-Python: >=3.8
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: numpy
20
+ Requires-Dist: matplotlib
21
+ Requires-Dist: seaborn
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest>=7; extra == "dev"
24
+ Dynamic: license-file
25
+
26
+ # BanditBench
27
+
28
+ A clean, object-oriented simulation framework for stationary and non-stationary multi-armed bandit experiments.
29
+
30
+ This framework was designed with a special focus on comparing different forgetting and exploration mechanisms, such as **Sliding-Window UCB**, **Global Discounted UCB**, and **Local Discounted UCB**. It includes a variety of dynamic environments to test algorithmic adaptation speeds and robustness against changing reward landscapes.
31
+
32
+ ## Why BanditBench?
33
+
34
+ BanditBench is not intended to replace large general-purpose bandit libraries. Its goal is to provide a compact, readable, and reproducible framework for studying adaptation in non-stationary stochastic bandit settings.
35
+
36
+ The package focuses on:
37
+ - forgetting mechanisms in UCB-style algorithms
38
+ - sudden shifts, smooth drifts, and crossing reward landscapes
39
+ - pseudo-regret and adaptation delay metrics
40
+ - clean experiment scripts for reproducible comparisons
41
+
42
+ ---
43
+
44
+ ## Features
45
+
46
+ * **Modular Agents**: Easy-to-extend `Agent` base class. Current implementations include UCB, SW-UCB, and Discounted UCB variants.
47
+ * **Dynamic Environments**: Base `Environment` class enforcing strict regret-tracking capabilities. Supports stationary distributions, piecewise sudden shifts, continuous Brownian drifts, and smooth crossing environments.
48
+ * **Reproducibility**: Strict adherence to seeded NumPy Random Generators (`default_rng`) for clean, deterministic experimentation across multiple runs.
49
+ * **Research-Ready Metrics**: Track instantaneous pseudo-regret, optimal arm selection probabilities, and adaptation delays cleanly across any environment.
50
+
51
+ ---
52
+
53
+ ## Installation
54
+
55
+ Since this module utilizes standard Python packaging, you can install it easily in your environment.
56
+
57
+ **Option 1: Install from a local checkout**
58
+ ```bash
59
+ git clone https://github.com/AI-is-fun11/banditbench.git
60
+ cd banditbench
61
+ pip install -e .
62
+ ```
63
+
64
+ **Option 2: Install directly from GitHub after the repository is pushed**
65
+ ```bash
66
+ pip install git+https://github.com/AI-is-fun11/banditbench.git
67
+ ```
68
+
69
+ **Option 2b: Install from PyPI after publication**
70
+ ```bash
71
+ pip install banditbungee
72
+ ```
73
+
74
+ **Option 3: Install test tooling**
75
+ ```bash
76
+ pip install -e .[dev]
77
+ ```
78
+
79
+ ---
80
+
81
+ ## Quick Start Example
82
+
83
+ Below is a minimal example of how to import the module, spin up a stationary environment, and have an agent interact with it.
84
+
85
+ ```python
86
+ import numpy as np
87
+ from bandits.agents.ucb import UCB1
88
+ from bandits.environments.stationary import StationaryBernoulliEnv
89
+
90
+ # 1. Initialize a 3-arm stationary environment
91
+ env = StationaryBernoulliEnv(means=[0.2, 0.5, 0.8], horizon=1000)
92
+ env.reset(seed=42)
93
+
94
+ # 2. Initialize a standard UCB1 agent
95
+ agent = UCB1(n_arms=3, c=2.0)
96
+
97
+ cumulative_regret = 0.0
98
+
99
+ # 3. Run the interaction loop
100
+ for t in range(env.horizon):
101
+ # Agent selects an arm
102
+ chosen_arm = agent.select_arm()
103
+
104
+ # Environment yields a reward
105
+ reward = env.step(chosen_arm)
106
+
107
+ # Agent updates its internal statistics
108
+ agent.update(chosen_arm, reward)
109
+
110
+ # Track pseudo-regret (true best mean - chosen mean)
111
+ instant_regret = env.best_mean() - env.current_means()[chosen_arm]
112
+ cumulative_regret += instant_regret
113
+
114
+ print(f"Final Cumulative Pseudo-Regret: {cumulative_regret:.2f}")
115
+ ```
116
+
117
+ ---
118
+
119
+ ## Running Experiments
120
+
121
+ The repository also includes ready-to-run experiment scripts for comparing algorithms in non-stationary settings.
122
+
123
+ **Piecewise-stationary benchmark**
124
+ ```bash
125
+ python -m bandits.experiments.piecewise_demo
126
+ ```
127
+
128
+ This runs repeated simulations for `DiscountedUCB`, `LocalDiscountedUCB`, and `SlidingWindowUCB`, then saves:
129
+ - figures to `figures/piecewise_demo/`
130
+ - summary files to `results/piecewise_demo/`
131
+
132
+ **Crossing cosine benchmark**
133
+ ```bash
134
+ python -m bandits.experiments.cosine_demo
135
+ ```
136
+
137
+ This generates a smooth two-arm crossing environment and saves:
138
+ - figures to `figures/cosine_demo/`
139
+ - summary files to `results/cosine_demo/`
140
+
141
+ ---
142
+
143
+ ## Plotting
144
+
145
+ Plot helpers live in `bandits/plots/` and operate on the summarized output returned by the experiment utilities.
146
+
147
+ Example:
148
+
149
+ ```python
150
+ from bandits.agents.sw_ucb import SlidingWindowUCB
151
+ from bandits.environments.stationary import StationaryBernoulliEnv
152
+ from bandits.experiments.multi_run import run_many
153
+ from bandits.metrics.summary import summarize_run
154
+ from bandits.plots.piecewise_plots import plot_cumulative_regret
155
+
156
+ raw = run_many(
157
+ agent_factory=lambda: SlidingWindowUCB(n_arms=2, window_size=20),
158
+ env_factory=lambda: StationaryBernoulliEnv(means=[0.4, 0.7], horizon=200),
159
+ seeds=[0, 1, 2, 3, 4],
160
+ )
161
+
162
+ summary = summarize_run(raw)
163
+ plot_cumulative_regret({"SlidingWindowUCB": summary}, out_dir="figures/example")
164
+ ```
165
+
166
+ The main plotting functions include:
167
+ - `plot_cumulative_regret`
168
+ - `plot_instantaneous_regret`
169
+ - `plot_optimal_tracking`
170
+ - `plot_environment`
171
+ - `plot_means_path`
172
+ - `plot_change_metric_bars`
173
+
174
+ ---
175
+
176
+ ## Directory Structure
177
+
178
+ * `bandits/agents/`: Bandit algorithm implementations. All agents inherit from `Agent`.
179
+ * `bandits/environments/`: Testbeds for both stationary and non-stationary reward distributions. All environments inherit from `Environment`.
180
+ * `bandits/experiments/`: Configurable scripts to run large-scale sweeps and comparisons.
181
+ * `bandits/metrics/`: Calculation of pseudo-regret, adaptation time, and probability of optimal arm selection.
182
+ * `bandits/plots/`: Visualization tools to compare agent performances seamlessly.
183
+ * `tests/`: Basic smoke tests.
@@ -0,0 +1,158 @@
1
+ # BanditBench
2
+
3
+ A clean, object-oriented simulation framework for stationary and non-stationary multi-armed bandit experiments.
4
+
5
+ This framework was designed with a special focus on comparing different forgetting and exploration mechanisms, such as **Sliding-Window UCB**, **Global Discounted UCB**, and **Local Discounted UCB**. It includes a variety of dynamic environments to test algorithmic adaptation speeds and robustness against changing reward landscapes.
6
+
7
+ ## Why BanditBench?
8
+
9
+ BanditBench is not intended to replace large general-purpose bandit libraries. Its goal is to provide a compact, readable, and reproducible framework for studying adaptation in non-stationary stochastic bandit settings.
10
+
11
+ The package focuses on:
12
+ - forgetting mechanisms in UCB-style algorithms
13
+ - sudden shifts, smooth drifts, and crossing reward landscapes
14
+ - pseudo-regret and adaptation delay metrics
15
+ - clean experiment scripts for reproducible comparisons
16
+
17
+ ---
18
+
19
+ ## Features
20
+
21
+ * **Modular Agents**: Easy-to-extend `Agent` base class. Current implementations include UCB, SW-UCB, and Discounted UCB variants.
22
+ * **Dynamic Environments**: Base `Environment` class enforcing strict regret-tracking capabilities. Supports stationary distributions, piecewise sudden shifts, continuous Brownian drifts, and smooth crossing environments.
23
+ * **Reproducibility**: Strict adherence to seeded NumPy Random Generators (`default_rng`) for clean, deterministic experimentation across multiple runs.
24
+ * **Research-Ready Metrics**: Track instantaneous pseudo-regret, optimal arm selection probabilities, and adaptation delays cleanly across any environment.
25
+
26
+ ---
27
+
28
+ ## Installation
29
+
30
+ BanditBench uses standard Python packaging, so it installs with `pip`. The distribution is named `banditbungee`, while the importable package is named `bandits`.
31
+
32
+ **Option 1: Install from a local checkout**
33
+ ```bash
34
+ git clone https://github.com/AI-is-fun11/banditbench.git
35
+ cd banditbench
36
+ pip install -e .
37
+ ```
38
+
39
+ **Option 2: Install directly from GitHub after the repository is pushed**
40
+ ```bash
41
+ pip install git+https://github.com/AI-is-fun11/banditbench.git
42
+ ```
43
+
44
+ **Option 3: Install from PyPI**
45
+ ```bash
46
+ pip install banditbungee
47
+ ```
48
+
49
+ **Option 4: Install test tooling (from a local checkout)**
50
+ ```bash
51
+ pip install -e ".[dev]"
52
+ ```
53
+
54
+ ---
55
+
56
+ ## Quick Start Example
57
+
58
+ Below is a minimal example of how to import the module, spin up a stationary environment, and have an agent interact with it.
59
+
60
+ ```python
61
+ import numpy as np
62
+ from bandits.agents.ucb import UCB1
63
+ from bandits.environments.stationary import StationaryBernoulliEnv
64
+
65
+ # 1. Initialize a 3-arm stationary environment
66
+ env = StationaryBernoulliEnv(means=[0.2, 0.5, 0.8], horizon=1000)
67
+ env.reset(seed=42)
68
+
69
+ # 2. Initialize a standard UCB1 agent
70
+ agent = UCB1(n_arms=3, c=2.0)
71
+
72
+ cumulative_regret = 0.0
73
+
74
+ # 3. Run the interaction loop
75
+ for t in range(env.horizon):
76
+ # Agent selects an arm
77
+ chosen_arm = agent.select_arm()
78
+
79
+ # Environment yields a reward
80
+ reward = env.step(chosen_arm)
81
+
82
+ # Agent updates its internal statistics
83
+ agent.update(chosen_arm, reward)
84
+
85
+ # Track pseudo-regret (true best mean - chosen mean)
86
+ instant_regret = env.best_mean() - env.current_means()[chosen_arm]
87
+ cumulative_regret += instant_regret
88
+
89
+ print(f"Final Cumulative Pseudo-Regret: {cumulative_regret:.2f}")
90
+ ```
91
+
92
+ ---
93
+
94
+ ## Running Experiments
95
+
96
+ The repository also includes ready-to-run experiment scripts for comparing algorithms in non-stationary settings.
97
+
98
+ **Piecewise-stationary benchmark**
99
+ ```bash
100
+ python -m bandits.experiments.piecewise_demo
101
+ ```
102
+
103
+ This runs repeated simulations for `DiscountedUCB`, `LocalDiscountedUCB`, and `SlidingWindowUCB`, then saves:
104
+ - figures to `figures/piecewise_demo/`
105
+ - summary files to `results/piecewise_demo/`
106
+
107
+ **Crossing cosine benchmark**
108
+ ```bash
109
+ python -m bandits.experiments.cosine_demo
110
+ ```
111
+
112
+ This generates a smooth two-arm crossing environment and saves:
113
+ - figures to `figures/cosine_demo/`
114
+ - summary files to `results/cosine_demo/`
115
+
116
+ ---
117
+
118
+ ## Plotting
119
+
120
+ Plot helpers live in `bandits/plots/` and operate on the summarized output returned by the experiment utilities.
121
+
122
+ Example:
123
+
124
+ ```python
125
+ from bandits.agents.sw_ucb import SlidingWindowUCB
126
+ from bandits.environments.stationary import StationaryBernoulliEnv
127
+ from bandits.experiments.multi_run import run_many
128
+ from bandits.metrics.summary import summarize_run
129
+ from bandits.plots.piecewise_plots import plot_cumulative_regret
130
+
131
+ raw = run_many(
132
+ agent_factory=lambda: SlidingWindowUCB(n_arms=2, window_size=20),
133
+ env_factory=lambda: StationaryBernoulliEnv(means=[0.4, 0.7], horizon=200),
134
+ seeds=[0, 1, 2, 3, 4],
135
+ )
136
+
137
+ summary = summarize_run(raw)
138
+ plot_cumulative_regret({"SlidingWindowUCB": summary}, out_dir="figures/example")
139
+ ```
140
+
141
+ The main plotting functions include:
142
+ - `plot_cumulative_regret`
143
+ - `plot_instantaneous_regret`
144
+ - `plot_optimal_tracking`
145
+ - `plot_environment`
146
+ - `plot_means_path`
147
+ - `plot_change_metric_bars`
148
+
149
+ ---
150
+
151
+ ## Directory Structure
152
+
153
+ * `bandits/agents/`: Bandit algorithm implementations. All agents inherit from `Agent`.
154
+ * `bandits/environments/`: Testbeds for both stationary and non-stationary reward distributions. All environments inherit from `Environment`.
155
+ * `bandits/experiments/`: Configurable scripts to run large-scale sweeps and comparisons.
156
+ * `bandits/metrics/`: Calculation of pseudo-regret, adaptation time, and probability of optimal arm selection.
157
+ * `bandits/plots/`: Visualization tools to compare agent performances seamlessly.
158
+ * `tests/`: Basic smoke tests.
@@ -0,0 +1,183 @@
1
+ Metadata-Version: 2.4
2
+ Name: banditbungee
3
+ Version: 0.1.0
4
+ Summary: A simulation framework for stationary and non-stationary multi-armed bandit experiments.
5
+ Author: bandito
6
+ License-Expression: MIT
7
+ Keywords: bandits,simulation,reinforcement-learning,research
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3 :: Only
10
+ Classifier: Programming Language :: Python :: 3.8
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Topic :: Scientific/Engineering
16
+ Requires-Python: >=3.8
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: numpy
20
+ Requires-Dist: matplotlib
21
+ Requires-Dist: seaborn
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest>=7; extra == "dev"
24
+ Dynamic: license-file
25
+
26
+ # BanditBench
27
+
28
+ A clean, object-oriented simulation framework for stationary and non-stationary multi-armed bandit experiments.
29
+
30
+ This framework was designed with a special focus on comparing different forgetting and exploration mechanisms, such as **Sliding-Window UCB**, **Global Discounted UCB**, and **Local Discounted UCB**. It includes a variety of dynamic environments to test algorithmic adaptation speeds and robustness against changing reward landscapes.
31
+
32
+ ## Why BanditBench?
33
+
34
+ BanditBench is not intended to replace large general-purpose bandit libraries. Its goal is to provide a compact, readable, and reproducible framework for studying adaptation in non-stationary stochastic bandit settings.
35
+
36
+ The package focuses on:
37
+ - forgetting mechanisms in UCB-style algorithms
38
+ - sudden shifts, smooth drifts, and crossing reward landscapes
39
+ - pseudo-regret and adaptation delay metrics
40
+ - clean experiment scripts for reproducible comparisons
41
+
42
+ ---
43
+
44
+ ## Features
45
+
46
+ * **Modular Agents**: Easy-to-extend `Agent` base class. Current implementations include UCB, SW-UCB, and Discounted UCB variants.
47
+ * **Dynamic Environments**: Base `Environment` class enforcing strict regret-tracking capabilities. Supports stationary distributions, piecewise sudden shifts, continuous Brownian drifts, and smooth crossing environments.
48
+ * **Reproducibility**: Strict adherence to seeded NumPy Random Generators (`default_rng`) for clean, deterministic experimentation across multiple runs.
49
+ * **Research-Ready Metrics**: Track instantaneous pseudo-regret, optimal arm selection probabilities, and adaptation delays cleanly across any environment.
50
+
51
+ ---
52
+
53
+ ## Installation
54
+
55
+ Since this module utilizes standard Python packaging, you can install it easily in your environment.
56
+
57
+ **Option 1: Install from a local checkout**
58
+ ```bash
59
+ git clone https://github.com/AI-is-fun11/banditbench.git
60
+ cd banditbench
61
+ pip install -e .
62
+ ```
63
+
64
+ **Option 2: Install directly from GitHub after the repository is pushed**
65
+ ```bash
66
+ pip install git+https://github.com/AI-is-fun11/banditbench.git
67
+ ```
68
+
69
+ **Option 2b: Install from PyPI after publication**
70
+ ```bash
71
+ pip install banditbungee
72
+ ```
73
+
74
+ **Option 3: Install test tooling**
75
+ ```bash
76
+ pip install -e .[dev]
77
+ ```
78
+
79
+ ---
80
+
81
+ ## Quick Start Example
82
+
83
+ Below is a minimal example of how to import the module, spin up a stationary environment, and have an agent interact with it.
84
+
85
+ ```python
86
+ import numpy as np
87
+ from bandits.agents.ucb import UCB1
88
+ from bandits.environments.stationary import StationaryBernoulliEnv
89
+
90
+ # 1. Initialize a 3-arm stationary environment
91
+ env = StationaryBernoulliEnv(means=[0.2, 0.5, 0.8], horizon=1000)
92
+ env.reset(seed=42)
93
+
94
+ # 2. Initialize a standard UCB1 agent
95
+ agent = UCB1(n_arms=3, c=2.0)
96
+
97
+ cumulative_regret = 0.0
98
+
99
+ # 3. Run the interaction loop
100
+ for t in range(env.horizon):
101
+ # Agent selects an arm
102
+ chosen_arm = agent.select_arm()
103
+
104
+ # Environment yields a reward
105
+ reward = env.step(chosen_arm)
106
+
107
+ # Agent updates its internal statistics
108
+ agent.update(chosen_arm, reward)
109
+
110
+ # Track pseudo-regret (true best mean - chosen mean)
111
+ instant_regret = env.best_mean() - env.current_means()[chosen_arm]
112
+ cumulative_regret += instant_regret
113
+
114
+ print(f"Final Cumulative Pseudo-Regret: {cumulative_regret:.2f}")
115
+ ```
116
+
117
+ ---
118
+
119
+ ## Running Experiments
120
+
121
+ The repository also includes ready-to-run experiment scripts for comparing algorithms in non-stationary settings.
122
+
123
+ **Piecewise-stationary benchmark**
124
+ ```bash
125
+ python -m bandits.experiments.piecewise_demo
126
+ ```
127
+
128
+ This runs repeated simulations for `DiscountedUCB`, `LocalDiscountedUCB`, and `SlidingWindowUCB`, then saves:
129
+ - figures to `figures/piecewise_demo/`
130
+ - summary files to `results/piecewise_demo/`
131
+
132
+ **Crossing cosine benchmark**
133
+ ```bash
134
+ python -m bandits.experiments.cosine_demo
135
+ ```
136
+
137
+ This generates a smooth two-arm crossing environment and saves:
138
+ - figures to `figures/cosine_demo/`
139
+ - summary files to `results/cosine_demo/`
140
+
141
+ ---
142
+
143
+ ## Plotting
144
+
145
+ Plot helpers live in `bandits/plots/` and operate on the summarized output returned by the experiment utilities.
146
+
147
+ Example:
148
+
149
+ ```python
150
+ from bandits.agents.sw_ucb import SlidingWindowUCB
151
+ from bandits.environments.stationary import StationaryBernoulliEnv
152
+ from bandits.experiments.multi_run import run_many
153
+ from bandits.metrics.summary import summarize_run
154
+ from bandits.plots.piecewise_plots import plot_cumulative_regret
155
+
156
+ raw = run_many(
157
+ agent_factory=lambda: SlidingWindowUCB(n_arms=2, window_size=20),
158
+ env_factory=lambda: StationaryBernoulliEnv(means=[0.4, 0.7], horizon=200),
159
+ seeds=[0, 1, 2, 3, 4],
160
+ )
161
+
162
+ summary = summarize_run(raw)
163
+ plot_cumulative_regret({"SlidingWindowUCB": summary}, out_dir="figures/example")
164
+ ```
165
+
166
+ The main plotting functions include:
167
+ - `plot_cumulative_regret`
168
+ - `plot_instantaneous_regret`
169
+ - `plot_optimal_tracking`
170
+ - `plot_environment`
171
+ - `plot_means_path`
172
+ - `plot_change_metric_bars`
173
+
174
+ ---
175
+
176
+ ## Directory Structure
177
+
178
+ * `bandits/agents/`: Bandit algorithm implementations. All agents inherit from `Agent`.
179
+ * `bandits/environments/`: Testbeds for both stationary and non-stationary reward distributions. All environments inherit from `Environment`.
180
+ * `bandits/experiments/`: Configurable scripts to run large-scale sweeps and comparisons.
181
+ * `bandits/metrics/`: Calculation of pseudo-regret, adaptation time, and probability of optimal arm selection.
182
+ * `bandits/plots/`: Visualization tools to compare agent performances seamlessly.
183
+ * `tests/`: Basic smoke tests.
@@ -0,0 +1,38 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ banditbungee.egg-info/PKG-INFO
5
+ banditbungee.egg-info/SOURCES.txt
6
+ banditbungee.egg-info/dependency_links.txt
7
+ banditbungee.egg-info/requires.txt
8
+ banditbungee.egg-info/top_level.txt
9
+ bandits/__init__.py
10
+ bandits/agents/__init__.py
11
+ bandits/agents/base.py
12
+ bandits/agents/ducb.py
13
+ bandits/agents/local_ducb.py
14
+ bandits/agents/sw_ucb.py
15
+ bandits/agents/ucb.py
16
+ bandits/environments/__init__.py
17
+ bandits/environments/base.py
18
+ bandits/environments/crossing_cosine_2arm.py
19
+ bandits/environments/deterministic_k_piecewise.py
20
+ bandits/environments/piecewise_stationary.py
21
+ bandits/environments/random_length_brownian.py
22
+ bandits/environments/random_length_piecewise.py
23
+ bandits/environments/stationary.py
24
+ bandits/experiments/__init__.py
25
+ bandits/experiments/cosine_demo.py
26
+ bandits/experiments/multi_run.py
27
+ bandits/experiments/piecewise_demo.py
28
+ bandits/experiments/random_switch_sweep.py
29
+ bandits/experiments/run.py
30
+ bandits/experiments/tune_k_piecewise_sweep.py
31
+ bandits/experiments/tune_random_switch_sweep.py
32
+ bandits/metrics/__init__.py
33
+ bandits/metrics/change_metrics.py
34
+ bandits/metrics/summary.py
35
+ bandits/plots/__init__.py
36
+ bandits/plots/frequency_sweep_plots.py
37
+ bandits/plots/piecewise_plots.py
38
+ tests/test_smoke.py
@@ -0,0 +1,6 @@
1
+ numpy
2
+ matplotlib
3
+ seaborn
4
+
5
+ [dev]
6
+ pytest>=7
@@ -0,0 +1 @@
1
+ bandits
@@ -0,0 +1,31 @@
1
+ """BanditBench public API."""
2
+
3
+ from bandits.agents.ducb import DiscountedUCB
4
+ from bandits.agents.local_ducb import LocalDiscountedUCB
5
+ from bandits.agents.sw_ucb import SlidingWindowUCB
6
+ from bandits.agents.ucb import UCB1
7
+ from bandits.environments.crossing_cosine_2arm import CrossingCosine2ArmEnv
8
+ from bandits.environments.deterministic_k_piecewise import (
9
+ DeterministicKPiecewiseBernoulliEnv,
10
+ )
11
+ from bandits.environments.piecewise_stationary import PiecewiseStationaryBernoulliEnv
12
+ from bandits.environments.random_length_brownian import RandomLengthBrownianEnv
13
+ from bandits.environments.random_length_piecewise import (
14
+ RandomLengthPiecewiseBernoulliEnv,
15
+ )
16
+ from bandits.environments.stationary import StationaryBernoulliEnv
17
+
18
+ __all__ = [
19
+ "CrossingCosine2ArmEnv",
20
+ "DeterministicKPiecewiseBernoulliEnv",
21
+ "DiscountedUCB",
22
+ "LocalDiscountedUCB",
23
+ "PiecewiseStationaryBernoulliEnv",
24
+ "RandomLengthBrownianEnv",
25
+ "RandomLengthPiecewiseBernoulliEnv",
26
+ "SlidingWindowUCB",
27
+ "StationaryBernoulliEnv",
28
+ "UCB1",
29
+ ]
30
+
31
+ __version__ = "0.1.0"
@@ -0,0 +1,15 @@
1
+ """Bandit algorithms."""
2
+
3
+ from bandits.agents.base import Agent
4
+ from bandits.agents.ducb import DiscountedUCB
5
+ from bandits.agents.local_ducb import LocalDiscountedUCB
6
+ from bandits.agents.sw_ucb import SlidingWindowUCB
7
+ from bandits.agents.ucb import UCB1
8
+
9
+ __all__ = [
10
+ "Agent",
11
+ "DiscountedUCB",
12
+ "LocalDiscountedUCB",
13
+ "SlidingWindowUCB",
14
+ "UCB1",
15
+ ]
@@ -0,0 +1,23 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+
5
+
6
class Agent(ABC):
    """Abstract base class shared by all bandit agents.

    Holds the number of arms, a display name, and a step counter ``t`` that
    starts at zero. Concrete agents must implement :meth:`reset`,
    :meth:`select_arm`, and :meth:`update`; the class cannot be instantiated
    directly.
    """

    def __init__(self, n_arms: int, name: str | None = None):
        """Initialise the state common to every agent.

        Args:
            n_arms: Number of arms. Coerced with ``int()`` and must be at
                least 1 after coercion.
            name: Optional display name. Any falsy value (``None`` or ``""``)
                falls back to the concrete class name.

        Raises:
            ValueError: If ``n_arms`` is smaller than 1 after coercion.
        """
        n_arms = int(n_arms)
        # Fail fast: an agent with no arms has nothing to select, and the
        # problem would otherwise only surface later inside a subclass.
        if n_arms < 1:
            raise ValueError(f"n_arms must be a positive integer, got {n_arms}")
        self.n_arms = n_arms
        self.name = name or self.__class__.__name__
        # Step counter; this base class only initialises it.
        # NOTE(review): presumably maintained by subclasses -- confirm.
        self.t = 0

    @abstractmethod
    def reset(self) -> None:
        """Reset the agent.

        What state is cleared is defined by each subclass.
        NOTE(review): presumably restores the state set up at construction
        (including ``t``) -- confirm against the concrete agents.
        """

    @abstractmethod
    def select_arm(self) -> int:
        """Choose the arm to play next and return it as an ``int``."""

    @abstractmethod
    def update(self, arm: int, reward: float) -> None:
        """Record the ``reward`` observed after playing ``arm``."""
23
+