rlcmab-sampler 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import scipy.stats as stats
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class sampler:
    """
    Gaussian reward sampler for Contextual Bandit news recommendation.

    Properties:
    - Same i → same hidden distributions
    - Strong separation across students
    - ``n_arms`` Gaussian arms (12 by default)
    - Continuous rewards (old behavior preserved)

    Attributes:
        i: Integer identifier the arm parameters are derived from.
        n_arms: Number of arms.
        rng: ``numpy.random.Generator`` seeded from ``i``; used only to
            generate the arm parameters.
        means: List with the mean of each arm's Gaussian.
        stds: List with the standard deviation of each arm's Gaussian.
        distributions: List of frozen ``scipy.stats.norm`` objects, one
            per arm.
    """

    def __init__(self, i, n_arms=12):
        """Build the arm distributions for identifier ``i``.

        Args:
            i: Identifier; converted with ``int(i)`` before seeding.
            n_arms: Number of Gaussian arms to create. Default: 12.
        """
        self.i = int(i)
        self.n_arms = n_arms

        # The generator is seeded deterministically from ``i`` so that the
        # same identifier always yields the same means and stds.
        seed = self._derive_seed(self.i)
        self.rng = np.random.default_rng(seed)

        self.means, self.stds = self._generate_gaussian_parameters()

        self.distributions = [
            stats.norm(m, s) for m, s in zip(self.means, self.stds)
        ]

    def _derive_seed(self, i):
        """Map ``i`` to a seed in ``[0, 2**32)``.

        The identifier is multiplied by 2654435761 and reduced modulo
        ``2**32``; Python's ``%`` keeps the result non-negative even for
        a negative ``i``.
        """
        return (i * 2654435761) % (2**32)

    def _generate_gaussian_parameters(self):
        """Draw the per-arm means and standard deviations from ``self.rng``.

        Means start as ``n_arms`` evenly spaced values on [-8, 8], get
        Gaussian noise (std 2.0) added, and are then shuffled so the arm
        index does not reveal the ordering of the means. Standard
        deviations are drawn uniformly from [0.8, 1.4).

        Returns:
            tuple[list[float], list[float]]: ``(means, stds)``.
        """
        base_means = np.linspace(-8.0, 8.0, self.n_arms)
        noise = self.rng.normal(0.0, 2.0, size=self.n_arms)

        means = base_means + noise
        self.rng.shuffle(means)  # in-place permutation of the array

        # NOTE: the order of the rng calls (normal, shuffle, uniform)
        # determines the generated values and must not be changed.
        stds = self.rng.uniform(0.8, 1.4, size=self.n_arms)
        return means.tolist(), stds.tolist()

    def sample(self, j):
        """Draw one reward from arm ``j``.

        Args:
            j: Arm index; must satisfy ``0 <= j < n_arms``.

        Returns:
            A single draw from the arm's Gaussian distribution.

        Raises:
            ValueError: If ``j`` is outside ``[0, n_arms - 1]``.
        """
        if not (0 <= j < self.n_arms):
            # Report the real upper bound instead of a hard-coded 11 so the
            # message stays correct when ``n_arms`` is not the default.
            raise ValueError(
                f"Index j must be between 0 and {self.n_arms - 1}."
            )
        # ``rvs()`` is called without ``random_state``, so the draw does
        # not come from ``self.rng``; only the arm parameters are seeded.
        return self.distributions[j].rvs()
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# Explicit public API
|
|
49
|
+
__all__ = ["sampler"]
|
|
50
|
+
__author__ = "Saswata Sarkar"
|
rlcmab_sampler/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: rlcmab-sampler
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A reward sampler for Contextual Bandit news recommendation systems.
|
|
5
|
+
Author: Saswata Sarkar
|
|
6
|
+
Author-email: Saswata Sarkar <sarkarsaswata01@gmail.com>
|
|
7
|
+
Requires-Python: >=3.12
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
|
|
10
|
+
# rlcmab-sampler
|
|
11
|
+
|
|
12
|
+
A reward sampler for Contextual Bandit news recommendation systems.
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install rlcmab-sampler
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
Or install from source:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
cd sampler
|
|
24
|
+
pip install .
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
from rlcmab_sampler import sampler
|
|
31
|
+
|
|
32
|
+
# Initialize with your roll number (i)
|
|
33
|
+
reward_sampler = sampler(i=42)
|
|
34
|
+
|
|
35
|
+
# Get a reward from arm j
|
|
36
|
+
reward = reward_sampler.sample(j=5)
|
|
37
|
+
print(reward)
|
|
38
|
+
|
|
39
|
+
# Sample a few arms
|
|
40
|
+
for j in range(3):
|
|
41
|
+
print(j, reward_sampler.sample(j))
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## API Reference
|
|
45
|
+
|
|
46
|
+
### `sampler(i, n_arms=12)`
|
|
47
|
+
|
|
48
|
+
Initialize a new sampler instance.
|
|
49
|
+
|
|
50
|
+
**Parameters:**
|
|
51
|
+
|
|
52
|
+
- `i` (int): Student ID used to seed the random number generator
|
|
53
|
+
- `n_arms` (int, optional): Number of arms/articles. Default: 12
|
|
54
|
+
|
|
55
|
+
**Attributes:**
|
|
56
|
+
|
|
57
|
+
- `means`: List of mean values for each Gaussian distribution
|
|
58
|
+
- `stds`: List of standard deviations for each Gaussian distribution
|
|
59
|
+
- `distributions`: List of scipy.stats.norm distribution objects
|
|
60
|
+
|
|
61
|
+
### `sample(j)`
|
|
62
|
+
|
|
63
|
+
Sample a reward from the specified arm.
|
|
64
|
+
|
|
65
|
+
**Parameters:**
|
|
66
|
+
|
|
67
|
+
- `j` (int): Arm index (must be between 0 and `n_arms - 1`, i.e. 0 to 11 with the default `n_arms=12`)
|
|
68
|
+
|
|
69
|
+
**Returns:**
|
|
70
|
+
|
|
71
|
+
- `float`: Reward value sampled from the arm's Gaussian distribution
|
|
72
|
+
|
|
73
|
+
**Raises:**
|
|
74
|
+
|
|
75
|
+
- `ValueError`: If `j` is not in the valid range [0, `n_arms - 1`] ([0, 11] with the default `n_arms=12`)
|
|
76
|
+
|
|
77
|
+
## Reproducibility
|
|
78
|
+
|
|
79
|
+
For the same student ID `i`, the generated arm distributions remain the same
|
|
80
|
+
across runs. Different student IDs map to different distributions.
|
|
81
|
+
|
|
82
|
+
## License
|
|
83
|
+
|
|
84
|
+
[MIT License](LICENSE)
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
rlcmab_sampler/__init__.py,sha256=hFMxoKcV0PfVsRUJb81Is3IP_AblpQ8w6FaFbr8BwfQ,1345
|
|
2
|
+
rlcmab_sampler/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
rlcmab_sampler-0.1.0.dist-info/WHEEL,sha256=YUH1mBqsx8Dh2cQG2rlcuRYUhJddG9iClegy4IgnHik,79
|
|
4
|
+
rlcmab_sampler-0.1.0.dist-info/METADATA,sha256=QNhZVhC76KJPdN8Qfh6VsZScEP0FRPhcthSCaBriNVM,1667
|
|
5
|
+
rlcmab_sampler-0.1.0.dist-info/RECORD,,
|