rlcmab-sampler 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,84 @@
1
+ Metadata-Version: 2.3
2
+ Name: rlcmab-sampler
3
+ Version: 0.1.0
4
+ Summary: A reward sampler for Contextual Bandit news recommendation systems.
5
+ Author: Saswata Sarkar
6
+ Author-email: Saswata Sarkar <sarkarsaswata01@gmail.com>
7
+ Requires-Python: >=3.12
8
+ Description-Content-Type: text/markdown
9
+
10
+ # rlcmab-lab3-sampler
11
+
12
+ A reward sampler for Contextual Bandit news recommendation systems.
13
+
14
+ ## Installation
15
+
16
+ ```bash
17
+ pip install rlcmab-sampler
18
+ ```
19
+
20
+ Or install from source:
21
+
22
+ ```bash
23
+ cd sampler
24
+ pip install .
25
+ ```
26
+
27
+ ## Quick Start
28
+
29
+ ```python
30
+ from sampler import sampler
31
+
32
+ # Initialize with your roll number (i)
33
+ reward_sampler = sampler(i=42)
34
+
35
+ # Get a reward from arm j
36
+ reward = reward_sampler.sample(j=5)
37
+ print(reward)
38
+
39
+ # Sample a few arms
40
+ for j in range(3):
41
+ print(j, reward_sampler.sample(j))
42
+ ```
43
+
44
+ ## API Reference
45
+
46
+ ### `sampler(i, n_arms=12)`
47
+
48
+ Initialize a new sampler instance.
49
+
50
+ **Parameters:**
51
+
52
+ - `i` (int): Student ID used to seed the random number generator
53
+ - `n_arms` (int, optional): Number of arms/articles. Default: 12
54
+
55
+ **Attributes:**
56
+
57
+ - `means`: List of mean values for each Gaussian distribution
58
+ - `stds`: List of standard deviations for each Gaussian distribution
59
+ - `distributions`: List of scipy.stats.norm distribution objects
60
+
61
+ ### `sample(j)`
62
+
63
+ Sample a reward from the specified arm.
64
+
65
+ **Parameters:**
66
+
67
+ - `j` (int): Arm index (must be between 0 and `n_arms - 1`, i.e. 0 to 11 with the default 12 arms)
68
+
69
+ **Returns:**
70
+
71
+ - `float`: Reward value sampled from the arm's Gaussian distribution
72
+
73
+ **Raises:**
74
+
75
+ - `ValueError`: If `j` is not in the valid range [0, `n_arms - 1`] ([0, 11] with the default 12 arms)
76
+
77
+ ## Reproducibility
78
+
79
+ For the same student ID `i`, the generated arm distributions remain the same
80
+ across runs. Different student IDs map to different distributions.
81
+
82
+ ## License
83
+
84
+ [MIT License](LICENSE)
@@ -0,0 +1,75 @@
1
+ # rlcmab-lab3-sampler
2
+
3
+ A reward sampler for Contextual Bandit news recommendation systems.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install rlcmab-sampler
9
+ ```
10
+
11
+ Or install from source:
12
+
13
+ ```bash
14
+ cd sampler
15
+ pip install .
16
+ ```
17
+
18
+ ## Quick Start
19
+
20
+ ```python
21
+ from sampler import sampler
22
+
23
+ # Initialize with your roll number (i)
24
+ reward_sampler = sampler(i=42)
25
+
26
+ # Get a reward from arm j
27
+ reward = reward_sampler.sample(j=5)
28
+ print(reward)
29
+
30
+ # Sample a few arms
31
+ for j in range(3):
32
+ print(j, reward_sampler.sample(j))
33
+ ```
34
+
35
+ ## API Reference
36
+
37
+ ### `sampler(i, n_arms=12)`
38
+
39
+ Initialize a new sampler instance.
40
+
41
+ **Parameters:**
42
+
43
+ - `i` (int): Student ID used to seed the random number generator
44
+ - `n_arms` (int, optional): Number of arms/articles. Default: 12
45
+
46
+ **Attributes:**
47
+
48
+ - `means`: List of mean values for each Gaussian distribution
49
+ - `stds`: List of standard deviations for each Gaussian distribution
50
+ - `distributions`: List of scipy.stats.norm distribution objects
51
+
52
+ ### `sample(j)`
53
+
54
+ Sample a reward from the specified arm.
55
+
56
+ **Parameters:**
57
+
58
+ - `j` (int): Arm index (must be between 0 and `n_arms - 1`, i.e. 0 to 11 with the default 12 arms)
59
+
60
+ **Returns:**
61
+
62
+ - `float`: Reward value sampled from the arm's Gaussian distribution
63
+
64
+ **Raises:**
65
+
66
+ - `ValueError`: If `j` is not in the valid range [0, `n_arms - 1`] ([0, 11] with the default 12 arms)
67
+
68
+ ## Reproducibility
69
+
70
+ For the same student ID `i`, the generated arm distributions remain the same
71
+ across runs. Different student IDs map to different distributions.
72
+
73
+ ## License
74
+
75
+ [MIT License](LICENSE)
@@ -0,0 +1,14 @@
1
[project]
name = "rlcmab-sampler"
version = "0.1.0"
description = "A reward sampler for Contextual Bandit news recommendation systems."
readme = "README.md"
authors = [
    { name = "Saswata Sarkar", email = "sarkarsaswata01@gmail.com" }
]
requires-python = ">=3.12"
# The packaged module imports numpy and scipy.stats at import time, so both
# must be declared as runtime dependencies for a clean `pip install` to work.
dependencies = [
    "numpy",
    "scipy",
]

[build-system]
requires = ["uv_build>=0.9.11,<0.10.0"]
build-backend = "uv_build"
@@ -0,0 +1,50 @@
1
+ import numpy as np
2
+ import scipy.stats as stats
3
+
4
+
5
class sampler:
    """
    Gaussian reward sampler for Contextual Bandit news recommendation.

    Every arm is a Gaussian distribution whose mean and standard deviation
    are generated from a random generator seeded by the identifier ``i``.

    Properties:
    - Same i → same hidden distributions
    - Strong separation across students
    - 12 Gaussian arms by default (configurable through ``n_arms``)
    - Continuous rewards (old behavior preserved)

    Attributes:
        i: The identifier, coerced to ``int``.
        n_arms: Number of arms.
        rng: ``numpy.random.Generator`` seeded from ``i``; used only to
            generate the arm parameters.
        means: List of per-arm Gaussian means.
        stds: List of per-arm Gaussian standard deviations.
        distributions: List of frozen ``scipy.stats.norm`` objects, one per
            arm, built from ``means`` and ``stds``.
    """

    def __init__(self, i, n_arms=12):
        """
        Build the arm distributions for identifier ``i``.

        Args:
            i: Identifier (e.g. a roll number); converted with ``int(i)``.
            n_arms: Number of arms to generate. Defaults to 12.
        """
        self.i = int(i)
        self.n_arms = n_arms

        seed = self._derive_seed(self.i)
        self.rng = np.random.default_rng(seed)

        self.means, self.stds = self._generate_gaussian_parameters()

        self.distributions = [
            stats.norm(m, s) for m, s in zip(self.means, self.stds)
        ]

    def _derive_seed(self, i):
        """
        Map the identifier to a seed in ``[0, 2**32)``.

        The identifier is multiplied by a large odd constant and reduced
        modulo ``2**32`` so that nearby identifiers yield distant seeds.
        """
        return (i * 2654435761) % (2**32)

    def _generate_gaussian_parameters(self):
        """
        Draw the per-arm means and standard deviations from ``self.rng``.

        Means start from ``n_arms`` evenly spaced values on [-8, 8], receive
        Gaussian noise (mean 0, std 2) and are then shuffled in place.
        Standard deviations are drawn uniformly from [0.8, 1.4).

        The order of the generator calls (noise, shuffle, stds) determines
        the resulting parameters for a given seed and must not be changed.

        Returns:
            A ``(means, stds)`` tuple of plain Python lists.
        """
        base_means = np.linspace(-8.0, 8.0, self.n_arms)
        noise = self.rng.normal(0.0, 2.0, size=self.n_arms)

        means = base_means + noise
        self.rng.shuffle(means)

        stds = self.rng.uniform(0.8, 1.4, size=self.n_arms)
        return means.tolist(), stds.tolist()

    def sample(self, j):
        """
        Sample a reward from arm ``j``.

        Note: ``rvs()`` is called without a ``random_state``, so the draws
        do not come from ``self.rng``; only the arm parameters are tied to
        the identifier.

        Args:
            j: Arm index; must satisfy ``0 <= j < n_arms``.

        Returns:
            A reward drawn from the Gaussian distribution of arm ``j``.

        Raises:
            ValueError: If ``j`` is outside ``[0, n_arms - 1]``.
        """
        if not (0 <= j < self.n_arms):
            # Report the real upper bound instead of a hard-coded 11 so the
            # message stays correct for non-default arm counts.
            raise ValueError(
                f"Index j must be between 0 and {self.n_arms - 1}."
            )
        return self.distributions[j].rvs()
46
+
47
+
48
+ # Explicit public API
49
+ __all__ = ["sampler"]
50
+ __author__ = "Saswata Sarkar"
File without changes