rslearn-ML 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Rustam Singh Bhadouriya
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,186 @@
1
+ Metadata-Version: 2.4
2
+ Name: rslearn-ML
3
+ Version: 1.0.0
4
+ Summary: A simple sklearn-like ML library built from scratch
5
+ Author-email: Rustam Singh Bhadouriya <rustambhadouriya7@gmail.com>
6
+ License: MIT
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: numpy
11
+ Dynamic: license-file
12
+
13
+ # ๐Ÿง  rslearn โ€” Machine Learning Library (From Scratch)
14
+
15
+ **rslearn** is a lightweight, from-scratch machine learning library inspired by scikit-learn, built using pure Python and NumPy.
16
+
17
+ This project is focused on deeply understanding ML algorithms by implementing them step-by-step, while also providing a clean and usable API similar to modern ML libraries.
18
+
19
+ ---
20
+
21
+ ## ๐Ÿš€ Features
22
+
23
+ ### ๐Ÿ“Š Linear Models
24
+
25
+ * Linear Regression (Single & Multi-feature)
26
+ * Logistic Regression (Binary & Multi-class)
27
+ * Ridge Regression (L2 Regularization)
28
+ * Lasso Regression (L1 Regularization)
29
+ * Elastic Net (L1 + L2)
30
+
31
+ ---
32
+
33
+ ### ๐Ÿ“ Metrics
34
+
35
+ * Mean Squared Error (MSE)
36
+ * Mean Absolute Error (MAE)
37
+ * Root Mean Squared Error (RMSE)
38
+ R² Score
39
+ * Accuracy (for classification)
40
+
41
+ ✔ Supports **single-output and multi-output** tasks
42
+
43
+ ---
44
+
45
+ ### ๐Ÿ”ง Preprocessing
46
+
47
+ * StandardScaler
48
+ * MinMaxScaler
49
+
50
+ ---
51
+
52
+ ### ๐Ÿงช Model Selection
53
+
54
+ * Train-Test Split
55
+
56
+ * Supports `stratify` for balanced sampling
57
+
58
+ ---
59
+
60
+ ## โš™๏ธ Optimization Details
61
+
62
+ All models in **rslearn** are implemented using **Gradient Descent**.
63
+
64
+ โš ๏ธ **Important:**
65
+
66
+ * Feature scaling is highly recommended for stable and faster convergence.
67
+ * Use:
68
+
69
+ * `StandardScaler` (recommended)
70
+ * or `MinMaxScaler`
71
+
72
+ ---
73
+
74
+ ## ๐Ÿค– Auto Scaling (Ridge, Lasso, ElasticNet)
75
+
76
+ Regularized models include:
77
+
78
+ ```python
79
+ scale=True # default
80
+ ```
81
+
82
+ * Automatically applies feature scaling internally
83
+ * Helps prevent numerical instability
84
+
85
+ ๐Ÿ’ก Still recommended:
86
+
87
+ > Use `StandardScaler` manually for best performance and control.
88
+
89
+ ---
90
+
91
+ ## ๐Ÿ“ Project Structure
92
+
93
+ ```
94
+ rslearn/
95
+ โ”‚
96
+ โ”œโ”€โ”€ linear_model/
97
+ โ”‚ โ”œโ”€โ”€ _linear_regression.py
98
+ โ”‚ โ”œโ”€โ”€ _logistic_regression.py
99
+ โ”‚ โ”œโ”€โ”€ _ridge.py
100
+ โ”‚ โ”œโ”€โ”€ _lasso.py
101
+ โ”‚ โ”œโ”€โ”€ _elastic_net.py
102
+ โ”‚
103
+ โ”œโ”€โ”€ preprocessing/
104
+ โ”‚ โ”œโ”€โ”€ _scaler.py
105
+ โ”‚
106
+ โ”œโ”€โ”€ metrics/
107
+ โ”‚ โ”œโ”€โ”€ _regression.py
108
+ โ”‚
109
+ โ”œโ”€โ”€ model_selection/
110
+ โ”‚ โ”œโ”€โ”€ _split.py
111
+ โ”‚
112
+ โ””โ”€โ”€ README.md
113
+ ```
114
+
115
+ ๐Ÿ“Œ Each module contains its own **detailed README** with usage examples and explanations.
116
+
117
+ ---
118
+
119
+ ## ๐Ÿ› ๏ธ Installation
120
+
121
+ ### Clone the repository
122
+
123
+ ```bash
124
+ git clone https://github.com/Rustam-Singh-Bhadouriya/sklearn_clone.git
125
+ cd rslearn
126
+ ```
127
+
128
+ ### Install dependencies
129
+
130
+ ```bash
131
+ pip install -r requirements.txt
132
+ ```
133
+
134
+ ---
135
+
136
+ ## ๐Ÿ“Œ Quick Example
137
+
138
+ ```python
139
+ from rslearn.linear_model import LinearRegression
140
+ from rslearn.preprocessing import StandardScaler
141
+ import numpy as np
142
+
143
+ X = np.array([10, 20, 30])
144
+ y = np.array([5, 10, 15])
145
+
146
+ scaler = StandardScaler()
147
+ X_scaled = scaler.fit_transform(X)
148
+
149
+ model = LinearRegression()
150
+ model.fit(X_scaled, y)
151
+
152
+ print(model.predict([40]))
153
+ ```
154
+
155
+ ---
156
+
157
+ ## ๐Ÿ“š Documentation
158
+
159
+ * Each folder includes its own **README.md**
160
+ * Covers:
161
+
162
+ * Usage
163
+ * Parameters
164
+ * Examples
165
+ * Internal working
166
+
167
+ ---
168
+
169
+ ## ๐ŸŽฏ Goals of this Project
170
+
171
+ * Understand ML algorithms from scratch
172
+ * Build a sklearn-like API
173
+ * Create reusable and modular ML components
174
+ * Learn real-world ML system design
175
+
176
+ ---
177
+
178
+ ## ๐Ÿง‘โ€๐Ÿ’ป Author
179
+
180
+ **Rustam Singh Bhadouriya**
181
+
182
+ ---
183
+
184
+ ## ๐Ÿ“œ License
185
+
186
+ This project is licensed under the MIT License.
@@ -0,0 +1,174 @@
1
+ # ๐Ÿง  rslearn โ€” Machine Learning Library (From Scratch)
2
+
3
+ **rslearn** is a lightweight, from-scratch machine learning library inspired by scikit-learn, built using pure Python and NumPy.
4
+
5
+ This project is focused on deeply understanding ML algorithms by implementing them step-by-step, while also providing a clean and usable API similar to modern ML libraries.
6
+
7
+ ---
8
+
9
+ ## ๐Ÿš€ Features
10
+
11
+ ### ๐Ÿ“Š Linear Models
12
+
13
+ * Linear Regression (Single & Multi-feature)
14
+ * Logistic Regression (Binary & Multi-class)
15
+ * Ridge Regression (L2 Regularization)
16
+ * Lasso Regression (L1 Regularization)
17
+ * Elastic Net (L1 + L2)
18
+
19
+ ---
20
+
21
+ ### ๐Ÿ“ Metrics
22
+
23
+ * Mean Squared Error (MSE)
24
+ * Mean Absolute Error (MAE)
25
+ * Root Mean Squared Error (RMSE)
26
+ * Rยฒ Score
27
+ * Accuracy (for classification)
28
+
29
+ โœ” Supports **single-output and multi-output** tasks
30
+
31
+ ---
32
+
33
+ ### ๐Ÿ”ง Preprocessing
34
+
35
+ * StandardScaler
36
+ * MinMaxScaler
37
+
38
+ ---
39
+
40
+ ### ๐Ÿงช Model Selection
41
+
42
+ * Train-Test Split
43
+
44
+ * Supports `stratify` for balanced sampling
45
+
46
+ ---
47
+
48
+ ## โš™๏ธ Optimization Details
49
+
50
+ All models in **rslearn** are implemented using **Gradient Descent**.
51
+
52
+ โš ๏ธ **Important:**
53
+
54
+ * Feature scaling is highly recommended for stable and faster convergence.
55
+ * Use:
56
+
57
+ * `StandardScaler` (recommended)
58
+ * or `MinMaxScaler`
59
+
60
+ ---
61
+
62
+ ## ๐Ÿค– Auto Scaling (Ridge, Lasso, ElasticNet)
63
+
64
+ Regularized models include:
65
+
66
+ ```python
67
+ scale=True # default
68
+ ```
69
+
70
+ * Automatically applies feature scaling internally
71
+ * Helps prevent numerical instability
72
+
73
+ ๐Ÿ’ก Still recommended:
74
+
75
+ > Use `StandardScaler` manually for best performance and control.
76
+
77
+ ---
78
+
79
+ ## ๐Ÿ“ Project Structure
80
+
81
+ ```
82
+ rslearn/
83
+ โ”‚
84
+ โ”œโ”€โ”€ linear_model/
85
+ โ”‚ โ”œโ”€โ”€ _linear_regression.py
86
+ โ”‚ โ”œโ”€โ”€ _logistic_regression.py
87
+ โ”‚ โ”œโ”€โ”€ _ridge.py
88
+ โ”‚ โ”œโ”€โ”€ _lasso.py
89
+ โ”‚ โ”œโ”€โ”€ _elastic_net.py
90
+ โ”‚
91
+ โ”œโ”€โ”€ preprocessing/
92
+ โ”‚ โ”œโ”€โ”€ _scaler.py
93
+ โ”‚
94
+ โ”œโ”€โ”€ metrics/
95
+ โ”‚ โ”œโ”€โ”€ _regression.py
96
+ โ”‚
97
+ โ”œโ”€โ”€ model_selection/
98
+ โ”‚ โ”œโ”€โ”€ _split.py
99
+ โ”‚
100
+ โ””โ”€โ”€ README.md
101
+ ```
102
+
103
+ ๐Ÿ“Œ Each module contains its own **detailed README** with usage examples and explanations.
104
+
105
+ ---
106
+
107
+ ## ๐Ÿ› ๏ธ Installation
108
+
109
+ ### Clone the repository
110
+
111
+ ```bash
112
+ git clone https://github.com/Rustam-Singh-Bhadouriya/sklearn_clone.git
113
+ cd rslearn
114
+ ```
115
+
116
+ ### Install dependencies
117
+
118
+ ```bash
119
+ pip install -r requirements.txt
120
+ ```
121
+
122
+ ---
123
+
124
+ ## ๐Ÿ“Œ Quick Example
125
+
126
+ ```python
127
+ from rslearn.linear_model import LinearRegression
128
+ from rslearn.preprocessing import StandardScaler
129
+ import numpy as np
130
+
131
+ X = np.array([10, 20, 30])
132
+ y = np.array([5, 10, 15])
133
+
134
+ scaler = StandardScaler()
135
+ X_scaled = scaler.fit_transform(X)
136
+
137
+ model = LinearRegression()
138
+ model.fit(X_scaled, y)
139
+
140
+ print(model.predict([40]))
141
+ ```
142
+
143
+ ---
144
+
145
+ ## ๐Ÿ“š Documentation
146
+
147
+ * Each folder includes its own **README.md**
148
+ * Covers:
149
+
150
+ * Usage
151
+ * Parameters
152
+ * Examples
153
+ * Internal working
154
+
155
+ ---
156
+
157
+ ## ๐ŸŽฏ Goals of this Project
158
+
159
+ * Understand ML algorithms from scratch
160
+ * Build a sklearn-like API
161
+ * Create reusable and modular ML components
162
+ * Learn real-world ML system design
163
+
164
+ ---
165
+
166
+ ## ๐Ÿง‘โ€๐Ÿ’ป Author
167
+
168
+ **Rustam Singh Bhadouriya**
169
+
170
+ ---
171
+
172
+ ## ๐Ÿ“œ License
173
+
174
+ This project is licensed under the MIT License.
@@ -0,0 +1,19 @@
1
+ [project]
2
+ name = "rslearn-ML"
3
+ version = "1.0.0"
4
+ description = "A simple sklearn-like ML library built from scratch"
5
+ authors = [
6
+ { name="Rustam Singh Bhadouriya", email="rustambhadouriya7@gmail.com" }
7
+ ]
8
+ readme = "README.md"
9
+ license = { text = "MIT" }
10
+ requires-python = ">=3.10"
11
+
12
+ dependencies = [
13
+ "numpy"
14
+ ]
15
+
16
+
17
+ [build-system]
18
+ requires = ["setuptools>=61.0"]
19
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,3 @@
1
+ from . import linear_model, metrics, preprocessing, model_selection
2
+
3
+ __all__ = ["linear_model", "metrics", "preprocessing", "model_selection"]
@@ -0,0 +1,242 @@
1
+ """
2
+ Things : -
3
+
4
+ # it is linear regression
5
+ y = m1x1 + m1x2 + m3x3 + ... + MnXn + b
6
+
7
+ y = prediction
8
+ m = weight
9
+ x = value
10
+ b = bias
11
+
12
+ loss = prediction - real_val
13
+ dw = gradient descent of weight
14
+ db = gradient descent of bias
15
+
16
+ It uses Gradients so, Use `StandardScaler` or `MinMaxScaler` for better result
17
+
18
+ Scalers...
19
+ >>> from rslearn.preprocessing import StandardScaler, MinMaxScaler
20
+ Read README.md or the documentation for more information about their functions
21
+ """
22
+
23
+ import numpy as np
24
+ from rslearn.metrics import mse
25
+
26
class LinearRegression():
    """Linear regression trained with batch gradient descent.

    Supports optional regularization: ``"l1"`` (Lasso-style), ``"l2"``
    (Ridge-style) or ``"elastic_net"`` (combined L1 + L2). Accepts 1-D
    (single feature) or 2-D inputs; plain Python lists are converted with
    ``np.asarray``.

    Because optimization is plain gradient descent, feature scaling
    (e.g. ``StandardScaler`` or ``MinMaxScaler``) is strongly recommended
    for stable and fast convergence on large-valued features.

    Parameters
    ----------
    regulization : {None, "l1", "l2", "elastic_net"}, optional
        Regularization scheme. Parameter name (with its original spelling)
        is kept for backward compatibility. Default ``None``.
    alpha : float, optional
        Regularization strength. Default 0.1.
    l1_ratio : float, optional
        L1/L2 mixing ratio used only by ``"elastic_net"``. Default 0.5.

    Example
    -------
    >>> model = LinearRegression()
    >>> model.fit([10, 20, 30], [5, 10, 15])
    >>> weights, bias = model.get_weight_bias()
    >>> model.predict([40, 50])
    """

    def __init__(self, regulization=None, alpha: float = 0.1, l1_ratio: float = 0.5):
        # Learned parameters; populated by fit().
        self.weights = None
        self.bias = None

        valid_params = {"l1", "l2", "elastic_net", None}
        if regulization not in valid_params:
            raise ValueError(f"regulization parameter is not supported, supported Parameters {valid_params}")

        # NOTE: attribute name kept as-is (including the typo) for backward
        # compatibility with any external code that reads it.
        self.caclucate_error = self._regulizing_linear_helper(
            regulization=regulization, alpha=alpha, l1_ratio=l1_ratio
        )

    def fit(self,
            X,
            y,
            weights=None,
            bias=None,
            learning_rate: float = 0.01,
            min_loss: float = 0.2,
            max_itr: int = 18000,
            ):
        """Train the model with batch gradient descent.

        Parameters
        ----------
        X : array-like
            Training data, 1-D (single feature) or 2-D ``(n_samples, n_features)``.
        y : array-like
            Target values, flattened to 1-D.
        weights : array-like, optional
            Custom initial weights. The array is copied, so the caller's
            array is never modified.
        bias : float, optional
            Custom initial bias. Default 0.
        learning_rate : float, optional
            Gradient-descent step size. Default 0.01.
        min_loss : float, optional
            Stop early once the (regularized) loss drops to this value.
            Default 0.2.
        max_itr : int, optional
            Maximum number of update iterations. Default 18000.

        Notes
        -----
        Lower ``learning_rate`` or scale the features if weights blow up
        (e.g. values like ``-1.8e+163``) or become NaN.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).reshape(-1)

        if X.ndim == 1:
            X = X.reshape(-1, 1)

        n_samples, n_features = X.shape

        if weights is None:
            # Same values as the legacy `np.random.seed(7); np.random.uniform(...)`
            # initialization, but via a local RandomState so NumPy's global
            # RNG state is no longer mutated as a side effect.
            weights = np.random.RandomState(7).uniform(0.2, 3, n_features)
        else:
            # Bug fix: copy so the in-place `-=` updates below can never
            # clobber the caller's array.
            weights = np.asarray(weights, dtype=float).copy()

        bias = 0.0 if bias is None else float(bias)

        for iteration in range(max_itr):
            pred = X @ weights + bias  # current prediction

            error = self.caclucate_error.get_error(y_true=y, y_pred=pred, weights=weights)
            if error <= min_loss:
                print(f"Model Successfully Fitted at #{iteration} iteration")
                break

            residual = pred - y  # signed error used by the gradients
            dw = (2 / n_samples) * (X.T @ residual) + self.caclucate_error.get_weight_gradient(weights=weights)
            db = (2 / n_samples) * np.sum(residual)

            weights -= learning_rate * dw
            bias -= learning_rate * db

            if np.isnan(weights).any() or np.isnan(bias):
                print("NaN detected, stopping training, Use Scalers to avoid it")
                break

        self.weights = weights
        self.bias = bias

    def get_weight_bias(self) -> tuple:
        """Return the learned ``(weights, bias)`` pair.

        >>> weights, bias = Model.get_weight_bias()
        """
        return (self.weights, self.bias)

    def predict(self, new_data) -> np.ndarray:
        """Predict targets for ``new_data``.

        Input: 1-D or 2-D array-like. Output: 1-D ``np.ndarray`` rounded
        to 2 decimals. Raises ``ValueError`` on empty input or when the
        model has not been fitted yet.
        """
        if self.weights is None or self.bias is None:
            raise ValueError("Model is not fitted yet; call fit() first")

        new_data = np.asarray(new_data, dtype=float)
        if new_data.size == 0:
            raise ValueError("Got Empty Array")

        if new_data.ndim == 1:
            new_data = new_data.reshape(-1, 1)

        return (new_data @ self.weights + self.bias).round(2)

    class _regulizing_linear_helper:
        """Internal strategy object: regularized loss and penalty gradient."""

        def __init__(self, alpha=0.1, regulization=None, l1_ratio=0.5):
            self.alpha = alpha
            self.regulization = regulization
            self.l1_ratio = l1_ratio

        def get_error(self, y_true, y_pred, weights):
            """Return MSE plus the configured regularization penalty."""
            # Mean squared error computed inline (equivalent to
            # rslearn.metrics.mse for the 1-D targets used here), keeping
            # this module self-contained.
            mse_error = float(np.mean(np.square(y_true - y_pred)))

            if self.regulization == "l1":
                return mse_error + self.alpha * np.sum(np.abs(weights))

            if self.regulization == "l2":
                return mse_error + self.alpha * np.sum(np.square(weights))

            if self.regulization == "elastic_net":
                l1 = self.alpha * self.l1_ratio
                l2 = self.alpha * (1 - self.l1_ratio)
                return mse_error + l1 * np.sum(np.abs(weights)) + l2 * np.sum(np.square(weights))

            # No regularization configured (validated in __init__).
            return mse_error

        def get_weight_gradient(self, weights):
            """Return the penalty's contribution to the weight gradient (0 if none)."""
            if self.regulization == "l1":
                return self.alpha * np.sign(weights)

            if self.regulization == "l2":
                return 2 * self.alpha * weights

            if self.regulization == "elastic_net":
                l1 = self.alpha * self.l1_ratio
                l2 = self.alpha * (1 - self.l1_ratio)
                return l1 * np.sign(weights) + 2 * l2 * weights

            return 0
234
+
235
+
236
+
237
+
238
+
239
+
240
+
241
# Smoke test: construct a model with default settings when the module is
# executed directly (no training or output).
if __name__ == "__main__":
    Model = LinearRegression()