quicklearnkit 0.0.2__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {quicklearnkit-0.0.2 → quicklearnkit-0.2.1}/LICENSE +21 -21
- quicklearnkit-0.2.1/PKG-INFO +219 -0
- quicklearnkit-0.2.1/pyproject.toml +23 -0
- quicklearnkit-0.2.1/quicklearnkit/__init__.py +2 -0
- {quicklearnkit-0.0.2 → quicklearnkit-0.2.1}/quicklearnkit/classifier.py +89 -89
- {quicklearnkit-0.0.2 → quicklearnkit-0.2.1}/quicklearnkit/quickimports.py +22 -21
- quicklearnkit-0.2.1/quicklearnkit/randomizer.py +175 -0
- {quicklearnkit-0.0.2 → quicklearnkit-0.2.1}/quicklearnkit/regressor.py +91 -91
- quicklearnkit-0.2.1/quicklearnkit/split.py +99 -0
- quicklearnkit-0.2.1/quicklearnkit/utils.py +196 -0
- quicklearnkit-0.2.1/quicklearnkit.egg-info/PKG-INFO +219 -0
- {quicklearnkit-0.0.2 → quicklearnkit-0.2.1}/quicklearnkit.egg-info/SOURCES.txt +3 -2
- {quicklearnkit-0.0.2 → quicklearnkit-0.2.1}/quicklearnkit.egg-info/requires.txt +3 -2
- {quicklearnkit-0.0.2 → quicklearnkit-0.2.1}/quicklearnkit.egg-info/top_level.txt +1 -0
- quicklearnkit-0.2.1/readme.md +183 -0
- {quicklearnkit-0.0.2 → quicklearnkit-0.2.1}/setup.cfg +4 -4
- quicklearnkit-0.0.2/PKG-INFO +0 -29
- quicklearnkit-0.0.2/README.md +0 -2
- quicklearnkit-0.0.2/pyproject.toml +0 -3
- quicklearnkit-0.0.2/quicklearnkit/__init__.py +0 -2
- quicklearnkit-0.0.2/quicklearnkit/utils.py +0 -39
- quicklearnkit-0.0.2/quicklearnkit.egg-info/PKG-INFO +0 -29
- quicklearnkit-0.0.2/setup.py +0 -24
- {quicklearnkit-0.0.2 → quicklearnkit-0.2.1}/quicklearnkit.egg-info/dependency_links.txt +0 -0
|
@@ -1,21 +1,21 @@
|
|
|
1
|
-
MIT License
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2025 Masterhazi
|
|
4
|
-
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
-
in the Software without restriction, including without limitation the rights
|
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
-
furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
|
13
|
-
copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Masterhazi
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: quicklearnkit
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Summary: Learning-first machine learning utilities library for simplified imports, sampling, splitting, and probabilistic preprocessing.
|
|
5
|
+
Author: Hazi Afrid
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025 Masterhazi
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Requires-Python: >=3.8
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Requires-Dist: numpy
|
|
32
|
+
Requires-Dist: pandas
|
|
33
|
+
Requires-Dist: scikit-learn
|
|
34
|
+
Requires-Dist: xgboost
|
|
35
|
+
Dynamic: license-file
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# QuickLearnKit
|
|
39
|
+
|
|
40
|
+
QuickLearnKit is a lightweight, learning-first machine learning utilities library designed to simplify model imports and streamline common ML workflows. No more deep module navigation—import models and tools effortlessly and start building.
|
|
41
|
+
|
|
42
|
+
It focuses on removing *mechanical friction* so students and practitioners can spend more time understanding concepts, not fighting syntax.
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## Installation
|
|
47
|
+
|
|
48
|
+
Install QuickLearnKit using pip:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install quicklearnkit
|
|
52
|
+
````
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Quick Model Imports
|
|
57
|
+
|
|
58
|
+
QuickLearnKit provides seamless access to essential machine learning models with minimal syntax. Simply import and initialize models without the usual clutter.
|
|
59
|
+
|
|
60
|
+
### Example Usage
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
from quicklearnkit import (
|
|
64
|
+
LinearRegressionmodel,
|
|
65
|
+
RandomForestRegressionmodel,
|
|
66
|
+
XGBoostRegressionmodel,
|
|
67
|
+
KNeighborsClassifiermodel,
|
|
68
|
+
GradientBoostingClassifiermodel
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
# Initialize models directly
|
|
72
|
+
lr_model = LinearRegressionmodel()
|
|
73
|
+
rf_model = RandomForestRegressionmodel()
|
|
74
|
+
xgb_model = XGBoostRegressionmodel()
|
|
75
|
+
|
|
76
|
+
# Initialize classifiers
|
|
77
|
+
knn_classifier = KNeighborsClassifiermodel()
|
|
78
|
+
gb_classifier = GradientBoostingClassifiermodel()
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## Supported Models
|
|
84
|
+
|
|
85
|
+
QuickLearnKit offers easy access to commonly used supervised learning models:
|
|
86
|
+
|
|
87
|
+
### Regression Models
|
|
88
|
+
|
|
89
|
+
* Linear Regression (`LinearRegressionmodel()`)
|
|
90
|
+
* K-Nearest Neighbors Regression (`KNNRegressionmodel()`)
|
|
91
|
+
* Decision Tree Regression (`DecisionTreeRegressionmodel()`)
|
|
92
|
+
* Random Forest Regression (`RandomForestRegressionmodel()`)
|
|
93
|
+
* Gradient Boosting Regression (`GradientBoostingRegressionmodel()`)
|
|
94
|
+
* AdaBoost Regression (`AdaBoostRegressionmodel()`)
|
|
95
|
+
* XGBoost Regression (`XGBoostRegressionmodel()`)
|
|
96
|
+
* ElasticNet Regression (`ElasticNetRegressionmodel()`)
|
|
97
|
+
|
|
98
|
+
### Classification Models
|
|
99
|
+
|
|
100
|
+
* Logistic Regression (`LogisticRegressionmodel()`)
|
|
101
|
+
* K-Nearest Neighbors Classifier (`KNeighborsClassifiermodel()`)
|
|
102
|
+
* Decision Tree Classifier (`DecisionTreeClassifiermodel()`)
|
|
103
|
+
* Random Forest Classifier (`RandomForestClassifiermodel()`)
|
|
104
|
+
* AdaBoost Classifier (`AdaBoostClassifiermodel()`)
|
|
105
|
+
* Gradient Boosting Classifier (`GradientBoostingClassifiermodel()`)
|
|
106
|
+
* XGBoost Classifier (`XGBClassifiermodel()`)
|
|
107
|
+
* Support Vector Classifier (`SVClassifiermodel()`)
|
|
108
|
+
|
|
109
|
+
---
|
|
110
|
+
|
|
111
|
+
## Utilities & Workflow Tools
|
|
112
|
+
|
|
113
|
+
Beyond models, QuickLearnKit provides practical tools to support the full machine learning workflow.
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
### 🔀 Random Sampling — `Sampler`
|
|
118
|
+
|
|
119
|
+
Randomly sample from lists, NumPy arrays, or pandas DataFrames. Supports both **stateless (reproducible)** and **stateful (streaming / simulation)** modes.
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
from quicklearnkit import Sampler
|
|
123
|
+
import seaborn as sns
|
|
124
|
+
|
|
125
|
+
df = sns.load_dataset("titanic")
|
|
126
|
+
|
|
127
|
+
sampler = Sampler(df, seed=42)
|
|
128
|
+
sampled_data = sampler.sample(n=5)
|
|
129
|
+
|
|
130
|
+
print(sampled_data)
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
### ✂️ Train–Test Splitting — `train_test_split`
|
|
136
|
+
|
|
137
|
+
Split datasets into training and testing sets with support for:
|
|
138
|
+
|
|
139
|
+
* Shuffling
|
|
140
|
+
* Stratification
|
|
141
|
+
* NumPy arrays and pandas DataFrames
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from quicklearnkit import train_test_split
|
|
145
|
+
import numpy as np
|
|
146
|
+
|
|
147
|
+
X = np.arange(20).reshape(10, 2)
|
|
148
|
+
|
|
149
|
+
X_train, X_test = train_test_split(
|
|
150
|
+
X,
|
|
151
|
+
test_size=0.25,
|
|
152
|
+
shuffle=True,
|
|
153
|
+
random_state=42
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
print(X_train.shape, X_test.shape)
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
### 🎲 Probabilistic Imputation — `ProbabilisticImputer`
|
|
162
|
+
|
|
163
|
+
A group-aware, probabilistic categorical imputer that learns conditional distributions and samples missing values in a **reproducible** way by default.
|
|
164
|
+
|
|
165
|
+
This is especially useful for:
|
|
166
|
+
|
|
167
|
+
* Teaching how distributions work
|
|
168
|
+
* Simulating realistic missing data handling
|
|
169
|
+
* Data augmentation and robustness testing
|
|
170
|
+
|
|
171
|
+
```python
|
|
172
|
+
from quicklearnkit import ProbabilisticImputer
|
|
173
|
+
import seaborn as sns
|
|
174
|
+
|
|
175
|
+
df = sns.load_dataset("titanic")
|
|
176
|
+
|
|
177
|
+
imputer = ProbabilisticImputer(
|
|
178
|
+
group_col="pclass",
|
|
179
|
+
target_col="deck",
|
|
180
|
+
random_state=42 # reproducible by default
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
df_imputed = imputer.fit_transform(df)
|
|
184
|
+
|
|
185
|
+
print("Missing before:", df["deck"].isna().sum())
|
|
186
|
+
print("Missing after:", df_imputed["deck"].isna().sum())
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
---
|
|
190
|
+
|
|
191
|
+
## Randomized Data Generation
|
|
192
|
+
|
|
193
|
+
Generate random arrays with specific characteristics:
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
from quicklearnkit import create_random
|
|
197
|
+
|
|
198
|
+
random_data = create_random(mean=0, std_dev=1, size=100)
|
|
199
|
+
print(random_data)
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
---
|
|
203
|
+
|
|
204
|
+
## Contribute
|
|
205
|
+
|
|
206
|
+
Want to improve QuickLearnKit? Fork the repository, suggest enhancements, and help make machine learning more accessible and easier to teach.
|
|
207
|
+
|
|
208
|
+
---
|
|
209
|
+
|
|
210
|
+
## License
|
|
211
|
+
|
|
212
|
+
This project is licensed under the MIT License.
|
|
213
|
+
|
|
214
|
+
---
|
|
215
|
+
|
|
216
|
+
QuickLearnKit makes machine learning utilities effortless—so you can focus on **learning, experimenting, and building**, not writing complex import statements. 🚀
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "quicklearnkit"
|
|
7
|
+
version = "0.2.1"
|
|
8
|
+
description = "Learning-first machine learning utilities library for simplified imports, sampling, splitting, and probabilistic preprocessing."
|
|
9
|
+
readme = "readme.md"
|
|
10
|
+
license = { file = "LICENSE" }
|
|
11
|
+
authors = [
|
|
12
|
+
{ name = "Hazi Afrid" }
|
|
13
|
+
]
|
|
14
|
+
requires-python = ">=3.8"
|
|
15
|
+
dependencies = [
|
|
16
|
+
"numpy",
|
|
17
|
+
"pandas",
|
|
18
|
+
"scikit-learn",
|
|
19
|
+
"xgboost"
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[tool.setuptools.packages.find]
|
|
23
|
+
where = ["."]
|
|
@@ -1,90 +1,90 @@
|
|
|
1
|
-
from sklearn.linear_model import LogisticRegression as logisticregression
|
|
2
|
-
from sklearn.neighbors import KNeighborsClassifier as knnclassifier
|
|
3
|
-
from sklearn.tree import DecisionTreeClassifier as decisiontreeclassifier
|
|
4
|
-
from sklearn.ensemble import RandomForestClassifier as randomforestclassifier
|
|
5
|
-
from sklearn.ensemble import GradientBoostingClassifier as gradientboostingclassifier
|
|
6
|
-
from sklearn.ensemble import AdaBoostClassifier as adaboostclassifier
|
|
7
|
-
from xgboost import XGBClassifier as xgboostclassifier
|
|
8
|
-
from sklearn.svm import SVC as supportvectorclassifer
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class LogisticRegressionmodel:
|
|
12
|
-
def __init__(self, **kwargs):
|
|
13
|
-
self.model = logisticregression(**kwargs)
|
|
14
|
-
|
|
15
|
-
def fit(self, X,y):
|
|
16
|
-
self.model.fit(X,y)
|
|
17
|
-
|
|
18
|
-
def predict(self,X):
|
|
19
|
-
self.model.predict(X)
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class KNeighborsClassifiermodel:
|
|
23
|
-
def __init__(self, **kwargs):
|
|
24
|
-
self.model = knnclassifier(**kwargs)
|
|
25
|
-
|
|
26
|
-
def fit(self, X,y):
|
|
27
|
-
self.model.fit(X,y)
|
|
28
|
-
|
|
29
|
-
def predict(self,X):
|
|
30
|
-
self.model.predict(X)
|
|
31
|
-
|
|
32
|
-
class DecisionTreeClassifiermodel:
|
|
33
|
-
def __init__(self, **kwargs):
|
|
34
|
-
self.model = decisiontreeclassifier(**kwargs)
|
|
35
|
-
|
|
36
|
-
def fit(self,X,y):
|
|
37
|
-
self.model.fit(X,y)
|
|
38
|
-
|
|
39
|
-
def predict(self, X):
|
|
40
|
-
self.model.predict(X)
|
|
41
|
-
|
|
42
|
-
class RandomForestClassifiermodel:
|
|
43
|
-
def __init__(self, **kwargs):
|
|
44
|
-
self.model = randomforestclassifier(**kwargs)
|
|
45
|
-
|
|
46
|
-
def fit(self, X,y):
|
|
47
|
-
self.model.fit(X,y)
|
|
48
|
-
|
|
49
|
-
def predict(self, X):
|
|
50
|
-
self.model.predict(X)
|
|
51
|
-
|
|
52
|
-
class GradientBoostingClassifiermodel:
|
|
53
|
-
def __init__(self, **kwargs):
|
|
54
|
-
self.model = gradientboostingclassifier(**kwargs)
|
|
55
|
-
|
|
56
|
-
def fit(self, X,y):
|
|
57
|
-
self.model.fit(X,y)
|
|
58
|
-
|
|
59
|
-
def predict(self, X):
|
|
60
|
-
self.model.predict(X)
|
|
61
|
-
|
|
62
|
-
class AdaBoostClassifiermodel:
|
|
63
|
-
def __init__(self, **kwargs):
|
|
64
|
-
self.model = adaboostclassifier(**kwargs)
|
|
65
|
-
|
|
66
|
-
def fit(self, X,y):
|
|
67
|
-
self.model.fit(X,y)
|
|
68
|
-
|
|
69
|
-
def predict(self, X):
|
|
70
|
-
self.model.predict(X)
|
|
71
|
-
|
|
72
|
-
class SVClassifiermodel:
|
|
73
|
-
def __init__(self, **kwargs):
|
|
74
|
-
self.model = supportvectorclassifer(**kwargs)
|
|
75
|
-
|
|
76
|
-
def fit(self, X,y):
|
|
77
|
-
self.model.fit(X,y)
|
|
78
|
-
|
|
79
|
-
def predict(self, X):
|
|
80
|
-
self.model.predict(X)
|
|
81
|
-
|
|
82
|
-
class XGBClassifiermodel:
|
|
83
|
-
def __init__(self, **kwargs):
|
|
84
|
-
self.model = xgboostclassifier(**kwargs)
|
|
85
|
-
|
|
86
|
-
def fit (self, X,y):
|
|
87
|
-
self.model.fit(X,y)
|
|
88
|
-
|
|
89
|
-
def predict(self, X):
|
|
1
|
+
from sklearn.linear_model import LogisticRegression as logisticregression
|
|
2
|
+
from sklearn.neighbors import KNeighborsClassifier as knnclassifier
|
|
3
|
+
from sklearn.tree import DecisionTreeClassifier as decisiontreeclassifier
|
|
4
|
+
from sklearn.ensemble import RandomForestClassifier as randomforestclassifier
|
|
5
|
+
from sklearn.ensemble import GradientBoostingClassifier as gradientboostingclassifier
|
|
6
|
+
from sklearn.ensemble import AdaBoostClassifier as adaboostclassifier
|
|
7
|
+
from xgboost import XGBClassifier as xgboostclassifier
|
|
8
|
+
from sklearn.svm import SVC as supportvectorclassifer
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class LogisticRegressionmodel:
    """Thin wrapper around the estimator imported here as ``logisticregression``.

    That alias is scikit-learn's ``LogisticRegression``; the wrapper only
    forwards calls to it and keeps the estimator on ``self.model``.

    Parameters
    ----------
    **kwargs
        Passed unchanged to the underlying estimator's constructor.
    """

    def __init__(self, **kwargs):
        self.model = logisticregression(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped estimator on ``X`` and ``y``.

        Returns
        -------
        self
            The wrapper itself, so calls can be chained
            (``model.fit(X, y).predict(X)``).
        """
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        # The result must be returned; dropping it made predict() yield None.
        return self.model.predict(X)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class KNeighborsClassifiermodel:
    """Thin wrapper around the estimator imported here as ``knnclassifier``.

    That alias is scikit-learn's ``KNeighborsClassifier``; the wrapper only
    forwards calls to it and keeps the estimator on ``self.model``.

    Parameters
    ----------
    **kwargs
        Passed unchanged to the underlying estimator's constructor.
    """

    def __init__(self, **kwargs):
        self.model = knnclassifier(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped estimator on ``X`` and ``y``.

        Returns
        -------
        self
            The wrapper itself, so calls can be chained
            (``model.fit(X, y).predict(X)``).
        """
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        # The result must be returned; dropping it made predict() yield None.
        return self.model.predict(X)
|
|
31
|
+
|
|
32
|
+
class DecisionTreeClassifiermodel:
    """Thin wrapper around the estimator imported here as ``decisiontreeclassifier``.

    That alias is scikit-learn's ``DecisionTreeClassifier``; the wrapper only
    forwards calls to it and keeps the estimator on ``self.model``.

    Parameters
    ----------
    **kwargs
        Passed unchanged to the underlying estimator's constructor.
    """

    def __init__(self, **kwargs):
        self.model = decisiontreeclassifier(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped estimator on ``X`` and ``y``.

        Returns
        -------
        self
            The wrapper itself, so calls can be chained
            (``model.fit(X, y).predict(X)``).
        """
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        # The result must be returned; dropping it made predict() yield None.
        return self.model.predict(X)
|
|
41
|
+
|
|
42
|
+
class RandomForestClassifiermodel:
    """Thin wrapper around the estimator imported here as ``randomforestclassifier``.

    That alias is scikit-learn's ``RandomForestClassifier``; the wrapper only
    forwards calls to it and keeps the estimator on ``self.model``.

    Parameters
    ----------
    **kwargs
        Passed unchanged to the underlying estimator's constructor.
    """

    def __init__(self, **kwargs):
        self.model = randomforestclassifier(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped estimator on ``X`` and ``y``.

        Returns
        -------
        self
            The wrapper itself, so calls can be chained
            (``model.fit(X, y).predict(X)``).
        """
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        # The result must be returned; dropping it made predict() yield None.
        return self.model.predict(X)
|
|
51
|
+
|
|
52
|
+
class GradientBoostingClassifiermodel:
    """Thin wrapper around the estimator imported here as ``gradientboostingclassifier``.

    That alias is scikit-learn's ``GradientBoostingClassifier``; the wrapper
    only forwards calls to it and keeps the estimator on ``self.model``.

    Parameters
    ----------
    **kwargs
        Passed unchanged to the underlying estimator's constructor.
    """

    def __init__(self, **kwargs):
        self.model = gradientboostingclassifier(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped estimator on ``X`` and ``y``.

        Returns
        -------
        self
            The wrapper itself, so calls can be chained
            (``model.fit(X, y).predict(X)``).
        """
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        # The result must be returned; dropping it made predict() yield None.
        return self.model.predict(X)
|
|
61
|
+
|
|
62
|
+
class AdaBoostClassifiermodel:
    """Thin wrapper around the estimator imported here as ``adaboostclassifier``.

    That alias is scikit-learn's ``AdaBoostClassifier``; the wrapper only
    forwards calls to it and keeps the estimator on ``self.model``.

    Parameters
    ----------
    **kwargs
        Passed unchanged to the underlying estimator's constructor.
    """

    def __init__(self, **kwargs):
        self.model = adaboostclassifier(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped estimator on ``X`` and ``y``.

        Returns
        -------
        self
            The wrapper itself, so calls can be chained
            (``model.fit(X, y).predict(X)``).
        """
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        # The result must be returned; dropping it made predict() yield None.
        return self.model.predict(X)
|
|
71
|
+
|
|
72
|
+
class SVClassifiermodel:
    """Thin wrapper around the estimator imported here as ``supportvectorclassifer``.

    That alias is scikit-learn's ``SVC``; the wrapper only forwards calls to
    it and keeps the estimator on ``self.model``.

    Parameters
    ----------
    **kwargs
        Passed unchanged to the underlying estimator's constructor.
    """

    def __init__(self, **kwargs):
        self.model = supportvectorclassifer(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped estimator on ``X`` and ``y``.

        Returns
        -------
        self
            The wrapper itself, so calls can be chained
            (``model.fit(X, y).predict(X)``).
        """
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        # The result must be returned; dropping it made predict() yield None.
        return self.model.predict(X)
|
|
81
|
+
|
|
82
|
+
class XGBClassifiermodel:
    """Thin wrapper around the estimator imported here as ``xgboostclassifier``.

    That alias is xgboost's ``XGBClassifier``; the wrapper only forwards
    calls to it and keeps the estimator on ``self.model``.

    Parameters
    ----------
    **kwargs
        Passed unchanged to the underlying estimator's constructor.
    """

    def __init__(self, **kwargs):
        self.model = xgboostclassifier(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped estimator on ``X`` and ``y``.

        Returns
        -------
        self
            The wrapper itself, so calls can be chained
            (``model.fit(X, y).predict(X)``).
        """
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        # Previously this called ``self.model.fit(X)``, which both failed
        # (no targets) and never produced predictions.
        return self.model.predict(X)
|
|
@@ -1,21 +1,22 @@
|
|
|
1
|
-
#from .regression import LinearRegression, KNeighborsRegression, DecisionTreeRegression, RandomForestRegression, AdaBoostRegression, GradientBoostingRegression, XGBRegressor,SVR
|
|
2
|
-
#from .classifier import LogisticRegression, KNeighborsClassifier, DecisionTreeClassifier, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, XGBClassifier, SVC
|
|
3
|
-
|
|
4
|
-
#__all__= [
|
|
5
|
-
# 'LinearRegression', ' KNeighborsRegressor', 'DecisionTreeRegressor', 'RandomForestRegressor', 'AdaBoostRegressor', 'GradientBoostingRegressor', 'XGBRegressor', 'SVR',
|
|
6
|
-
# 'LogisticRegression', 'KNeighborsClassifier', 'DecisionTreeClassifier', 'RandomForestClassifier', 'AdaBoostClassifier', 'GradientBoostingClassifier', 'XGBClassifier', 'SVC'
|
|
7
|
-
#]
|
|
8
|
-
|
|
9
|
-
from .regressor import LinearRegressionmodel, KNNRegressionmodel, DecisionTreeRegressionmodel, RandomForestRegressionmodel, GradientBoostingRegressionmodel, AdaBoostRegressionmodel, XGBoostRegressionmodel, ElasticNetRegressionmodel
|
|
10
|
-
from .classifier import LogisticRegressionmodel, KNeighborsClassifiermodel, DecisionTreeClassifiermodel, RandomForestClassifiermodel, AdaBoostClassifiermodel, GradientBoostingClassifiermodel, XGBClassifiermodel, SVClassifiermodel
|
|
11
|
-
from .utils import create_random
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
'
|
|
16
|
-
'
|
|
17
|
-
'
|
|
18
|
-
'
|
|
19
|
-
'
|
|
20
|
-
|
|
21
|
-
|
|
1
|
+
#from .regression import LinearRegression, KNeighborsRegression, DecisionTreeRegression, RandomForestRegression, AdaBoostRegression, GradientBoostingRegression, XGBRegressor,SVR
|
|
2
|
+
#from .classifier import LogisticRegression, KNeighborsClassifier, DecisionTreeClassifier, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, XGBClassifier, SVC
|
|
3
|
+
|
|
4
|
+
#__all__= [
|
|
5
|
+
# 'LinearRegression', ' KNeighborsRegressor', 'DecisionTreeRegressor', 'RandomForestRegressor', 'AdaBoostRegressor', 'GradientBoostingRegressor', 'XGBRegressor', 'SVR',
|
|
6
|
+
# 'LogisticRegression', 'KNeighborsClassifier', 'DecisionTreeClassifier', 'RandomForestClassifier', 'AdaBoostClassifier', 'GradientBoostingClassifier', 'XGBClassifier', 'SVC'
|
|
7
|
+
#]
|
|
8
|
+
|
|
9
|
+
from .regressor import LinearRegressionmodel, KNNRegressionmodel, DecisionTreeRegressionmodel, RandomForestRegressionmodel, GradientBoostingRegressionmodel, AdaBoostRegressionmodel, XGBoostRegressionmodel, ElasticNetRegressionmodel
|
|
10
|
+
from .classifier import LogisticRegressionmodel, KNeighborsClassifiermodel, DecisionTreeClassifiermodel, RandomForestClassifiermodel, AdaBoostClassifiermodel, GradientBoostingClassifiermodel, XGBClassifiermodel, SVClassifiermodel
|
|
11
|
+
from .utils import create_random, ProbabilisticImputer
|
|
12
|
+
from .randomizer import Sampler
|
|
13
|
+
from .split import train_test_split
|
|
14
|
+
__all__=[
|
|
15
|
+
'LinearRegressionmodel','LogisticRegressionmodel', 'KNNRegressionmodel','GradientBoostingRegressionmodel',
|
|
16
|
+
'AdaBoostRegressionmodel', 'XGBoostRegressionmodel', 'ElasticNetRegressionmodel',
|
|
17
|
+
'DecisionTreeRegressionmodel', 'RandomForestRegressionmodel',
|
|
18
|
+
'KNeighborsClassifiermodel', 'DecisionTreeClassifiermodel', 'RandomForestClassifiermodel','AdaBoostClassifiermodel',
|
|
19
|
+
'GradientBoostingClassifiermodel', 'XGBClassifiermodel', 'SVClassifiermodel',
|
|
20
|
+
'create_random', 'Sampler', 'train_test_split', "ProbabilisticImputer",
|
|
21
|
+
]
|
|
22
|
+
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
import random
|
|
2
|
+
import numpy as np
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from typing import Union, Optional
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
DataType = Union[list, tuple, np.ndarray, pd.DataFrame]


class Sampler:
    """
    Unified random sampler supporting:

    - Stateful and stateless sampling
    - Sampling with or without replacement
    - list, tuple, numpy.ndarray, pandas.DataFrame
    - Row or column sampling for DataFrames

    Parameters
    ----------
    data : list, tuple, numpy.ndarray, or pandas.DataFrame
        Data to sample from. Lists and tuples are copied into a new list;
        arrays and DataFrames are kept by reference.
    replace : bool, default=False
        Whether stateless sampling is done with replacement. Has no effect
        when ``stateful=True`` (stateful draws never repeat an element).
    stateful : bool, default=False
        If True, the data is shuffled once into an internal pool and every
        draw consumes elements from it, so they cannot be drawn again until
        ``reset()`` is called.
    seed : int, optional
        Random seed for reproducibility. When given, the sampler uses its own
        private generators seeded with this value, so creating a sampler does
        not reseed the global ``random`` / ``numpy.random`` state. When
        omitted, the global generators are used.
    """

    def __init__(
        self,
        data: DataType,
        replace: bool = False,
        stateful: bool = False,
        seed: Optional[int] = None
    ):
        self.replace = replace
        self.stateful = stateful

        if seed is not None:
            # Private generators give the same streams as seeding the global
            # ones, without the process-wide side effect.
            self._py_rng = random.Random(seed)
            self._np_rng = np.random.RandomState(seed)
            self._pd_random_state = self._np_rng
        else:
            # Fall back to the shared global generators; pandas does the same
            # when random_state is None.
            self._py_rng = random
            self._np_rng = np.random
            self._pd_random_state = None

        # Detect data type
        if isinstance(data, (list, tuple)):
            self._type = "list"
            self.data = list(data)
        elif isinstance(data, np.ndarray):
            self._type = "array"
            self.data = data
        elif isinstance(data, pd.DataFrame):
            self._type = "df"
            self.data = data
        else:
            raise TypeError(
                "Unsupported data type. Use list, numpy array, or pandas DataFrame."
            )

        if self.stateful:
            self._reset_pool()

    # ---------------- INTERNAL ---------------- #

    def _reset_pool(self):
        """Initialize or reset the shuffled pool(s) used by stateful draws."""
        if self._type == "list":
            self.pool = self.data.copy()
            self._py_rng.shuffle(self.pool)

        elif self._type == "array":
            self.pool = self.data.copy()
            # Shuffles along the first axis only.
            self._np_rng.shuffle(self.pool)

        elif self._type == "df":
            # Shuffle the rows so stateful row draws are random rather than
            # returned in their original order.
            self.pool = self.data.sample(
                frac=1, random_state=self._pd_random_state
            )
            # Columns get their own pool so that column draws and row draws
            # do not consume each other's state.
            self._column_pool = list(self.data.columns)
            self._py_rng.shuffle(self._column_pool)

    def _draw_from_pool(self, n: int):
        """Remove and return the next ``n`` items of a list/array pool."""
        if n > len(self.pool):
            raise StopIteration("No items left to sample")
        drawn = self.pool[:n]
        self.pool = self.pool[n:]
        return drawn

    def _sample_list(self, n: int):
        """Sample ``n`` elements from list data."""
        if self.stateful:
            return self._draw_from_pool(n)

        if self.replace:
            return self._py_rng.choices(self.data, k=n)

        if n > len(self.data):
            raise ValueError("Cannot sample more elements than population")
        return self._py_rng.sample(self.data, n)

    def _sample_array(self, n: int):
        """Sample ``n`` entries along the first axis of array data."""
        if self.stateful:
            return self._draw_from_pool(n)

        # Choose positions rather than values: ``choice`` only accepts 1-D
        # input, so sampling the array directly fails for 2-D+ data.
        positions = self._np_rng.choice(
            len(self.data), size=n, replace=self.replace
        )
        return self.data[positions]

    def _sample_rows(self, n: int):
        """Sample ``n`` rows from DataFrame data."""
        if self.stateful:
            if n > len(self.pool):
                raise StopIteration("No rows left to sample")
            drawn = self.pool.iloc[:n]
            self.pool = self.pool.iloc[n:]
            return drawn

        return self.data.sample(
            n=n, replace=self.replace, random_state=self._pd_random_state
        )

    def _sample_columns(self, n: int):
        """Sample ``n`` columns from DataFrame data."""
        if self.stateful:
            if n > len(self._column_pool):
                raise StopIteration("No columns left to sample")
            chosen = self._column_pool[:n]
            self._column_pool = self._column_pool[n:]
            return self.data[chosen]

        cols = list(self.data.columns)
        if self.replace and cols:
            # With replacement n may exceed the number of columns.
            chosen = self._py_rng.choices(cols, k=n)
        else:
            if n > len(cols):
                raise StopIteration("No columns left to sample")
            chosen = self._py_rng.sample(cols, n)
        return self.data[chosen]

    # ---------------- PUBLIC API ---------------- #

    def sample(self, n: int = 1, axis: int = 0):
        """
        Sample data.

        Parameters
        ----------
        n : int, default=1
            Number of items to sample.
        axis : int, default=0
            Axis to sample from when data is a DataFrame.
            0 = rows, 1 = columns. Ignored for lists and arrays.

        Returns
        -------
        list, numpy.ndarray, or pandas.DataFrame
            A list for list/tuple input, an array for array input (entries
            taken along the first axis), a DataFrame for DataFrame input.

        Raises
        ------
        ValueError
            If ``n`` is not positive, ``axis`` is not 0 or 1, or a stateless
            draw without replacement asks for more elements than exist.
        StopIteration
            If a stateful draw asks for more elements than remain in the
            pool, or a column draw without replacement asks for more columns
            than exist. Kept for backward compatibility; note that it must
            not be allowed to escape inside a generator.
        """
        if n <= 0:
            raise ValueError("n must be a positive integer")

        if self._type == "list":
            return self._sample_list(n)

        if self._type == "array":
            return self._sample_array(n)

        if axis not in (0, 1):
            raise ValueError("axis must be 0 (rows) or 1 (columns)")

        if axis == 0:
            return self._sample_rows(n)
        return self._sample_columns(n)

    def reset(self):
        """
        Reset internal state (only for stateful sampler).

        The pool is refilled from the original data and reshuffled; the
        random generator is not reseeded.

        Raises
        ------
        RuntimeError
            If the sampler was created with ``stateful=False``.
        """
        if not self.stateful:
            raise RuntimeError("reset() is only available when stateful=True")
        self._reset_pool()
|