quicklearnkit 0.0.1__tar.gz → 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quicklearnkit-0.1.0/LICENSE +21 -0
- quicklearnkit-0.1.0/PKG-INFO +35 -0
- quicklearnkit-0.1.0/pyproject.toml +23 -0
- quicklearnkit-0.1.0/quicklearnkit/__init__.py +2 -0
- {quicklearnkit-0.0.1 → quicklearnkit-0.1.0}/quicklearnkit/classifier.py +89 -89
- {quicklearnkit-0.0.1 → quicklearnkit-0.1.0}/quicklearnkit/quickimports.py +22 -21
- quicklearnkit-0.1.0/quicklearnkit/randomizer.py +175 -0
- {quicklearnkit-0.0.1 → quicklearnkit-0.1.0}/quicklearnkit/regressor.py +91 -91
- quicklearnkit-0.1.0/quicklearnkit/split.py +99 -0
- quicklearnkit-0.1.0/quicklearnkit/utils.py +196 -0
- quicklearnkit-0.1.0/quicklearnkit.egg-info/PKG-INFO +35 -0
- {quicklearnkit-0.0.1 → quicklearnkit-0.1.0}/quicklearnkit.egg-info/SOURCES.txt +3 -2
- {quicklearnkit-0.0.1 → quicklearnkit-0.1.0}/quicklearnkit.egg-info/requires.txt +3 -2
- {quicklearnkit-0.0.1 → quicklearnkit-0.1.0}/quicklearnkit.egg-info/top_level.txt +1 -0
- {quicklearnkit-0.0.1 → quicklearnkit-0.1.0}/setup.cfg +4 -4
- quicklearnkit-0.0.1/PKG-INFO +0 -26
- quicklearnkit-0.0.1/README.md +0 -1
- quicklearnkit-0.0.1/pyproject.toml +0 -3
- quicklearnkit-0.0.1/quicklearnkit/__init__.py +0 -2
- quicklearnkit-0.0.1/quicklearnkit/utils.py +0 -39
- quicklearnkit-0.0.1/quicklearnkit.egg-info/PKG-INFO +0 -26
- quicklearnkit-0.0.1/setup.py +0 -24
- {quicklearnkit-0.0.1 → quicklearnkit-0.1.0}/quicklearnkit.egg-info/dependency_links.txt +0 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Masterhazi
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: quicklearnkit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: QuickLearnKit: utilities for learning machine learning concepts
|
|
5
|
+
Author: Hazi Afrid
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025 Masterhazi
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Requires-Python: >=3.8
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Requires-Dist: numpy
|
|
32
|
+
Requires-Dist: pandas
|
|
33
|
+
Requires-Dist: scikit-learn
|
|
34
|
+
Requires-Dist: xgboost
|
|
35
|
+
Dynamic: license-file
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "quicklearnkit"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "QuickLearnKit: utilities for learning machine learning concepts"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { file = "LICENSE" }
|
|
11
|
+
authors = [
|
|
12
|
+
{ name = "Hazi Afrid" }
|
|
13
|
+
]
|
|
14
|
+
requires-python = ">=3.8"
|
|
15
|
+
dependencies = [
|
|
16
|
+
"numpy",
|
|
17
|
+
"pandas",
|
|
18
|
+
"scikit-learn",
|
|
19
|
+
"xgboost"
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[tool.setuptools.packages.find]
|
|
23
|
+
where = ["."]
|
|
@@ -1,90 +1,90 @@
|
|
|
1
|
-
from sklearn.linear_model import LogisticRegression as logisticregression
|
|
2
|
-
from sklearn.neighbors import KNeighborsClassifier as knnclassifier
|
|
3
|
-
from sklearn.tree import DecisionTreeClassifier as decisiontreeclassifier
|
|
4
|
-
from sklearn.ensemble import RandomForestClassifier as randomforestclassifier
|
|
5
|
-
from sklearn.ensemble import GradientBoostingClassifier as gradientboostingclassifier
|
|
6
|
-
from sklearn.ensemble import AdaBoostClassifier as adaboostclassifier
|
|
7
|
-
from xgboost import XGBClassifier as xgboostclassifier
|
|
8
|
-
from sklearn.svm import SVC as supportvectorclassifer
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class LogisticRegressionmodel:
|
|
12
|
-
def __init__(self, **kwargs):
|
|
13
|
-
self.model = logisticregression(**kwargs)
|
|
14
|
-
|
|
15
|
-
def fit(self, X,y):
|
|
16
|
-
self.model.fit(X,y)
|
|
17
|
-
|
|
18
|
-
def predict(self,X):
|
|
19
|
-
self.model.predict(X)
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class KNeighborsClassifiermodel:
|
|
23
|
-
def __init__(self, **kwargs):
|
|
24
|
-
self.model = knnclassifier(**kwargs)
|
|
25
|
-
|
|
26
|
-
def fit(self, X,y):
|
|
27
|
-
self.model.fit(X,y)
|
|
28
|
-
|
|
29
|
-
def predict(self,X):
|
|
30
|
-
self.model.predict(X)
|
|
31
|
-
|
|
32
|
-
class DecisionTreeClassifiermodel:
|
|
33
|
-
def __init__(self, **kwargs):
|
|
34
|
-
self.model = decisiontreeclassifier(**kwargs)
|
|
35
|
-
|
|
36
|
-
def fit(self,X,y):
|
|
37
|
-
self.model.fit(X,y)
|
|
38
|
-
|
|
39
|
-
def predict(self, X):
|
|
40
|
-
self.model.predict(X)
|
|
41
|
-
|
|
42
|
-
class RandomForestClassifiermodel:
|
|
43
|
-
def __init__(self, **kwargs):
|
|
44
|
-
self.model = randomforestclassifier(**kwargs)
|
|
45
|
-
|
|
46
|
-
def fit(self, X,y):
|
|
47
|
-
self.model.fit(X,y)
|
|
48
|
-
|
|
49
|
-
def predict(self, X):
|
|
50
|
-
self.model.predict(X)
|
|
51
|
-
|
|
52
|
-
class GradientBoostingClassifiermodel:
|
|
53
|
-
def __init__(self, **kwargs):
|
|
54
|
-
self.model = gradientboostingclassifier(**kwargs)
|
|
55
|
-
|
|
56
|
-
def fit(self, X,y):
|
|
57
|
-
self.model.fit(X,y)
|
|
58
|
-
|
|
59
|
-
def predict(self, X):
|
|
60
|
-
self.model.predict(X)
|
|
61
|
-
|
|
62
|
-
class AdaBoostClassifiermodel:
|
|
63
|
-
def __init__(self, **kwargs):
|
|
64
|
-
self.model = adaboostclassifier(**kwargs)
|
|
65
|
-
|
|
66
|
-
def fit(self, X,y):
|
|
67
|
-
self.model.fit(X,y)
|
|
68
|
-
|
|
69
|
-
def predict(self, X):
|
|
70
|
-
self.model.predict(X)
|
|
71
|
-
|
|
72
|
-
class SVClassifiermodel:
|
|
73
|
-
def __init__(self, **kwargs):
|
|
74
|
-
self.model = supportvectorclassifer(**kwargs)
|
|
75
|
-
|
|
76
|
-
def fit(self, X,y):
|
|
77
|
-
self.model.fit(X,y)
|
|
78
|
-
|
|
79
|
-
def predict(self, X):
|
|
80
|
-
self.model.predict(X)
|
|
81
|
-
|
|
82
|
-
class XGBClassifiermodel:
|
|
83
|
-
def __init__(self, **kwargs):
|
|
84
|
-
self.model = xgboostclassifier(**kwargs)
|
|
85
|
-
|
|
86
|
-
def fit (self, X,y):
|
|
87
|
-
self.model.fit(X,y)
|
|
88
|
-
|
|
89
|
-
def predict(self, X):
|
|
1
|
+
"""Thin wrappers around common scikit-learn / XGBoost classifiers.

Every wrapper exposes the same minimal interface:

- ``__init__(**kwargs)`` forwards keyword arguments to the underlying estimator
- ``fit(X, y)`` trains the estimator and returns ``self``
- ``predict(X)`` returns the estimator's predictions
"""

from sklearn.linear_model import LogisticRegression as logisticregression
from sklearn.neighbors import KNeighborsClassifier as knnclassifier
from sklearn.tree import DecisionTreeClassifier as decisiontreeclassifier
from sklearn.ensemble import RandomForestClassifier as randomforestclassifier
from sklearn.ensemble import GradientBoostingClassifier as gradientboostingclassifier
from sklearn.ensemble import AdaBoostClassifier as adaboostclassifier
from xgboost import XGBClassifier as xgboostclassifier
from sklearn.svm import SVC as supportvectorclassifier


class _ClassifierWrapper:
    """Shared plumbing for the classifier wrappers below.

    Each concrete subclass only sets ``estimator`` to the wrapped
    estimator class; construction, fitting and prediction are identical
    across all of them, so they live here once.
    """

    # Overridden by each concrete wrapper with the estimator class.
    estimator = None

    def __init__(self, **kwargs):
        """Instantiate the wrapped estimator, forwarding all kwargs untouched."""
        self.model = type(self).estimator(**kwargs)

    def fit(self, X, y):
        """Train the wrapped estimator on features ``X`` and labels ``y``.

        Returns
        -------
        self
            Enables chaining, e.g. ``model.fit(X, y).predict(X)``.
        """
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``.

        Bug fixes vs. the previous version: predictions were computed but
        discarded (``predict`` always returned ``None``), and
        ``XGBClassifiermodel.predict`` mistakenly called ``fit(X)``.
        """
        return self.model.predict(X)


class LogisticRegressionmodel(_ClassifierWrapper):
    """Wrapper around ``sklearn.linear_model.LogisticRegression``."""
    estimator = logisticregression


class KNeighborsClassifiermodel(_ClassifierWrapper):
    """Wrapper around ``sklearn.neighbors.KNeighborsClassifier``."""
    estimator = knnclassifier


class DecisionTreeClassifiermodel(_ClassifierWrapper):
    """Wrapper around ``sklearn.tree.DecisionTreeClassifier``."""
    estimator = decisiontreeclassifier


class RandomForestClassifiermodel(_ClassifierWrapper):
    """Wrapper around ``sklearn.ensemble.RandomForestClassifier``."""
    estimator = randomforestclassifier


class GradientBoostingClassifiermodel(_ClassifierWrapper):
    """Wrapper around ``sklearn.ensemble.GradientBoostingClassifier``."""
    estimator = gradientboostingclassifier


class AdaBoostClassifiermodel(_ClassifierWrapper):
    """Wrapper around ``sklearn.ensemble.AdaBoostClassifier``."""
    estimator = adaboostclassifier


class SVClassifiermodel(_ClassifierWrapper):
    """Wrapper around ``sklearn.svm.SVC``."""
    estimator = supportvectorclassifier


class XGBClassifiermodel(_ClassifierWrapper):
    """Wrapper around ``xgboost.XGBClassifier``."""
    estimator = xgboostclassifier
|
|
@@ -1,21 +1,22 @@
|
|
|
1
|
-
#from .regression import LinearRegression, KNeighborsRegression, DecisionTreeRegression, RandomForestRegression, AdaBoostRegression, GradientBoostingRegression, XGBRegressor,SVR
|
|
2
|
-
#from .classifier import LogisticRegression, KNeighborsClassifier, DecisionTreeClassifier, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, XGBClassifier, SVC
|
|
3
|
-
|
|
4
|
-
#__all__= [
|
|
5
|
-
# 'LinearRegression', ' KNeighborsRegressor', 'DecisionTreeRegressor', 'RandomForestRegressor', 'AdaBoostRegressor', 'GradientBoostingRegressor', 'XGBRegressor', 'SVR',
|
|
6
|
-
# 'LogisticRegression', 'KNeighborsClassifier', 'DecisionTreeClassifier', 'RandomForestClassifier', 'AdaBoostClassifier', 'GradientBoostingClassifier', 'XGBClassifier', 'SVC'
|
|
7
|
-
#]
|
|
8
|
-
|
|
9
|
-
from .regressor import LinearRegressionmodel, KNNRegressionmodel, DecisionTreeRegressionmodel, RandomForestRegressionmodel, GradientBoostingRegressionmodel, AdaBoostRegressionmodel, XGBoostRegressionmodel, ElasticNetRegressionmodel
|
|
10
|
-
from .classifier import LogisticRegressionmodel, KNeighborsClassifiermodel, DecisionTreeClassifiermodel, RandomForestClassifiermodel, AdaBoostClassifiermodel, GradientBoostingClassifiermodel, XGBClassifiermodel, SVClassifiermodel
|
|
11
|
-
from .utils import create_random
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
'
|
|
16
|
-
'
|
|
17
|
-
'
|
|
18
|
-
'
|
|
19
|
-
'
|
|
20
|
-
|
|
21
|
-
|
|
1
|
+
"""Public package interface for quicklearnkit.

Re-exports the model wrappers and utilities so callers can write, e.g.,
``from quicklearnkit import LinearRegressionmodel``.
"""

from .regressor import (
    LinearRegressionmodel,
    KNNRegressionmodel,
    DecisionTreeRegressionmodel,
    RandomForestRegressionmodel,
    GradientBoostingRegressionmodel,
    AdaBoostRegressionmodel,
    XGBoostRegressionmodel,
    ElasticNetRegressionmodel,
)
from .classifier import (
    LogisticRegressionmodel,
    KNeighborsClassifiermodel,
    DecisionTreeClassifiermodel,
    RandomForestClassifiermodel,
    AdaBoostClassifiermodel,
    GradientBoostingClassifiermodel,
    XGBClassifiermodel,
    SVClassifiermodel,
)
from .utils import create_random, ProbabilisticImputer
from .randomizer import Sampler
from .split import train_test_split

__all__ = [
    # regressors
    "LinearRegressionmodel",
    "KNNRegressionmodel",
    "DecisionTreeRegressionmodel",
    "RandomForestRegressionmodel",
    "GradientBoostingRegressionmodel",
    "AdaBoostRegressionmodel",
    "XGBoostRegressionmodel",
    "ElasticNetRegressionmodel",
    # classifiers
    "LogisticRegressionmodel",
    "KNeighborsClassifiermodel",
    "DecisionTreeClassifiermodel",
    "RandomForestClassifiermodel",
    "AdaBoostClassifiermodel",
    "GradientBoostingClassifiermodel",
    "XGBClassifiermodel",
    "SVClassifiermodel",
    # utilities
    "create_random",
    "ProbabilisticImputer",
    "Sampler",
    "train_test_split",
]
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
import random
import numpy as np
import pandas as pd
from typing import Union, Optional


# Accepted input containers for Sampler.
DataType = Union[list, tuple, np.ndarray, pd.DataFrame]


class Sampler:
    """Random sampler over lists, NumPy arrays, and pandas DataFrames.

    Supports stateless draws (with or without replacement) and a stateful
    mode in which each draw consumes elements from a shuffled internal
    pool until it is exhausted.

    Parameters
    ----------
    data : list, tuple, numpy.ndarray, or pandas.DataFrame
        Data to sample from.
    replace : bool, default=False
        Whether stateless sampling is done with replacement.
    stateful : bool, default=False
        If True, sampled elements are removed from future draws.
    seed : int, optional
        Random seed for reproducibility (note: seeds the *global*
        ``random`` and ``numpy.random`` state).
    """

    def __init__(
        self,
        data: DataType,
        replace: bool = False,
        stateful: bool = False,
        seed: Optional[int] = None
    ):
        self.replace = replace
        self.stateful = stateful

        # Seeding touches the module-level RNGs, matching the documented contract.
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)

        # Normalize the container and remember which code path to use later.
        if isinstance(data, (list, tuple)):
            self._type = "list"
            self.data = list(data)
        elif isinstance(data, np.ndarray):
            self._type = "array"
            self.data = data
        elif isinstance(data, pd.DataFrame):
            self._type = "df"
            self.data = data
        else:
            raise TypeError(
                "Unsupported data type. Use list, numpy array, or pandas DataFrame."
            )

        if self.stateful:
            self._reset_pool()

    # ---------------- INTERNAL ---------------- #

    def _reset_pool(self):
        """(Re)build the draw pool: a shuffled copy of the original data."""
        self.pool = self.data.copy()
        if self._type == "list":
            random.shuffle(self.pool)
        elif self._type == "array":
            np.random.shuffle(self.pool)
        # DataFrames are consumed positionally; no shuffle is applied here.

    # ---------------- PUBLIC API ---------------- #

    def sample(self, n: int = 1, axis: int = 0):
        """Draw ``n`` items from the data.

        Parameters
        ----------
        n : int, default=1
            Number of items to sample.
        axis : int, default=0
            For DataFrames only: 0 samples rows, 1 samples columns.

        Returns
        -------
        Sampled data (same container type as the input).
        """
        if n <= 0:
            raise ValueError("n must be a positive integer")

        source = self.pool if self.stateful else self.data

        # -------- LIST --------
        if self._type == "list":
            if self.stateful:
                if n > len(self.pool):
                    raise StopIteration("No items left to sample")
                drawn, self.pool = self.pool[:n], self.pool[n:]
                return drawn

            if not self.replace and n > len(source):
                raise ValueError("Cannot sample more elements than population")

            if self.replace:
                return random.choices(source, k=n)
            return random.sample(source, n)

        # -------- NUMPY ARRAY --------
        if self._type == "array":
            if self.stateful:
                if n > len(self.pool):
                    raise StopIteration("No items left to sample")
                drawn, self.pool = self.pool[:n], self.pool[n:]
                return drawn

            return np.random.choice(source, size=n, replace=self.replace)

        # -------- DATAFRAME --------
        if self._type == "df":
            if axis not in (0, 1):
                raise ValueError("axis must be 0 (rows) or 1 (columns)")

            # Row sampling
            if axis == 0:
                if self.stateful:
                    if n > len(self.pool):
                        raise StopIteration("No rows left to sample")
                    drawn = self.pool.iloc[:n]
                    self.pool = self.pool.iloc[n:]
                    return drawn

                return self.data.sample(n=n, replace=self.replace)

            # Column sampling
            cols = list(source.columns)

            if n > len(cols):
                raise StopIteration("No columns left to sample")

            if self.stateful:
                chosen = cols[:n]
            elif self.replace:
                chosen = random.choices(cols, k=n)
            else:
                chosen = random.sample(cols, n)

            if self.stateful:
                self.pool = self.pool.drop(columns=chosen)

            return self.data[chosen]

    def reset(self):
        """Rebuild the pool; valid only in stateful mode."""
        if not self.stateful:
            raise RuntimeError("reset() is only available when stateful=True")
        self._reset_pool()
|
@@ -1,92 +1,92 @@
|
|
|
1
|
-
from sklearn.linear_model import LinearRegression as linearregression
|
|
2
|
-
from sklearn.neighbors import KNeighborsRegressor as knnregressor
|
|
3
|
-
from sklearn.tree import DecisionTreeRegressor as decisiontreeregressor
|
|
4
|
-
from sklearn.ensemble import RandomForestRegressor as randomforestregressor
|
|
5
|
-
from sklearn.ensemble import GradientBoostingRegressor as gradientboostingregressor
|
|
6
|
-
from sklearn.ensemble import AdaBoostRegressor as adaboostregressor
|
|
7
|
-
from xgboost import XGBRegressor as xgboostregressor
|
|
8
|
-
from sklearn.svm import SVR as supportvectorregressor
|
|
9
|
-
from sklearn.linear_model import ElasticNet as elasticnetregressor
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class LinearRegressionmodel:
|
|
14
|
-
def __init__(self, **kwargs):
|
|
15
|
-
self.model = linearregression(**kwargs)
|
|
16
|
-
|
|
17
|
-
def fit(self, X,y):
|
|
18
|
-
self.model.fit(X,y)
|
|
19
|
-
|
|
20
|
-
def predict(self,X):
|
|
21
|
-
self.model.predict(X)
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
class KNNRegressionmodel:
|
|
25
|
-
def __init__(self, **kwargs):
|
|
26
|
-
self.model = knnregressor(**kwargs)
|
|
27
|
-
|
|
28
|
-
def fit(self, X, y):
|
|
29
|
-
self.model.fit(X,y)
|
|
30
|
-
|
|
31
|
-
def predict(self, X):
|
|
32
|
-
self.model.predict(X)
|
|
33
|
-
|
|
34
|
-
class DecisionTreeRegressionmodel:
|
|
35
|
-
def __init__(self, **kwargs):
|
|
36
|
-
self.model = decisiontreeregressor(**kwargs)
|
|
37
|
-
|
|
38
|
-
def fit(self,X,y):
|
|
39
|
-
self.model.fit(X,y)
|
|
40
|
-
|
|
41
|
-
def predict(self, X):
|
|
42
|
-
self.model.predict(X)
|
|
43
|
-
|
|
44
|
-
class RandomForestRegressionmodel:
|
|
45
|
-
def __init__(self, **kwargs):
|
|
46
|
-
self.model = randomforestregressor(**kwargs)
|
|
47
|
-
|
|
48
|
-
def fit(self, X, y):
|
|
49
|
-
self.model.fit(X,y)
|
|
50
|
-
|
|
51
|
-
def predict(self,X):
|
|
52
|
-
self.model.predict(X)
|
|
53
|
-
|
|
54
|
-
class GradientBoostingRegressionmodel:
|
|
55
|
-
def __init__(self, **kwargs):
|
|
56
|
-
self.model = randomforestregressor(**kwargs)
|
|
57
|
-
|
|
58
|
-
def fit(self, X, y):
|
|
59
|
-
self.model.fit(X,y)
|
|
60
|
-
|
|
61
|
-
def predict(self,X):
|
|
62
|
-
self.model.predict(X)
|
|
63
|
-
|
|
64
|
-
class AdaBoostRegressionmodel:
|
|
65
|
-
def __init__(self, **kwargs):
|
|
66
|
-
self.model = adaboostregressor(**kwargs)
|
|
67
|
-
|
|
68
|
-
def fit(self, X, y):
|
|
69
|
-
self.model.fit(X,y)
|
|
70
|
-
|
|
71
|
-
def predict(self,X):
|
|
72
|
-
self.model.predict(X)
|
|
73
|
-
|
|
74
|
-
class XGBoostRegressionmodel:
|
|
75
|
-
def __init__(self, **kwargs):
|
|
76
|
-
self.model = xgboostregressor(**kwargs)
|
|
77
|
-
|
|
78
|
-
def fit(self, X, y):
|
|
79
|
-
self.model.fit(X,y)
|
|
80
|
-
|
|
81
|
-
def predict(self,X):
|
|
82
|
-
self.model.predict(X)
|
|
83
|
-
|
|
84
|
-
class ElasticNetRegressionmodel:
|
|
85
|
-
def __init__(self, **kwargs):
|
|
86
|
-
self.model = elasticnetregressor(**kwargs)
|
|
87
|
-
|
|
88
|
-
def fit(self, X, y):
|
|
89
|
-
self.model.fit(X,y)
|
|
90
|
-
|
|
91
|
-
def predict(self,X):
|
|
1
|
+
"""Thin wrappers around common scikit-learn / XGBoost regressors.

Every wrapper exposes the same minimal interface:

- ``__init__(**kwargs)`` forwards keyword arguments to the underlying estimator
- ``fit(X, y)`` trains the estimator and returns ``self``
- ``predict(X)`` returns the estimator's predictions
"""

from sklearn.linear_model import LinearRegression as linearregression
from sklearn.neighbors import KNeighborsRegressor as knnregressor
from sklearn.tree import DecisionTreeRegressor as decisiontreeregressor
from sklearn.ensemble import RandomForestRegressor as randomforestregressor
from sklearn.ensemble import GradientBoostingRegressor as gradientboostingregressor
from sklearn.ensemble import AdaBoostRegressor as adaboostregressor
from xgboost import XGBRegressor as xgboostregressor
from sklearn.svm import SVR as supportvectorregressor
from sklearn.linear_model import ElasticNet as elasticnetregressor


class _RegressorWrapper:
    """Shared plumbing for the regressor wrappers below.

    Each concrete subclass only sets ``estimator`` to the wrapped
    estimator class; construction, fitting and prediction are identical
    across all of them, so they live here once.
    """

    # Overridden by each concrete wrapper with the estimator class.
    estimator = None

    def __init__(self, **kwargs):
        """Instantiate the wrapped estimator, forwarding all kwargs untouched."""
        self.model = type(self).estimator(**kwargs)

    def fit(self, X, y):
        """Train the wrapped estimator on features ``X`` and targets ``y``.

        Returns
        -------
        self
            Enables chaining, e.g. ``model.fit(X, y).predict(X)``.
        """
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``.

        Bug fix vs. the previous version: predictions were computed but
        discarded (``predict`` always returned ``None``).
        """
        return self.model.predict(X)


class LinearRegressionmodel(_RegressorWrapper):
    """Wrapper around ``sklearn.linear_model.LinearRegression``."""
    estimator = linearregression


class KNNRegressionmodel(_RegressorWrapper):
    """Wrapper around ``sklearn.neighbors.KNeighborsRegressor``."""
    estimator = knnregressor


class DecisionTreeRegressionmodel(_RegressorWrapper):
    """Wrapper around ``sklearn.tree.DecisionTreeRegressor``."""
    estimator = decisiontreeregressor


class RandomForestRegressionmodel(_RegressorWrapper):
    """Wrapper around ``sklearn.ensemble.RandomForestRegressor``."""
    estimator = randomforestregressor


class GradientBoostingRegressionmodel(_RegressorWrapper):
    """Wrapper around ``sklearn.ensemble.GradientBoostingRegressor``.

    Bug fix: the previous version constructed a ``RandomForestRegressor``
    here instead of a ``GradientBoostingRegressor``.
    """
    estimator = gradientboostingregressor


class AdaBoostRegressionmodel(_RegressorWrapper):
    """Wrapper around ``sklearn.ensemble.AdaBoostRegressor``."""
    estimator = adaboostregressor


class XGBoostRegressionmodel(_RegressorWrapper):
    """Wrapper around ``xgboost.XGBRegressor``."""
    estimator = xgboostregressor


class ElasticNetRegressionmodel(_RegressorWrapper):
    """Wrapper around ``sklearn.linear_model.ElasticNet``."""
    estimator = elasticnetregressor
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import numpy as np
import pandas as pd
from typing import Optional, Tuple, Union


ArrayLike = Union[np.ndarray, pd.DataFrame]


def train_test_split(
    X: ArrayLike,
    y: Optional[np.ndarray] = None,
    test_size: float = 0.25,
    shuffle: bool = True,
    stratify: Optional[np.ndarray] = None,
    random_state: Optional[int] = None
) -> Tuple:
    """
    Split data into train and test subsets.

    Parameters
    ----------
    X : numpy.ndarray or pandas.DataFrame
        Feature data to split.
    y : numpy.ndarray, optional
        Target labels corresponding to X.
    test_size : float, default=0.25
        Proportion of the dataset to include in the test split
        (strictly between 0 and 1).
    shuffle : bool, default=True
        Whether to shuffle data before splitting.
    stratify : numpy.ndarray, optional
        Class labels for stratified split (classification only).
        Must have the same length as X and requires shuffle=True.
    random_state : int, optional
        Seed for reproducibility. NOTE: this seeds NumPy's *global*
        RNG state as a side effect.

    Returns
    -------
    X_train, X_test : same type as X
        Split feature data.
    y_train, y_test : numpy.ndarray, optional
        Split target labels (only if y is provided).

    Raises
    ------
    ValueError
        If test_size is out of range, if y/stratify lengths do not
        match X, or if stratify is used without y or without shuffle.
    """

    if not 0 < test_size < 1:
        raise ValueError("test_size must be between 0 and 1")

    n_samples = len(X)

    # Validate lengths up front so callers get a clear error instead of a
    # silent mis-split or a cryptic IndexError during fancy indexing.
    if y is not None and len(y) != n_samples:
        raise ValueError("X and y must contain the same number of samples")

    if random_state is not None:
        np.random.seed(random_state)

    indices = np.arange(n_samples)

    # ---------- STRATIFIED SPLIT ----------
    if stratify is not None:
        if y is None:
            raise ValueError("y must be provided when using stratify")

        if not shuffle:
            raise ValueError("Stratified split requires shuffle=True")

        stratify = np.asarray(stratify)
        if len(stratify) != n_samples:
            raise ValueError("stratify must contain the same number of samples as X")

        train_idx = []
        test_idx = []

        # Split each class independently so the train/test class
        # proportions mirror the full data.
        for cls in np.unique(stratify):
            cls_indices = indices[stratify == cls]
            np.random.shuffle(cls_indices)

            split = int(len(cls_indices) * (1 - test_size))
            train_idx.extend(cls_indices[:split])
            test_idx.extend(cls_indices[split:])

        train_idx = np.array(train_idx)
        test_idx = np.array(test_idx)

    # ---------- STANDARD SPLIT ----------
    else:
        if shuffle:
            np.random.shuffle(indices)

        split = int(n_samples * (1 - test_size))
        train_idx, test_idx = indices[:split], indices[split:]

    # ---------- APPLY INDICES ----------
    # DataFrames need positional (.iloc) indexing; arrays index directly.
    if isinstance(X, pd.DataFrame):
        X_train = X.iloc[train_idx]
        X_test = X.iloc[test_idx]
    else:
        X_train = X[train_idx]
        X_test = X[test_idx]

    if y is None:
        return X_train, X_test

    y = np.asarray(y)
    y_train = y[train_idx]
    y_test = y[test_idx]

    return X_train, X_test, y_train, y_test
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
import numpy as np

def create_random(mean, std, size, random_state=None):
    """
    Generate random data with a specified mean and standard deviation.

    Draws a standard-normal sample, standardizes it, and rescales it so
    the returned data has *exactly* the requested mean and standard
    deviation (up to floating-point error).

    Parameters:
        mean (float): Desired mean of the data.
        std (float): Desired standard deviation of the data.
        size (int): Length of the data to generate.
        random_state (int, optional): Seed for reproducibility. Defaults to None.

    Returns:
        dict: A dictionary containing:
            - "data": Random data with the specified mean and standard deviation.
            - "mean": Actual mean of the generated data.
            - "std": Actual standard deviation of the generated data.

    Raises:
        ValueError: If std is negative or size is not a positive integer.

    Notes:
        For size == 1 a sample has zero spread, so the single value is
        placed at ``mean`` and the reported "std" is 0. (The previous
        version divided by a zero standard deviation and returned NaN.)
    """
    if std < 0:
        raise ValueError("Standard deviation must be non-negative.")
    if size <= 0:
        raise ValueError("Size must be a positive integer.")

    # Create a random number generator instance (local; does not touch
    # NumPy's global RNG state).
    rng = np.random.default_rng(random_state)

    # Generate random normal data and rescale to the requested moments.
    x = rng.normal(size=size)
    spread = np.std(x)

    if spread == 0:
        # Degenerate sample (size == 1): cannot be rescaled to a nonzero
        # std, so place every value at the requested mean.
        x2 = np.full(size, float(mean))
    else:
        x1 = (x - np.mean(x)) / spread
        x2 = (x1 * std) + mean

    return {
        "data": x2,
        "mean": np.mean(x2),
        "std": np.std(x2)
    }
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
import numpy as np
|
|
43
|
+
import pandas as pd
|
|
44
|
+
from typing import Optional, Dict
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class ProbabilisticImputer:
|
|
48
|
+
"""
|
|
49
|
+
Probabilistic, group-aware categorical imputer.
|
|
50
|
+
|
|
51
|
+
Learns conditional probability distributions during `fit()`
|
|
52
|
+
and samples missing values during `transform()`.
|
|
53
|
+
|
|
54
|
+
Default behavior is stateless (fully reproducible).
|
|
55
|
+
If stateful=True, RNG state advances across calls for
|
|
56
|
+
simulation / data augmentation workflows.
|
|
57
|
+
|
|
58
|
+
Parameters
|
|
59
|
+
----------
|
|
60
|
+
group_col : str
|
|
61
|
+
Column name used to group data (e.g. class, category, segment).
|
|
62
|
+
target_col : str
|
|
63
|
+
Column name to impute.
|
|
64
|
+
random_state : int, optional
|
|
65
|
+
Seed for reproducible randomness.
|
|
66
|
+
stateful : bool, default=False
|
|
67
|
+
If True, RNG state advances across multiple transform calls.
|
|
68
|
+
"""
|
|
69
|
+
|
|
70
|
+
def __init__(
|
|
71
|
+
self,
|
|
72
|
+
group_col: str,
|
|
73
|
+
target_col: str,
|
|
74
|
+
random_state: Optional[int] = None,
|
|
75
|
+
stateful: bool = False
|
|
76
|
+
):
|
|
77
|
+
self.group_col = group_col
|
|
78
|
+
self.target_col = target_col
|
|
79
|
+
self.random_state = random_state
|
|
80
|
+
self.stateful = stateful
|
|
81
|
+
|
|
82
|
+
self._fitted = False
|
|
83
|
+
self._dist_map: Dict = {}
|
|
84
|
+
|
|
85
|
+
self._init_rng()
|
|
86
|
+
|
|
87
|
+
# ---------------- INTERNAL ---------------- #
|
|
88
|
+
|
|
89
|
+
def _init_rng(self):
|
|
90
|
+
"""Initialize or reset the random number generator."""
|
|
91
|
+
self.rng = np.random.default_rng(self.random_state)
|
|
92
|
+
|
|
93
|
+
# ---------------- PUBLIC API ---------------- #
|
|
94
|
+
|
|
95
|
+
def fit(self, df: pd.DataFrame):
|
|
96
|
+
"""
|
|
97
|
+
Learn per-group probability distributions from observed data.
|
|
98
|
+
|
|
99
|
+
Parameters
|
|
100
|
+
----------
|
|
101
|
+
df : pandas.DataFrame
|
|
102
|
+
DataFrame containing group and target columns.
|
|
103
|
+
|
|
104
|
+
Returns
|
|
105
|
+
-------
|
|
106
|
+
self
|
|
107
|
+
"""
|
|
108
|
+
if not isinstance(df, pd.DataFrame):
|
|
109
|
+
raise TypeError("Input must be a pandas DataFrame")
|
|
110
|
+
|
|
111
|
+
if self.group_col not in df.columns or self.target_col not in df.columns:
|
|
112
|
+
raise ValueError("group_col and target_col must exist in DataFrame")
|
|
113
|
+
|
|
114
|
+
self._dist_map.clear()
|
|
115
|
+
|
|
116
|
+
# Build per-group distributions
|
|
117
|
+
grouped = df.dropna(subset=[self.target_col]).groupby(self.group_col)
|
|
118
|
+
|
|
119
|
+
for group, gdf in grouped:
|
|
120
|
+
probs = (
|
|
121
|
+
gdf[self.target_col]
|
|
122
|
+
.value_counts(normalize=True)
|
|
123
|
+
.to_dict()
|
|
124
|
+
)
|
|
125
|
+
self._dist_map[group] = probs
|
|
126
|
+
|
|
127
|
+
# Global fallback distribution
|
|
128
|
+
self._global_dist = (
|
|
129
|
+
df[self.target_col]
|
|
130
|
+
.dropna()
|
|
131
|
+
.value_counts(normalize=True)
|
|
132
|
+
.to_dict()
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
self._fitted = True
|
|
136
|
+
return self
|
|
137
|
+
|
|
138
|
+
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
|
139
|
+
"""
|
|
140
|
+
Impute missing values using learned distributions.
|
|
141
|
+
|
|
142
|
+
Parameters
|
|
143
|
+
----------
|
|
144
|
+
df : pandas.DataFrame
|
|
145
|
+
DataFrame to transform.
|
|
146
|
+
|
|
147
|
+
Returns
|
|
148
|
+
-------
|
|
149
|
+
pandas.DataFrame
|
|
150
|
+
New DataFrame with missing values imputed.
|
|
151
|
+
"""
|
|
152
|
+
if not self._fitted:
|
|
153
|
+
raise RuntimeError("Must call fit() before transform()")
|
|
154
|
+
|
|
155
|
+
if not isinstance(df, pd.DataFrame):
|
|
156
|
+
raise TypeError("Input must be a pandas DataFrame")
|
|
157
|
+
|
|
158
|
+
out = df.copy()
|
|
159
|
+
missing_mask = out[self.target_col].isna()
|
|
160
|
+
|
|
161
|
+
for idx in out[missing_mask].index:
|
|
162
|
+
group = out.at[idx, self.group_col]
|
|
163
|
+
|
|
164
|
+
# Get group distribution or fallback to global
|
|
165
|
+
dist = self._dist_map.get(group, self._global_dist)
|
|
166
|
+
|
|
167
|
+
if not dist:
|
|
168
|
+
continue # Nothing to sample from
|
|
169
|
+
|
|
170
|
+
choices = list(dist.keys())
|
|
171
|
+
probs = list(dist.values())
|
|
172
|
+
|
|
173
|
+
out.at[idx, self.target_col] = self.rng.choice(
|
|
174
|
+
choices,
|
|
175
|
+
p=probs
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
# Reset RNG if stateless (default behavior)
|
|
179
|
+
if not self.stateful:
|
|
180
|
+
self._init_rng()
|
|
181
|
+
|
|
182
|
+
return out
|
|
183
|
+
|
|
184
|
+
def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
|
185
|
+
"""
|
|
186
|
+
Fit and transform in one step.
|
|
187
|
+
|
|
188
|
+
Parameters
|
|
189
|
+
----------
|
|
190
|
+
df : pandas.DataFrame
|
|
191
|
+
|
|
192
|
+
Returns
|
|
193
|
+
-------
|
|
194
|
+
pandas.DataFrame
|
|
195
|
+
"""
|
|
196
|
+
return self.fit(df).transform(df)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: quicklearnkit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: QuickLearnKit: utilities for learning machine learning concepts
|
|
5
|
+
Author: Hazi Afrid
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025 Masterhazi
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Requires-Python: >=3.8
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Requires-Dist: numpy
|
|
32
|
+
Requires-Dist: pandas
|
|
33
|
+
Requires-Dist: scikit-learn
|
|
34
|
+
Requires-Dist: xgboost
|
|
35
|
+
Dynamic: license-file
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
|
|
1
|
+
LICENSE
|
|
2
2
|
pyproject.toml
|
|
3
|
-
setup.py
|
|
4
3
|
quicklearnkit/__init__.py
|
|
5
4
|
quicklearnkit/classifier.py
|
|
6
5
|
quicklearnkit/quickimports.py
|
|
6
|
+
quicklearnkit/randomizer.py
|
|
7
7
|
quicklearnkit/regressor.py
|
|
8
|
+
quicklearnkit/split.py
|
|
8
9
|
quicklearnkit/utils.py
|
|
9
10
|
quicklearnkit.egg-info/PKG-INFO
|
|
10
11
|
quicklearnkit.egg-info/SOURCES.txt
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
[egg_info]
|
|
2
|
-
tag_build =
|
|
3
|
-
tag_date = 0
|
|
4
|
-
|
|
1
|
+
[egg_info]
|
|
2
|
+
tag_build =
|
|
3
|
+
tag_date = 0
|
|
4
|
+
|
quicklearnkit-0.0.1/PKG-INFO
DELETED
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: quicklearnkit
|
|
3
|
-
Version: 0.0.1
|
|
4
|
-
Summary: A simplified interface for machine learning algorithms.
|
|
5
|
-
Home-page: https://github.com/yourusername/quicklearn
|
|
6
|
-
Author: hazi
|
|
7
|
-
Author-email: hajiafribaba@gmail.com
|
|
8
|
-
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
-
Classifier: Operating System :: OS Independent
|
|
11
|
-
Requires-Python: >=3.6
|
|
12
|
-
Description-Content-Type: text/markdown
|
|
13
|
-
Requires-Dist: scikit-learn
|
|
14
|
-
Requires-Dist: pandas
|
|
15
|
-
Requires-Dist: numpy
|
|
16
|
-
Dynamic: author
|
|
17
|
-
Dynamic: author-email
|
|
18
|
-
Dynamic: classifier
|
|
19
|
-
Dynamic: description
|
|
20
|
-
Dynamic: description-content-type
|
|
21
|
-
Dynamic: home-page
|
|
22
|
-
Dynamic: requires-dist
|
|
23
|
-
Dynamic: requires-python
|
|
24
|
-
Dynamic: summary
|
|
25
|
-
|
|
26
|
-
This is an upcoming wrapper library for machine learning beginners and all the enthusiasts out there who want to reduce the time taken for projects. This library works on a simple principle and is going to save a lot of time in the future.
|
quicklearnkit-0.0.1/README.md
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
This is an upcoming wrapper library for machine learning beginners and all the enthusiasts out there who want to reduce the time taken for projects. This library works on a simple principle and is going to save a lot of time in the future.
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
|
|
3
|
-
def create_random(mean, std, size, random_state=None):
|
|
4
|
-
"""
|
|
5
|
-
Generate random data with a specified mean and standard deviation.
|
|
6
|
-
|
|
7
|
-
Parameters:
|
|
8
|
-
mean (float): Desired mean of the data.
|
|
9
|
-
std (float): Desired standard deviation of the data.
|
|
10
|
-
size (int): Length of the data to generate.
|
|
11
|
-
random_state (int, optional): Seed for reproducibility. Defaults to None.
|
|
12
|
-
|
|
13
|
-
Returns:
|
|
14
|
-
dict: A dictionary containing:
|
|
15
|
-
- "data": Random data with the specified mean and standard deviation.
|
|
16
|
-
- "mean": Actual mean of the generated data.
|
|
17
|
-
- "std": Actual standard deviation of the generated data.
|
|
18
|
-
|
|
19
|
-
Raises:
|
|
20
|
-
ValueError: If std is negative or size is not a positive integer.
|
|
21
|
-
"""
|
|
22
|
-
if std < 0:
|
|
23
|
-
raise ValueError("Standard deviation must be non-negative.")
|
|
24
|
-
if size <= 0:
|
|
25
|
-
raise ValueError("Size must be a positive integer.")
|
|
26
|
-
|
|
27
|
-
# Create a random number generator instance
|
|
28
|
-
rng = np.random.default_rng(random_state)
|
|
29
|
-
|
|
30
|
-
# Generate random normal data
|
|
31
|
-
x = rng.normal(size=size)
|
|
32
|
-
x1 = (x - np.mean(x)) / np.std(x)
|
|
33
|
-
x2 = (x1 * std) + mean
|
|
34
|
-
|
|
35
|
-
return {
|
|
36
|
-
"data": x2,
|
|
37
|
-
"mean": np.mean(x2),
|
|
38
|
-
"std": np.std(x2)
|
|
39
|
-
}
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: quicklearnkit
|
|
3
|
-
Version: 0.0.1
|
|
4
|
-
Summary: A simplified interface for machine learning algorithms.
|
|
5
|
-
Home-page: https://github.com/yourusername/quicklearn
|
|
6
|
-
Author: hazi
|
|
7
|
-
Author-email: hajiafribaba@gmail.com
|
|
8
|
-
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
-
Classifier: Operating System :: OS Independent
|
|
11
|
-
Requires-Python: >=3.6
|
|
12
|
-
Description-Content-Type: text/markdown
|
|
13
|
-
Requires-Dist: scikit-learn
|
|
14
|
-
Requires-Dist: pandas
|
|
15
|
-
Requires-Dist: numpy
|
|
16
|
-
Dynamic: author
|
|
17
|
-
Dynamic: author-email
|
|
18
|
-
Dynamic: classifier
|
|
19
|
-
Dynamic: description
|
|
20
|
-
Dynamic: description-content-type
|
|
21
|
-
Dynamic: home-page
|
|
22
|
-
Dynamic: requires-dist
|
|
23
|
-
Dynamic: requires-python
|
|
24
|
-
Dynamic: summary
|
|
25
|
-
|
|
26
|
-
This is an upcoming wrapper library for machine learning beginners and all the enthusiasts out there who want to reduce the time taken for projects. This library works on a simple principle and is going to save a lot of time in the future.
|
quicklearnkit-0.0.1/setup.py
DELETED
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
from setuptools import setup, find_packages
|
|
2
|
-
|
|
3
|
-
setup(
|
|
4
|
-
name="quicklearnkit", # Package name
|
|
5
|
-
version="0.0.1", # Initial version
|
|
6
|
-
author="hazi", # Your name
|
|
7
|
-
author_email="hajiafribaba@gmail.com", # Your email
|
|
8
|
-
description="A simplified interface for machine learning algorithms.", # Short description
|
|
9
|
-
long_description=open("README.md").read(), # Long description from README
|
|
10
|
-
long_description_content_type="text/markdown", # Format of the long description
|
|
11
|
-
url="https://github.com/yourusername/quicklearn", # Project URL
|
|
12
|
-
packages=find_packages(), # Automatically find all packages
|
|
13
|
-
install_requires=[ # List your dependencies here
|
|
14
|
-
"scikit-learn",
|
|
15
|
-
"pandas",
|
|
16
|
-
"numpy",
|
|
17
|
-
],
|
|
18
|
-
classifiers=[ # Metadata for PyPI
|
|
19
|
-
"Programming Language :: Python :: 3",
|
|
20
|
-
"License :: OSI Approved :: MIT License",
|
|
21
|
-
"Operating System :: OS Independent",
|
|
22
|
-
],
|
|
23
|
-
python_requires=">=3.6", # Python version compatibility
|
|
24
|
-
)
|
|
File without changes
|