quicklearnkit 0.0.1__tar.gz → 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Masterhazi
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,35 @@
1
+ Metadata-Version: 2.4
2
+ Name: quicklearnkit
3
+ Version: 0.1.0
4
+ Summary: QuickLearnKit: utilities for learning machine learning concepts
5
+ Author: Hazi Afrid
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Masterhazi
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Requires-Python: >=3.8
29
+ Description-Content-Type: text/markdown
30
+ License-File: LICENSE
31
+ Requires-Dist: numpy
32
+ Requires-Dist: pandas
33
+ Requires-Dist: scikit-learn
34
+ Requires-Dist: xgboost
35
+ Dynamic: license-file
@@ -0,0 +1,23 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "quicklearnkit"
7
+ version = "0.1.0"
8
+ description = "QuickLearnKit: utilities for learning machine learning concepts"
9
+ readme = "README.md"
10
+ license = { file = "LICENSE" }
11
+ authors = [
12
+ { name = "Hazi Afrid" }
13
+ ]
14
+ requires-python = ">=3.8"
15
+ dependencies = [
16
+ "numpy",
17
+ "pandas",
18
+ "scikit-learn",
19
+ "xgboost"
20
+ ]
21
+
22
+ [tool.setuptools.packages.find]
23
+ where = ["."]
@@ -0,0 +1,2 @@
1
+ from .quickimports import *
2
+ from .randomizer import Sampler
@@ -1,90 +1,90 @@
1
- from sklearn.linear_model import LogisticRegression as logisticregression
2
- from sklearn.neighbors import KNeighborsClassifier as knnclassifier
3
- from sklearn.tree import DecisionTreeClassifier as decisiontreeclassifier
4
- from sklearn.ensemble import RandomForestClassifier as randomforestclassifier
5
- from sklearn.ensemble import GradientBoostingClassifier as gradientboostingclassifier
6
- from sklearn.ensemble import AdaBoostClassifier as adaboostclassifier
7
- from xgboost import XGBClassifier as xgboostclassifier
8
- from sklearn.svm import SVC as supportvectorclassifer
9
-
10
-
11
- class LogisticRegressionmodel:
12
- def __init__(self, **kwargs):
13
- self.model = logisticregression(**kwargs)
14
-
15
- def fit(self, X,y):
16
- self.model.fit(X,y)
17
-
18
- def predict(self,X):
19
- self.model.predict(X)
20
-
21
-
22
- class KNeighborsClassifiermodel:
23
- def __init__(self, **kwargs):
24
- self.model = knnclassifier(**kwargs)
25
-
26
- def fit(self, X,y):
27
- self.model.fit(X,y)
28
-
29
- def predict(self,X):
30
- self.model.predict(X)
31
-
32
- class DecisionTreeClassifiermodel:
33
- def __init__(self, **kwargs):
34
- self.model = decisiontreeclassifier(**kwargs)
35
-
36
- def fit(self,X,y):
37
- self.model.fit(X,y)
38
-
39
- def predict(self, X):
40
- self.model.predict(X)
41
-
42
- class RandomForestClassifiermodel:
43
- def __init__(self, **kwargs):
44
- self.model = randomforestclassifier(**kwargs)
45
-
46
- def fit(self, X,y):
47
- self.model.fit(X,y)
48
-
49
- def predict(self, X):
50
- self.model.predict(X)
51
-
52
- class GradientBoostingClassifiermodel:
53
- def __init__(self, **kwargs):
54
- self.model = gradientboostingclassifier(**kwargs)
55
-
56
- def fit(self, X,y):
57
- self.model.fit(X,y)
58
-
59
- def predict(self, X):
60
- self.model.predict(X)
61
-
62
- class AdaBoostClassifiermodel:
63
- def __init__(self, **kwargs):
64
- self.model = adaboostclassifier(**kwargs)
65
-
66
- def fit(self, X,y):
67
- self.model.fit(X,y)
68
-
69
- def predict(self, X):
70
- self.model.predict(X)
71
-
72
- class SVClassifiermodel:
73
- def __init__(self, **kwargs):
74
- self.model = supportvectorclassifer(**kwargs)
75
-
76
- def fit(self, X,y):
77
- self.model.fit(X,y)
78
-
79
- def predict(self, X):
80
- self.model.predict(X)
81
-
82
- class XGBClassifiermodel:
83
- def __init__(self, **kwargs):
84
- self.model = xgboostclassifier(**kwargs)
85
-
86
- def fit (self, X,y):
87
- self.model.fit(X,y)
88
-
89
- def predict(self, X):
1
+ from sklearn.linear_model import LogisticRegression as logisticregression
2
+ from sklearn.neighbors import KNeighborsClassifier as knnclassifier
3
+ from sklearn.tree import DecisionTreeClassifier as decisiontreeclassifier
4
+ from sklearn.ensemble import RandomForestClassifier as randomforestclassifier
5
+ from sklearn.ensemble import GradientBoostingClassifier as gradientboostingclassifier
6
+ from sklearn.ensemble import AdaBoostClassifier as adaboostclassifier
7
+ from xgboost import XGBClassifier as xgboostclassifier
8
+ from sklearn.svm import SVC as supportvectorclassifer
9
+
10
+
11
class LogisticRegressionmodel:
    """Thin wrapper around sklearn's LogisticRegression.

    All constructor keyword arguments are forwarded to the underlying
    estimator.
    """

    def __init__(self, **kwargs):
        self.model = logisticregression(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped model; returns self so calls can be chained
        (backward compatible: callers ignoring the old None return are
        unaffected)."""
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return predictions for X.

        Bug fix: the original discarded the prediction and returned None.
        """
        return self.model.predict(X)
20
+
21
+
22
class KNeighborsClassifiermodel:
    """Thin wrapper around sklearn's KNeighborsClassifier.

    All constructor keyword arguments are forwarded to the underlying
    estimator.
    """

    def __init__(self, **kwargs):
        self.model = knnclassifier(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped model; returns self to allow chaining."""
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return predictions for X (bug fix: result was previously
        discarded — the method returned None)."""
        return self.model.predict(X)
31
+
32
class DecisionTreeClassifiermodel:
    """Thin wrapper around sklearn's DecisionTreeClassifier.

    All constructor keyword arguments are forwarded to the underlying
    estimator.
    """

    def __init__(self, **kwargs):
        self.model = decisiontreeclassifier(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped model; returns self to allow chaining."""
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return predictions for X (bug fix: result was previously
        discarded — the method returned None)."""
        return self.model.predict(X)
41
+
42
class RandomForestClassifiermodel:
    """Thin wrapper around sklearn's RandomForestClassifier.

    All constructor keyword arguments are forwarded to the underlying
    estimator.
    """

    def __init__(self, **kwargs):
        self.model = randomforestclassifier(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped model; returns self to allow chaining."""
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return predictions for X (bug fix: result was previously
        discarded — the method returned None)."""
        return self.model.predict(X)
51
+
52
class GradientBoostingClassifiermodel:
    """Thin wrapper around sklearn's GradientBoostingClassifier.

    All constructor keyword arguments are forwarded to the underlying
    estimator.
    """

    def __init__(self, **kwargs):
        self.model = gradientboostingclassifier(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped model; returns self to allow chaining."""
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return predictions for X (bug fix: result was previously
        discarded — the method returned None)."""
        return self.model.predict(X)
61
+
62
class AdaBoostClassifiermodel:
    """Thin wrapper around sklearn's AdaBoostClassifier.

    All constructor keyword arguments are forwarded to the underlying
    estimator.
    """

    def __init__(self, **kwargs):
        self.model = adaboostclassifier(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped model; returns self to allow chaining."""
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return predictions for X (bug fix: result was previously
        discarded — the method returned None)."""
        return self.model.predict(X)
71
+
72
class SVClassifiermodel:
    """Thin wrapper around sklearn's SVC.

    All constructor keyword arguments are forwarded to the underlying
    estimator.  NOTE(review): the module-level alias is spelled
    `supportvectorclassifer` (missing 'i') in the import line — internal
    only, but worth renaming file-wide in a follow-up.
    """

    def __init__(self, **kwargs):
        self.model = supportvectorclassifer(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped model; returns self to allow chaining."""
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return predictions for X (bug fix: result was previously
        discarded — the method returned None)."""
        return self.model.predict(X)
81
+
82
class XGBClassifiermodel:
    """Thin wrapper around xgboost's XGBClassifier.

    All constructor keyword arguments are forwarded to the underlying
    estimator.
    """

    def __init__(self, **kwargs):
        self.model = xgboostclassifier(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped model; returns self to allow chaining."""
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return predictions for X.

        Bug fix: the original body was `self.model.fit(X)` — it tried to
        re-FIT the model inside predict (which would raise, since no
        labels are passed) and returned None.  It now predicts.
        """
        return self.model.predict(X)
@@ -1,21 +1,22 @@
1
- #from .regression import LinearRegression, KNeighborsRegression, DecisionTreeRegression, RandomForestRegression, AdaBoostRegression, GradientBoostingRegression, XGBRegressor,SVR
2
- #from .classifier import LogisticRegression, KNeighborsClassifier, DecisionTreeClassifier, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, XGBClassifier, SVC
3
-
4
- #__all__= [
5
- # 'LinearRegression', ' KNeighborsRegressor', 'DecisionTreeRegressor', 'RandomForestRegressor', 'AdaBoostRegressor', 'GradientBoostingRegressor', 'XGBRegressor', 'SVR',
6
- # 'LogisticRegression', 'KNeighborsClassifier', 'DecisionTreeClassifier', 'RandomForestClassifier', 'AdaBoostClassifier', 'GradientBoostingClassifier', 'XGBClassifier', 'SVC'
7
- #]
8
-
9
- from .regressor import LinearRegressionmodel, KNNRegressionmodel, DecisionTreeRegressionmodel, RandomForestRegressionmodel, GradientBoostingRegressionmodel, AdaBoostRegressionmodel, XGBoostRegressionmodel, ElasticNetRegressionmodel
10
- from .classifier import LogisticRegressionmodel, KNeighborsClassifiermodel, DecisionTreeClassifiermodel, RandomForestClassifiermodel, AdaBoostClassifiermodel, GradientBoostingClassifiermodel, XGBClassifiermodel, SVClassifiermodel
11
- from .utils import create_random
12
-
13
- __all__=[
14
- 'LinearRegressionmodel','LogisticRegressionmodel', 'KNNRegressionmodel','GradientBoostingRegressionmodel',
15
- 'AdaBoostRegressionmodel', 'XGBoostRegressionmodel', 'ElasticNetRegressionmodel',
16
- 'DecisionTreeRegressionmodel', 'RandomForestRegressionmodel',
17
- 'KNeighborsClassifiermodel', 'DecisionTreeClassifiermodel', 'RandomForestClassifiermodel','AdaBoostClassifiermodel',
18
- 'GradientBoostingClassifiermodel', 'XGBClassifiermodel', 'SVClassifiermodel',
19
- 'create_random'
20
- ]
21
-
1
+ #from .regression import LinearRegression, KNeighborsRegression, DecisionTreeRegression, RandomForestRegression, AdaBoostRegression, GradientBoostingRegression, XGBRegressor,SVR
2
+ #from .classifier import LogisticRegression, KNeighborsClassifier, DecisionTreeClassifier, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, XGBClassifier, SVC
3
+
4
+ #__all__= [
5
+ # 'LinearRegression', ' KNeighborsRegressor', 'DecisionTreeRegressor', 'RandomForestRegressor', 'AdaBoostRegressor', 'GradientBoostingRegressor', 'XGBRegressor', 'SVR',
6
+ # 'LogisticRegression', 'KNeighborsClassifier', 'DecisionTreeClassifier', 'RandomForestClassifier', 'AdaBoostClassifier', 'GradientBoostingClassifier', 'XGBClassifier', 'SVC'
7
+ #]
8
+
9
+ from .regressor import LinearRegressionmodel, KNNRegressionmodel, DecisionTreeRegressionmodel, RandomForestRegressionmodel, GradientBoostingRegressionmodel, AdaBoostRegressionmodel, XGBoostRegressionmodel, ElasticNetRegressionmodel
10
+ from .classifier import LogisticRegressionmodel, KNeighborsClassifiermodel, DecisionTreeClassifiermodel, RandomForestClassifiermodel, AdaBoostClassifiermodel, GradientBoostingClassifiermodel, XGBClassifiermodel, SVClassifiermodel
11
+ from .utils import create_random, ProbabilisticImputer
12
+ from .randomizer import Sampler
13
+ from .split import train_test_split
14
+ __all__=[
15
+ 'LinearRegressionmodel','LogisticRegressionmodel', 'KNNRegressionmodel','GradientBoostingRegressionmodel',
16
+ 'AdaBoostRegressionmodel', 'XGBoostRegressionmodel', 'ElasticNetRegressionmodel',
17
+ 'DecisionTreeRegressionmodel', 'RandomForestRegressionmodel',
18
+ 'KNeighborsClassifiermodel', 'DecisionTreeClassifiermodel', 'RandomForestClassifiermodel','AdaBoostClassifiermodel',
19
+ 'GradientBoostingClassifiermodel', 'XGBClassifiermodel', 'SVClassifiermodel',
20
+ 'create_random', 'Sampler', 'train_test_split', "ProbabilisticImputer",
21
+ ]
22
+
@@ -0,0 +1,175 @@
1
+ import random
2
+ import numpy as np
3
+ import pandas as pd
4
+ from typing import Union, Optional
5
+
6
+
7
+ DataType = Union[list, tuple, np.ndarray, pd.DataFrame]
8
+
9
+
10
class Sampler:
    """
    Unified random sampler supporting:

    - Stateful and stateless sampling
    - Sampling with or without replacement
    - list, numpy.ndarray, pandas.DataFrame
    - Row or column sampling for DataFrames

    Parameters
    ----------
    data : list, tuple, numpy.ndarray, or pandas.DataFrame
        Data to sample from.
    replace : bool, default=False
        Whether sampling is done with replacement.
    stateful : bool, default=False
        If True, sampled elements are removed from future draws.
    seed : int, optional
        Random seed for reproducibility.  NOTE: seeds the *global*
        `random` and `numpy.random` generators (kept as-is for backward
        compatibility with the original behavior).
    """

    def __init__(
        self,
        data: Union[list, tuple, np.ndarray, pd.DataFrame],
        replace: bool = False,
        stateful: bool = False,
        seed: Optional[int] = None
    ):
        self.replace = replace
        self.stateful = stateful

        if seed is not None:
            # Global seeding, preserved from the original implementation.
            random.seed(seed)
            np.random.seed(seed)

        # Detect data type once; `_type` drives dispatch in sample().
        if isinstance(data, (list, tuple)):
            self._type = "list"
            self.data = list(data)
        elif isinstance(data, np.ndarray):
            self._type = "array"
            self.data = data
        elif isinstance(data, pd.DataFrame):
            self._type = "df"
            self.data = data
        else:
            raise TypeError(
                "Unsupported data type. Use list, numpy array, or pandas DataFrame."
            )

        if self.stateful:
            self._reset_pool()

    # ---------------- INTERNAL ---------------- #

    def _reset_pool(self):
        """Initialize or reset the internal sampling pool (shuffled copy)."""
        if self._type == "list":
            self.pool = self.data.copy()
            random.shuffle(self.pool)

        elif self._type == "array":
            self.pool = self.data.copy()
            np.random.shuffle(self.pool)

        elif self._type == "df":
            # Bug fix: the DataFrame pool was previously an *unshuffled*
            # copy, so stateful row sampling silently returned rows in
            # their original order instead of a random order.
            self.pool = self.data.sample(frac=1)

    # ---------------- PUBLIC API ---------------- #

    def sample(self, n: int = 1, axis: int = 0):
        """
        Sample data.

        Parameters
        ----------
        n : int, default=1
            Number of items to sample.
        axis : int, default=0
            Axis to sample from when data is a DataFrame.
            0 = rows, 1 = columns.

        Returns
        -------
        Sampled data (same type as input).

        Raises
        ------
        ValueError
            If n is not positive, or n exceeds the population when
            sampling without replacement, or axis is invalid.
        StopIteration
            If a stateful pool is exhausted (preserved from the original
            interface so existing callers keep working).
        """
        if n <= 0:
            raise ValueError("n must be a positive integer")

        source = self.pool if self.stateful else self.data

        # -------- LIST --------
        if self._type == "list":
            if self.stateful:
                if n > len(self.pool):
                    raise StopIteration("No items left to sample")
                out = self.pool[:n]
                self.pool = self.pool[n:]
                return out

            if not self.replace and n > len(source):
                raise ValueError("Cannot sample more elements than population")

            return (
                random.sample(source, n)
                if not self.replace
                else random.choices(source, k=n)
            )

        # -------- NUMPY ARRAY --------
        if self._type == "array":
            if self.stateful:
                if n > len(self.pool):
                    raise StopIteration("No items left to sample")
                out = self.pool[:n]
                self.pool = self.pool[n:]
                return out

            # NOTE(review): np.random.choice requires a 1-D array; multi-
            # dimensional input is not supported on this path.
            return np.random.choice(source, size=n, replace=self.replace)

        # -------- DATAFRAME --------
        if self._type == "df":
            if axis not in (0, 1):
                raise ValueError("axis must be 0 (rows) or 1 (columns)")

            # Row sampling
            if axis == 0:
                if self.stateful:
                    if n > len(self.pool):
                        raise StopIteration("No rows left to sample")
                    out = self.pool.iloc[:n]
                    self.pool = self.pool.iloc[n:]
                    return out

                return self.data.sample(n=n, replace=self.replace)

            # Column sampling
            cols = list(source.columns)

            if n > len(cols):
                raise StopIteration("No columns left to sample")

            if self.stateful:
                # Bug fix: previously took the FIRST n columns in their
                # original order (no randomness); now draws at random.
                chosen = random.sample(cols, n)
                self.pool = self.pool.drop(columns=chosen)
            elif self.replace:
                chosen = random.choices(cols, k=n)
            else:
                chosen = random.sample(cols, n)

            return self.data[chosen]

    def reset(self):
        """
        Reset internal state (only for stateful sampler).
        """
        if not self.stateful:
            raise RuntimeError("reset() is only available when stateful=True")
        self._reset_pool()
@@ -1,92 +1,92 @@
1
- from sklearn.linear_model import LinearRegression as linearregression
2
- from sklearn.neighbors import KNeighborsRegressor as knnregressor
3
- from sklearn.tree import DecisionTreeRegressor as decisiontreeregressor
4
- from sklearn.ensemble import RandomForestRegressor as randomforestregressor
5
- from sklearn.ensemble import GradientBoostingRegressor as gradientboostingregressor
6
- from sklearn.ensemble import AdaBoostRegressor as adaboostregressor
7
- from xgboost import XGBRegressor as xgboostregressor
8
- from sklearn.svm import SVR as supportvectorregressor
9
- from sklearn.linear_model import ElasticNet as elasticnetregressor
10
-
11
-
12
-
13
- class LinearRegressionmodel:
14
- def __init__(self, **kwargs):
15
- self.model = linearregression(**kwargs)
16
-
17
- def fit(self, X,y):
18
- self.model.fit(X,y)
19
-
20
- def predict(self,X):
21
- self.model.predict(X)
22
-
23
-
24
- class KNNRegressionmodel:
25
- def __init__(self, **kwargs):
26
- self.model = knnregressor(**kwargs)
27
-
28
- def fit(self, X, y):
29
- self.model.fit(X,y)
30
-
31
- def predict(self, X):
32
- self.model.predict(X)
33
-
34
- class DecisionTreeRegressionmodel:
35
- def __init__(self, **kwargs):
36
- self.model = decisiontreeregressor(**kwargs)
37
-
38
- def fit(self,X,y):
39
- self.model.fit(X,y)
40
-
41
- def predict(self, X):
42
- self.model.predict(X)
43
-
44
- class RandomForestRegressionmodel:
45
- def __init__(self, **kwargs):
46
- self.model = randomforestregressor(**kwargs)
47
-
48
- def fit(self, X, y):
49
- self.model.fit(X,y)
50
-
51
- def predict(self,X):
52
- self.model.predict(X)
53
-
54
- class GradientBoostingRegressionmodel:
55
- def __init__(self, **kwargs):
56
- self.model = randomforestregressor(**kwargs)
57
-
58
- def fit(self, X, y):
59
- self.model.fit(X,y)
60
-
61
- def predict(self,X):
62
- self.model.predict(X)
63
-
64
- class AdaBoostRegressionmodel:
65
- def __init__(self, **kwargs):
66
- self.model = adaboostregressor(**kwargs)
67
-
68
- def fit(self, X, y):
69
- self.model.fit(X,y)
70
-
71
- def predict(self,X):
72
- self.model.predict(X)
73
-
74
- class XGBoostRegressionmodel:
75
- def __init__(self, **kwargs):
76
- self.model = xgboostregressor(**kwargs)
77
-
78
- def fit(self, X, y):
79
- self.model.fit(X,y)
80
-
81
- def predict(self,X):
82
- self.model.predict(X)
83
-
84
- class ElasticNetRegressionmodel:
85
- def __init__(self, **kwargs):
86
- self.model = elasticnetregressor(**kwargs)
87
-
88
- def fit(self, X, y):
89
- self.model.fit(X,y)
90
-
91
- def predict(self,X):
1
+ from sklearn.linear_model import LinearRegression as linearregression
2
+ from sklearn.neighbors import KNeighborsRegressor as knnregressor
3
+ from sklearn.tree import DecisionTreeRegressor as decisiontreeregressor
4
+ from sklearn.ensemble import RandomForestRegressor as randomforestregressor
5
+ from sklearn.ensemble import GradientBoostingRegressor as gradientboostingregressor
6
+ from sklearn.ensemble import AdaBoostRegressor as adaboostregressor
7
+ from xgboost import XGBRegressor as xgboostregressor
8
+ from sklearn.svm import SVR as supportvectorregressor
9
+ from sklearn.linear_model import ElasticNet as elasticnetregressor
10
+
11
+
12
+
13
class LinearRegressionmodel:
    """Thin wrapper around sklearn's LinearRegression.

    All constructor keyword arguments are forwarded to the underlying
    estimator.
    """

    def __init__(self, **kwargs):
        self.model = linearregression(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped model; returns self to allow chaining."""
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return predictions for X (bug fix: result was previously
        discarded — the method returned None)."""
        return self.model.predict(X)
22
+
23
+
24
class KNNRegressionmodel:
    """Thin wrapper around sklearn's KNeighborsRegressor.

    All constructor keyword arguments are forwarded to the underlying
    estimator.
    """

    def __init__(self, **kwargs):
        self.model = knnregressor(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped model; returns self to allow chaining."""
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return predictions for X (bug fix: result was previously
        discarded — the method returned None)."""
        return self.model.predict(X)
33
+
34
class DecisionTreeRegressionmodel:
    """Thin wrapper around sklearn's DecisionTreeRegressor.

    All constructor keyword arguments are forwarded to the underlying
    estimator.
    """

    def __init__(self, **kwargs):
        self.model = decisiontreeregressor(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped model; returns self to allow chaining."""
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return predictions for X (bug fix: result was previously
        discarded — the method returned None)."""
        return self.model.predict(X)
43
+
44
class RandomForestRegressionmodel:
    """Thin wrapper around sklearn's RandomForestRegressor.

    All constructor keyword arguments are forwarded to the underlying
    estimator.
    """

    def __init__(self, **kwargs):
        self.model = randomforestregressor(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped model; returns self to allow chaining."""
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return predictions for X (bug fix: result was previously
        discarded — the method returned None)."""
        return self.model.predict(X)
53
+
54
class GradientBoostingRegressionmodel:
    """Thin wrapper around sklearn's GradientBoostingRegressor.

    All constructor keyword arguments are forwarded to the underlying
    estimator.
    """

    def __init__(self, **kwargs):
        # Bug fix: the original constructed `randomforestregressor` here
        # (copy-paste error), so this class silently trained a random
        # forest instead of gradient boosting.
        self.model = gradientboostingregressor(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped model; returns self to allow chaining."""
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return predictions for X (bug fix: result was previously
        discarded — the method returned None)."""
        return self.model.predict(X)
63
+
64
class AdaBoostRegressionmodel:
    """Thin wrapper around sklearn's AdaBoostRegressor.

    All constructor keyword arguments are forwarded to the underlying
    estimator.
    """

    def __init__(self, **kwargs):
        self.model = adaboostregressor(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped model; returns self to allow chaining."""
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return predictions for X (bug fix: result was previously
        discarded — the method returned None)."""
        return self.model.predict(X)
73
+
74
class XGBoostRegressionmodel:
    """Thin wrapper around xgboost's XGBRegressor.

    All constructor keyword arguments are forwarded to the underlying
    estimator.
    """

    def __init__(self, **kwargs):
        self.model = xgboostregressor(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped model; returns self to allow chaining."""
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return predictions for X (bug fix: result was previously
        discarded — the method returned None)."""
        return self.model.predict(X)
83
+
84
class ElasticNetRegressionmodel:
    """Thin wrapper around sklearn's ElasticNet.

    All constructor keyword arguments are forwarded to the underlying
    estimator.
    """

    def __init__(self, **kwargs):
        self.model = elasticnetregressor(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped model; returns self to allow chaining."""
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return predictions for X (bug fix: result was previously
        discarded — the method returned None)."""
        return self.model.predict(X)
@@ -0,0 +1,99 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from typing import Optional, Tuple, Union
4
+
5
+
6
+ ArrayLike = Union[np.ndarray, pd.DataFrame]
7
+
8
+
9
def train_test_split(
    X: Union[np.ndarray, pd.DataFrame],
    y: Optional[np.ndarray] = None,
    test_size: float = 0.25,
    shuffle: bool = True,
    stratify: Optional[np.ndarray] = None,
    random_state: Optional[int] = None
) -> Tuple:
    """
    Split data into train and test subsets.

    Parameters
    ----------
    X : numpy.ndarray or pandas.DataFrame
        Feature data to split.
    y : numpy.ndarray, optional
        Target labels corresponding to X.
    test_size : float, default=0.25
        Proportion of the dataset to include in the test split.
    shuffle : bool, default=True
        Whether to shuffle data before splitting.
    stratify : numpy.ndarray, optional
        Class labels for stratified split (classification only).
    random_state : int, optional
        Seed for reproducibility.

    Returns
    -------
    X_train, X_test : same type as X
        Split feature data.
    y_train, y_test : numpy.ndarray, optional
        Split target labels (only if y is provided).

    Raises
    ------
    ValueError
        If test_size is out of range, if y/stratify lengths do not match
        X, if stratify is given without y, or if a stratified split is
        requested with shuffle=False.
    """

    if not 0 < test_size < 1:
        raise ValueError("test_size must be between 0 and 1")

    n_samples = len(X)

    # Robustness fix: previously a mismatched y or stratify array would
    # silently produce a wrong split (or a late IndexError).
    if y is not None and len(y) != n_samples:
        raise ValueError("X and y must have the same number of samples")
    if stratify is not None and len(stratify) != n_samples:
        raise ValueError("stratify must have the same length as X")

    if random_state is not None:
        # Legacy global seeding kept so existing splits stay reproducible.
        np.random.seed(random_state)

    indices = np.arange(n_samples)

    # ---------- STRATIFIED SPLIT ----------
    if stratify is not None:
        if y is None:
            raise ValueError("y must be provided when using stratify")

        if not shuffle:
            raise ValueError("Stratified split requires shuffle=True")

        stratify = np.asarray(stratify)

        train_idx = []
        test_idx = []

        # Split each class independently so class proportions are
        # preserved in both subsets.
        for cls in np.unique(stratify):
            cls_indices = indices[stratify == cls]
            np.random.shuffle(cls_indices)

            split = int(len(cls_indices) * (1 - test_size))
            train_idx.extend(cls_indices[:split])
            test_idx.extend(cls_indices[split:])

        train_idx = np.array(train_idx)
        test_idx = np.array(test_idx)

    # ---------- STANDARD SPLIT ----------
    else:
        if shuffle:
            np.random.shuffle(indices)

        split = int(n_samples * (1 - test_size))
        train_idx, test_idx = indices[:split], indices[split:]

    # ---------- APPLY INDICES ----------
    if isinstance(X, pd.DataFrame):
        X_train = X.iloc[train_idx]
        X_test = X.iloc[test_idx]
    else:
        X_train = X[train_idx]
        X_test = X[test_idx]

    if y is None:
        return X_train, X_test

    y = np.asarray(y)
    y_train = y[train_idx]
    y_test = y[test_idx]

    return X_train, X_test, y_train, y_test
@@ -0,0 +1,196 @@
1
+ import numpy as np
2
+
3
def create_random(mean, std, size, random_state=None):
    """
    Generate random data with a specified mean and standard deviation.

    Parameters:
        mean (float): Desired mean of the data.
        std (float): Desired standard deviation of the data.
        size (int): Length of the data to generate.
        random_state (int, optional): Seed for reproducibility. Defaults to None.

    Returns:
        dict: A dictionary containing:
            - "data": Random data with the specified mean and standard deviation.
            - "mean": Actual mean of the generated data.
            - "std": Actual standard deviation of the generated data.

    Raises:
        ValueError: If std is negative or size is not a positive integer.
    """
    if std < 0:
        raise ValueError("Standard deviation must be non-negative.")
    if size <= 0:
        raise ValueError("Size must be a positive integer.")

    # Create a random number generator instance
    rng = np.random.default_rng(random_state)

    # Generate random normal data
    x = rng.normal(size=size)
    x_std = np.std(x)

    # Bug fix: with size == 1 (or any degenerate draw) np.std(x) is 0 and
    # the original code divided by zero, producing NaNs.  A constant
    # sample has the requested mean and the only achievable std (0).
    if x_std == 0:
        x2 = np.full(size, float(mean))
    else:
        # Standardize, then rescale to the requested moments exactly.
        x1 = (x - np.mean(x)) / x_std
        x2 = (x1 * std) + mean

    return {
        "data": x2,
        "mean": np.mean(x2),
        "std": np.std(x2)
    }
40
+
41
+
42
+ import numpy as np
43
+ import pandas as pd
44
+ from typing import Optional, Dict
45
+
46
+
47
class ProbabilisticImputer:
    """
    Probabilistic, group-aware categorical imputer.

    `fit()` learns, per group, the empirical distribution of the observed
    target values; `transform()` fills each missing target by sampling
    from its group's distribution (falling back to the global distribution
    for groups unseen at fit time).

    By default the sampler is stateless: the RNG is re-seeded after every
    transform, so repeated calls with the same `random_state` reproduce
    the same imputations.  With ``stateful=True`` the RNG keeps advancing
    across calls (useful for simulation / augmentation).

    Parameters
    ----------
    group_col : str
        Column used to group rows (e.g. class, category, segment).
    target_col : str
        Column whose missing values are imputed.
    random_state : int, optional
        Seed for reproducible randomness.
    stateful : bool, default=False
        If True, RNG state advances across multiple transform calls.
    """

    def __init__(
        self,
        group_col: str,
        target_col: str,
        random_state: Optional[int] = None,
        stateful: bool = False
    ):
        self.group_col = group_col
        self.target_col = target_col
        self.random_state = random_state
        self.stateful = stateful

        self._fitted = False
        self._dist_map: Dict = {}

        self._init_rng()

    # ---------------- INTERNAL ---------------- #

    def _init_rng(self):
        """(Re)create the random number generator from the stored seed."""
        self.rng = np.random.default_rng(self.random_state)

    # ---------------- PUBLIC API ---------------- #

    def fit(self, df: pd.DataFrame):
        """
        Learn per-group probability distributions from observed data.

        Parameters
        ----------
        df : pandas.DataFrame
            DataFrame containing group and target columns.

        Returns
        -------
        self
        """
        if not isinstance(df, pd.DataFrame):
            raise TypeError("Input must be a pandas DataFrame")

        present = df.columns
        if self.group_col not in present or self.target_col not in present:
            raise ValueError("group_col and target_col must exist in DataFrame")

        self._dist_map.clear()

        # Per-group distributions over the non-missing target values.
        observed = df.dropna(subset=[self.target_col])
        for key, sub in observed.groupby(self.group_col):
            freqs = sub[self.target_col].value_counts(normalize=True)
            self._dist_map[key] = freqs.to_dict()

        # Global fallback distribution for groups unseen during fit.
        self._global_dist = (
            df[self.target_col].dropna().value_counts(normalize=True).to_dict()
        )

        self._fitted = True
        return self

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Impute missing values using learned distributions.

        Parameters
        ----------
        df : pandas.DataFrame
            DataFrame to transform.

        Returns
        -------
        pandas.DataFrame
            New DataFrame with missing values imputed (input untouched).
        """
        if not self._fitted:
            raise RuntimeError("Must call fit() before transform()")

        if not isinstance(df, pd.DataFrame):
            raise TypeError("Input must be a pandas DataFrame")

        result = df.copy()
        to_fill = result.index[result[self.target_col].isna()]

        for row in to_fill:
            key = result.at[row, self.group_col]
            dist = self._dist_map.get(key, self._global_dist)

            # Nothing observed anywhere -> leave the cell missing.
            if not dist:
                continue

            values = list(dist.keys())
            weights = list(dist.values())
            result.at[row, self.target_col] = self.rng.choice(values, p=weights)

        # Stateless mode: rewind the RNG so every call is reproducible.
        if not self.stateful:
            self._init_rng()

        return result

    def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Fit and transform in one step.

        Parameters
        ----------
        df : pandas.DataFrame

        Returns
        -------
        pandas.DataFrame
        """
        return self.fit(df).transform(df)
@@ -0,0 +1,35 @@
1
+ Metadata-Version: 2.4
2
+ Name: quicklearnkit
3
+ Version: 0.1.0
4
+ Summary: QuickLearnKit: utilities for learning machine learning concepts
5
+ Author: Hazi Afrid
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Masterhazi
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Requires-Python: >=3.8
29
+ Description-Content-Type: text/markdown
30
+ License-File: LICENSE
31
+ Requires-Dist: numpy
32
+ Requires-Dist: pandas
33
+ Requires-Dist: scikit-learn
34
+ Requires-Dist: xgboost
35
+ Dynamic: license-file
@@ -1,10 +1,11 @@
1
- README.md
1
+ LICENSE
2
2
  pyproject.toml
3
- setup.py
4
3
  quicklearnkit/__init__.py
5
4
  quicklearnkit/classifier.py
6
5
  quicklearnkit/quickimports.py
6
+ quicklearnkit/randomizer.py
7
7
  quicklearnkit/regressor.py
8
+ quicklearnkit/split.py
8
9
  quicklearnkit/utils.py
9
10
  quicklearnkit.egg-info/PKG-INFO
10
11
  quicklearnkit.egg-info/SOURCES.txt
@@ -1,3 +1,4 @@
1
- scikit-learn
2
- pandas
3
1
  numpy
2
+ pandas
3
+ scikit-learn
4
+ xgboost
@@ -1,4 +1,4 @@
1
- [egg_info]
2
- tag_build =
3
- tag_date = 0
4
-
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -1,26 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: quicklearnkit
3
- Version: 0.0.1
4
- Summary: A simplified interface for machine learning algorithms.
5
- Home-page: https://github.com/yourusername/quicklearn
6
- Author: hazi
7
- Author-email: hajiafribaba@gmail.com
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Operating System :: OS Independent
11
- Requires-Python: >=3.6
12
- Description-Content-Type: text/markdown
13
- Requires-Dist: scikit-learn
14
- Requires-Dist: pandas
15
- Requires-Dist: numpy
16
- Dynamic: author
17
- Dynamic: author-email
18
- Dynamic: classifier
19
- Dynamic: description
20
- Dynamic: description-content-type
21
- Dynamic: home-page
22
- Dynamic: requires-dist
23
- Dynamic: requires-python
24
- Dynamic: summary
25
-
26
- This is an upcoming wrapper library for machine learning beginners and all the enthusiasts out there who want to reduce the time taken for projects; this library works on a simple principle and is going to save a lot of time in the future
@@ -1 +0,0 @@
1
- This is an upcoming wrapper library for machine learning beginners and all the enthusiasts out there who want to reduce the time taken for projects; this library works on a simple principle and is going to save a lot of time in the future
@@ -1,3 +0,0 @@
1
- [build-system]
2
- requires = ["setuptools>=42", "wheel"]
3
- build-backend = "setuptools.build_meta"
@@ -1,2 +0,0 @@
1
- from .quickimports import *
2
- from .utils import create_random
@@ -1,39 +0,0 @@
1
- import numpy as np
2
-
3
- def create_random(mean, std, size, random_state=None):
4
- """
5
- Generate random data with a specified mean and standard deviation.
6
-
7
- Parameters:
8
- mean (float): Desired mean of the data.
9
- std (float): Desired standard deviation of the data.
10
- size (int): Length of the data to generate.
11
- random_state (int, optional): Seed for reproducibility. Defaults to None.
12
-
13
- Returns:
14
- dict: A dictionary containing:
15
- - "data": Random data with the specified mean and standard deviation.
16
- - "mean": Actual mean of the generated data.
17
- - "std": Actual standard deviation of the generated data.
18
-
19
- Raises:
20
- ValueError: If std is negative or size is not a positive integer.
21
- """
22
- if std < 0:
23
- raise ValueError("Standard deviation must be non-negative.")
24
- if size <= 0:
25
- raise ValueError("Size must be a positive integer.")
26
-
27
- # Create a random number generator instance
28
- rng = np.random.default_rng(random_state)
29
-
30
- # Generate random normal data
31
- x = rng.normal(size=size)
32
- x1 = (x - np.mean(x)) / np.std(x)
33
- x2 = (x1 * std) + mean
34
-
35
- return {
36
- "data": x2,
37
- "mean": np.mean(x2),
38
- "std": np.std(x2)
39
- }
@@ -1,26 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: quicklearnkit
3
- Version: 0.0.1
4
- Summary: A simplified interface for machine learning algorithms.
5
- Home-page: https://github.com/yourusername/quicklearn
6
- Author: hazi
7
- Author-email: hajiafribaba@gmail.com
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Operating System :: OS Independent
11
- Requires-Python: >=3.6
12
- Description-Content-Type: text/markdown
13
- Requires-Dist: scikit-learn
14
- Requires-Dist: pandas
15
- Requires-Dist: numpy
16
- Dynamic: author
17
- Dynamic: author-email
18
- Dynamic: classifier
19
- Dynamic: description
20
- Dynamic: description-content-type
21
- Dynamic: home-page
22
- Dynamic: requires-dist
23
- Dynamic: requires-python
24
- Dynamic: summary
25
-
26
- This is an upcoming wrapper library for machine learning beginners and all the enthusiasts out there who want to reduce the time taken for projects; this library works on a simple principle and is going to save a lot of time in the future
@@ -1,24 +0,0 @@
1
- from setuptools import setup, find_packages
2
-
3
- setup(
4
- name="quicklearnkit", # Package name
5
- version="0.0.1", # Initial version
6
- author="hazi", # Your name
7
- author_email="hajiafribaba@gmail.com", # Your email
8
- description="A simplified interface for machine learning algorithms.", # Short description
9
- long_description=open("README.md").read(), # Long description from README
10
- long_description_content_type="text/markdown", # Format of the long description
11
- url="https://github.com/yourusername/quicklearn", # Project URL
12
- packages=find_packages(), # Automatically find all packages
13
- install_requires=[ # List your dependencies here
14
- "scikit-learn",
15
- "pandas",
16
- "numpy",
17
- ],
18
- classifiers=[ # Metadata for PyPI
19
- "Programming Language :: Python :: 3",
20
- "License :: OSI Approved :: MIT License",
21
- "Operating System :: OS Independent",
22
- ],
23
- python_requires=">=3.6", # Python version compatibility
24
- )