quicklearnkit 0.0.2__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,21 +1,21 @@
1
- MIT License
2
-
3
- Copyright (c) 2025 Masterhazi
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Masterhazi
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,219 @@
1
+ Metadata-Version: 2.4
2
+ Name: quicklearnkit
3
+ Version: 0.2.1
4
+ Summary: Learning-first machine learning utilities library for simplified imports, sampling, splitting, and probabilistic preprocessing.
5
+ Author: Hazi Afrid
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Masterhazi
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Requires-Python: >=3.8
29
+ Description-Content-Type: text/markdown
30
+ License-File: LICENSE
31
+ Requires-Dist: numpy
32
+ Requires-Dist: pandas
33
+ Requires-Dist: scikit-learn
34
+ Requires-Dist: xgboost
35
+ Dynamic: license-file
36
+
37
+
38
+ # QuickLearnKit
39
+
40
+ QuickLearnKit is a lightweight, learning-first machine learning utilities library designed to simplify model imports and streamline common ML workflows. No more deep module navigation—import models and tools effortlessly and start building.
41
+
42
+ It focuses on removing *mechanical friction* so students and practitioners can spend more time understanding concepts, not fighting syntax.
43
+
44
+ ---
45
+
46
+ ## Installation
47
+
48
+ Install QuickLearnKit using pip:
49
+
50
+ ```bash
51
+ pip install quicklearnkit
52
+ ````
53
+
54
+ ---
55
+
56
+ ## Quick Model Imports
57
+
58
+ QuickLearnKit provides seamless access to essential machine learning models with minimal syntax. Simply import and initialize models without the usual clutter.
59
+
60
+ ### Example Usage
61
+
62
+ ```python
63
+ from quicklearnkit import (
64
+ LinearRegressionmodel,
65
+ RandomForestRegressionmodel,
66
+ XGBoostRegressionmodel,
67
+ KNeighborsClassifiermodel,
68
+ GradientBoostingClassifiermodel
69
+ )
70
+
71
+ # Initialize models directly
72
+ lr_model = LinearRegressionmodel()
73
+ rf_model = RandomForestRegressionmodel()
74
+ xgb_model = XGBoostRegressionmodel()
75
+
76
+ # Initialize classifiers
77
+ knn_classifier = KNeighborsClassifiermodel()
78
+ gb_classifier = GradientBoostingClassifiermodel()
79
+ ```
80
+
81
+ ---
82
+
83
+ ## Supported Models
84
+
85
+ QuickLearnKit offers easy access to commonly used supervised learning models:
86
+
87
+ ### Regression Models
88
+
89
+ * Linear Regression (`LinearRegressionmodel()`)
90
+ * K-Nearest Neighbors Regression (`KNNRegressionmodel()`)
91
+ * Decision Tree Regression (`DecisionTreeRegressionmodel()`)
92
+ * Random Forest Regression (`RandomForestRegressionmodel()`)
93
+ * Gradient Boosting Regression (`GradientBoostingRegressionmodel()`)
94
+ * AdaBoost Regression (`AdaBoostRegressionmodel()`)
95
+ * XGBoost Regression (`XGBoostRegressionmodel()`)
96
+ * ElasticNet Regression (`ElasticNetRegressionmodel()`)
97
+
98
+ ### Classification Models
99
+
100
+ * Logistic Regression (`LogisticRegressionmodel()`)
101
+ * K-Nearest Neighbors Classifier (`KNeighborsClassifiermodel()`)
102
+ * Decision Tree Classifier (`DecisionTreeClassifiermodel()`)
103
+ * Random Forest Classifier (`RandomForestClassifiermodel()`)
104
+ * AdaBoost Classifier (`AdaBoostClassifiermodel()`)
105
+ * Gradient Boosting Classifier (`GradientBoostingClassifiermodel()`)
106
+ * XGBoost Classifier (`XGBClassifiermodel()`)
107
+ * Support Vector Classifier (`SVClassifiermodel()`)
108
+
109
+ ---
110
+
111
+ ## Utilities & Workflow Tools
112
+
113
+ Beyond models, QuickLearnKit provides practical tools to support the full machine learning workflow.
114
+
115
+ ---
116
+
117
+ ### 🔀 Random Sampling — `Sampler`
118
+
119
+ Randomly sample from lists, NumPy arrays, or pandas DataFrames. Supports both **stateless (reproducible)** and **stateful (streaming / simulation)** modes.
120
+
121
+ ```python
122
+ from quicklearnkit import Sampler
123
+ import seaborn as sns
124
+
125
+ df = sns.load_dataset("titanic")
126
+
127
+ sampler = Sampler(df, seed=42)
128
+ sampled_data = sampler.sample(n=5)
129
+
130
+ print(sampled_data)
131
+ ```
132
+
133
+ ---
134
+
135
+ ### ✂️ Train–Test Splitting — `train_test_split`
136
+
137
+ Split datasets into training and testing sets with support for:
138
+
139
+ * Shuffling
140
+ * Stratification
141
+ * NumPy arrays and pandas DataFrames
142
+
143
+ ```python
144
+ from quicklearnkit import train_test_split
145
+ import numpy as np
146
+
147
+ X = np.arange(20).reshape(10, 2)
148
+
149
+ X_train, X_test = train_test_split(
150
+ X,
151
+ test_size=0.25,
152
+ shuffle=True,
153
+ random_state=42
154
+ )
155
+
156
+ print(X_train.shape, X_test.shape)
157
+ ```
158
+
159
+ ---
160
+
161
+ ### 🎲 Probabilistic Imputation — `ProbabilisticImputer`
162
+
163
+ A group-aware, probabilistic categorical imputer that learns conditional distributions and samples missing values in a **reproducible** way by default.
164
+
165
+ This is especially useful for:
166
+
167
+ * Teaching how distributions work
168
+ * Simulating realistic missing data handling
169
+ * Data augmentation and robustness testing
170
+
171
+ ```python
172
+ from quicklearnkit import ProbabilisticImputer
173
+ import seaborn as sns
174
+
175
+ df = sns.load_dataset("titanic")
176
+
177
+ imputer = ProbabilisticImputer(
178
+ group_col="pclass",
179
+ target_col="deck",
180
+ random_state=42 # reproducible by default
181
+ )
182
+
183
+ df_imputed = imputer.fit_transform(df)
184
+
185
+ print("Missing before:", df["deck"].isna().sum())
186
+ print("Missing after:", df_imputed["deck"].isna().sum())
187
+ ```
188
+
189
+ ---
190
+
191
+ ## Randomized Data Generation
192
+
193
+ Generate random arrays with specific characteristics:
194
+
195
+ ```python
196
+ from quicklearnkit import create_random
197
+
198
+ random_data = create_random(mean=0, std_dev=1, size=100)
199
+ print(random_data)
200
+ ```
201
+
202
+ ---
203
+
204
+ ## Contribute
205
+
206
+ Want to improve QuickLearnKit? Fork the repository, suggest enhancements, and help make machine learning more accessible and easier to teach.
207
+
208
+ ---
209
+
210
+ ## License
211
+
212
+ This project is licensed under the MIT License.
213
+
214
+ ---
215
+
216
+ QuickLearnKit makes machine learning utilities effortless—so you can focus on **learning, experimenting, and building**, not writing complex import statements. 🚀
217
+
218
+
219
+
@@ -0,0 +1,23 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "quicklearnkit"
7
+ version = "0.2.1"
8
+ description = "Learning-first machine learning utilities library for simplified imports, sampling, splitting, and probabilistic preprocessing."
9
+ readme = "readme.md"
10
+ license = { file = "LICENSE" }
11
+ authors = [
12
+ { name = "Hazi Afrid" }
13
+ ]
14
+ requires-python = ">=3.8"
15
+ dependencies = [
16
+ "numpy",
17
+ "pandas",
18
+ "scikit-learn",
19
+ "xgboost"
20
+ ]
21
+
22
+ [tool.setuptools.packages.find]
23
+ where = ["."]
@@ -0,0 +1,2 @@
1
+ from .quickimports import *
2
+ from .randomizer import Sampler
@@ -1,90 +1,90 @@
1
- from sklearn.linear_model import LogisticRegression as logisticregression
2
- from sklearn.neighbors import KNeighborsClassifier as knnclassifier
3
- from sklearn.tree import DecisionTreeClassifier as decisiontreeclassifier
4
- from sklearn.ensemble import RandomForestClassifier as randomforestclassifier
5
- from sklearn.ensemble import GradientBoostingClassifier as gradientboostingclassifier
6
- from sklearn.ensemble import AdaBoostClassifier as adaboostclassifier
7
- from xgboost import XGBClassifier as xgboostclassifier
8
- from sklearn.svm import SVC as supportvectorclassifer
9
-
10
-
11
- class LogisticRegressionmodel:
12
- def __init__(self, **kwargs):
13
- self.model = logisticregression(**kwargs)
14
-
15
- def fit(self, X,y):
16
- self.model.fit(X,y)
17
-
18
- def predict(self,X):
19
- self.model.predict(X)
20
-
21
-
22
- class KNeighborsClassifiermodel:
23
- def __init__(self, **kwargs):
24
- self.model = knnclassifier(**kwargs)
25
-
26
- def fit(self, X,y):
27
- self.model.fit(X,y)
28
-
29
- def predict(self,X):
30
- self.model.predict(X)
31
-
32
- class DecisionTreeClassifiermodel:
33
- def __init__(self, **kwargs):
34
- self.model = decisiontreeclassifier(**kwargs)
35
-
36
- def fit(self,X,y):
37
- self.model.fit(X,y)
38
-
39
- def predict(self, X):
40
- self.model.predict(X)
41
-
42
- class RandomForestClassifiermodel:
43
- def __init__(self, **kwargs):
44
- self.model = randomforestclassifier(**kwargs)
45
-
46
- def fit(self, X,y):
47
- self.model.fit(X,y)
48
-
49
- def predict(self, X):
50
- self.model.predict(X)
51
-
52
- class GradientBoostingClassifiermodel:
53
- def __init__(self, **kwargs):
54
- self.model = gradientboostingclassifier(**kwargs)
55
-
56
- def fit(self, X,y):
57
- self.model.fit(X,y)
58
-
59
- def predict(self, X):
60
- self.model.predict(X)
61
-
62
- class AdaBoostClassifiermodel:
63
- def __init__(self, **kwargs):
64
- self.model = adaboostclassifier(**kwargs)
65
-
66
- def fit(self, X,y):
67
- self.model.fit(X,y)
68
-
69
- def predict(self, X):
70
- self.model.predict(X)
71
-
72
- class SVClassifiermodel:
73
- def __init__(self, **kwargs):
74
- self.model = supportvectorclassifer(**kwargs)
75
-
76
- def fit(self, X,y):
77
- self.model.fit(X,y)
78
-
79
- def predict(self, X):
80
- self.model.predict(X)
81
-
82
- class XGBClassifiermodel:
83
- def __init__(self, **kwargs):
84
- self.model = xgboostclassifier(**kwargs)
85
-
86
- def fit (self, X,y):
87
- self.model.fit(X,y)
88
-
89
- def predict(self, X):
1
+ from sklearn.linear_model import LogisticRegression as logisticregression
2
+ from sklearn.neighbors import KNeighborsClassifier as knnclassifier
3
+ from sklearn.tree import DecisionTreeClassifier as decisiontreeclassifier
4
+ from sklearn.ensemble import RandomForestClassifier as randomforestclassifier
5
+ from sklearn.ensemble import GradientBoostingClassifier as gradientboostingclassifier
6
+ from sklearn.ensemble import AdaBoostClassifier as adaboostclassifier
7
+ from xgboost import XGBClassifier as xgboostclassifier
8
+ from sklearn.svm import SVC as supportvectorclassifer
9
+
10
+
11
class LogisticRegressionmodel:
    """Thin wrapper around scikit-learn's ``LogisticRegression``.

    Keyword arguments given to the constructor are forwarded unchanged to
    the underlying estimator, which is exposed as ``self.model``.
    """

    def __init__(self, **kwargs):
        self.model = logisticregression(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped estimator on features ``X`` and labels ``y``."""
        self.model.fit(X, y)

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        # The result must be returned; previously it was computed and dropped,
        # so callers always received None.
        return self.model.predict(X)
20
+
21
+
22
class KNeighborsClassifiermodel:
    """Thin wrapper around scikit-learn's ``KNeighborsClassifier``.

    Keyword arguments given to the constructor are forwarded unchanged to
    the underlying estimator, which is exposed as ``self.model``.
    """

    def __init__(self, **kwargs):
        self.model = knnclassifier(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped estimator on features ``X`` and labels ``y``."""
        self.model.fit(X, y)

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        # The result must be returned; previously it was computed and dropped,
        # so callers always received None.
        return self.model.predict(X)
31
+
32
class DecisionTreeClassifiermodel:
    """Thin wrapper around scikit-learn's ``DecisionTreeClassifier``.

    Keyword arguments given to the constructor are forwarded unchanged to
    the underlying estimator, which is exposed as ``self.model``.
    """

    def __init__(self, **kwargs):
        self.model = decisiontreeclassifier(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped estimator on features ``X`` and labels ``y``."""
        self.model.fit(X, y)

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        # The result must be returned; previously it was computed and dropped,
        # so callers always received None.
        return self.model.predict(X)
41
+
42
class RandomForestClassifiermodel:
    """Thin wrapper around scikit-learn's ``RandomForestClassifier``.

    Keyword arguments given to the constructor are forwarded unchanged to
    the underlying estimator, which is exposed as ``self.model``.
    """

    def __init__(self, **kwargs):
        self.model = randomforestclassifier(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped estimator on features ``X`` and labels ``y``."""
        self.model.fit(X, y)

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        # The result must be returned; previously it was computed and dropped,
        # so callers always received None.
        return self.model.predict(X)
51
+
52
class GradientBoostingClassifiermodel:
    """Thin wrapper around scikit-learn's ``GradientBoostingClassifier``.

    Keyword arguments given to the constructor are forwarded unchanged to
    the underlying estimator, which is exposed as ``self.model``.
    """

    def __init__(self, **kwargs):
        self.model = gradientboostingclassifier(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped estimator on features ``X`` and labels ``y``."""
        self.model.fit(X, y)

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        # The result must be returned; previously it was computed and dropped,
        # so callers always received None.
        return self.model.predict(X)
61
+
62
class AdaBoostClassifiermodel:
    """Thin wrapper around scikit-learn's ``AdaBoostClassifier``.

    Keyword arguments given to the constructor are forwarded unchanged to
    the underlying estimator, which is exposed as ``self.model``.
    """

    def __init__(self, **kwargs):
        self.model = adaboostclassifier(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped estimator on features ``X`` and labels ``y``."""
        self.model.fit(X, y)

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        # The result must be returned; previously it was computed and dropped,
        # so callers always received None.
        return self.model.predict(X)
71
+
72
class SVClassifiermodel:
    """Thin wrapper around scikit-learn's ``SVC``.

    Keyword arguments given to the constructor are forwarded unchanged to
    the underlying estimator, which is exposed as ``self.model``.
    """

    def __init__(self, **kwargs):
        # The alias spelling matches the module-level import as written.
        self.model = supportvectorclassifer(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped estimator on features ``X`` and labels ``y``."""
        self.model.fit(X, y)

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        # The result must be returned; previously it was computed and dropped,
        # so callers always received None.
        return self.model.predict(X)
81
+
82
class XGBClassifiermodel:
    """Thin wrapper around xgboost's ``XGBClassifier``.

    Keyword arguments given to the constructor are forwarded unchanged to
    the underlying estimator, which is exposed as ``self.model``.
    """

    def __init__(self, **kwargs):
        self.model = xgboostclassifier(**kwargs)

    def fit(self, X, y):
        """Fit the wrapped estimator on features ``X`` and labels ``y``."""
        self.model.fit(X, y)

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        # Previously this called ``self.model.fit(X)``, which fails for lack
        # of ``y`` and never produced predictions.
        return self.model.predict(X)
@@ -1,21 +1,22 @@
1
- #from .regression import LinearRegression, KNeighborsRegression, DecisionTreeRegression, RandomForestRegression, AdaBoostRegression, GradientBoostingRegression, XGBRegressor,SVR
2
- #from .classifier import LogisticRegression, KNeighborsClassifier, DecisionTreeClassifier, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, XGBClassifier, SVC
3
-
4
- #__all__= [
5
- # 'LinearRegression', ' KNeighborsRegressor', 'DecisionTreeRegressor', 'RandomForestRegressor', 'AdaBoostRegressor', 'GradientBoostingRegressor', 'XGBRegressor', 'SVR',
6
- # 'LogisticRegression', 'KNeighborsClassifier', 'DecisionTreeClassifier', 'RandomForestClassifier', 'AdaBoostClassifier', 'GradientBoostingClassifier', 'XGBClassifier', 'SVC'
7
- #]
8
-
9
- from .regressor import LinearRegressionmodel, KNNRegressionmodel, DecisionTreeRegressionmodel, RandomForestRegressionmodel, GradientBoostingRegressionmodel, AdaBoostRegressionmodel, XGBoostRegressionmodel, ElasticNetRegressionmodel
10
- from .classifier import LogisticRegressionmodel, KNeighborsClassifiermodel, DecisionTreeClassifiermodel, RandomForestClassifiermodel, AdaBoostClassifiermodel, GradientBoostingClassifiermodel, XGBClassifiermodel, SVClassifiermodel
11
- from .utils import create_random
12
-
13
- __all__=[
14
- 'LinearRegressionmodel','LogisticRegressionmodel', 'KNNRegressionmodel','GradientBoostingRegressionmodel',
15
- 'AdaBoostRegressionmodel', 'XGBoostRegressionmodel', 'ElasticNetRegressionmodel',
16
- 'DecisionTreeRegressionmodel', 'RandomForestRegressionmodel',
17
- 'KNeighborsClassifiermodel', 'DecisionTreeClassifiermodel', 'RandomForestClassifiermodel','AdaBoostClassifiermodel',
18
- 'GradientBoostingClassifiermodel', 'XGBClassifiermodel', 'SVClassifiermodel',
19
- 'create_random'
20
- ]
21
-
1
+ #from .regression import LinearRegression, KNeighborsRegression, DecisionTreeRegression, RandomForestRegression, AdaBoostRegression, GradientBoostingRegression, XGBRegressor,SVR
2
+ #from .classifier import LogisticRegression, KNeighborsClassifier, DecisionTreeClassifier, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, XGBClassifier, SVC
3
+
4
+ #__all__= [
5
+ # 'LinearRegression', ' KNeighborsRegressor', 'DecisionTreeRegressor', 'RandomForestRegressor', 'AdaBoostRegressor', 'GradientBoostingRegressor', 'XGBRegressor', 'SVR',
6
+ # 'LogisticRegression', 'KNeighborsClassifier', 'DecisionTreeClassifier', 'RandomForestClassifier', 'AdaBoostClassifier', 'GradientBoostingClassifier', 'XGBClassifier', 'SVC'
7
+ #]
8
+
9
+ from .regressor import LinearRegressionmodel, KNNRegressionmodel, DecisionTreeRegressionmodel, RandomForestRegressionmodel, GradientBoostingRegressionmodel, AdaBoostRegressionmodel, XGBoostRegressionmodel, ElasticNetRegressionmodel
10
+ from .classifier import LogisticRegressionmodel, KNeighborsClassifiermodel, DecisionTreeClassifiermodel, RandomForestClassifiermodel, AdaBoostClassifiermodel, GradientBoostingClassifiermodel, XGBClassifiermodel, SVClassifiermodel
11
+ from .utils import create_random, ProbabilisticImputer
12
+ from .randomizer import Sampler
13
+ from .split import train_test_split
14
+ __all__=[
15
+ 'LinearRegressionmodel','LogisticRegressionmodel', 'KNNRegressionmodel','GradientBoostingRegressionmodel',
16
+ 'AdaBoostRegressionmodel', 'XGBoostRegressionmodel', 'ElasticNetRegressionmodel',
17
+ 'DecisionTreeRegressionmodel', 'RandomForestRegressionmodel',
18
+ 'KNeighborsClassifiermodel', 'DecisionTreeClassifiermodel', 'RandomForestClassifiermodel','AdaBoostClassifiermodel',
19
+ 'GradientBoostingClassifiermodel', 'XGBClassifiermodel', 'SVClassifiermodel',
20
+ 'create_random', 'Sampler', 'train_test_split', "ProbabilisticImputer",
21
+ ]
22
+
@@ -0,0 +1,175 @@
1
+ import random
2
+ import numpy as np
3
+ import pandas as pd
4
+ from typing import Union, Optional
5
+
6
+
7
+ DataType = Union[list, tuple, np.ndarray, pd.DataFrame]
8
+
9
+
10
class Sampler:
    """
    Unified random sampler supporting:

    - Stateful and stateless sampling
    - Sampling with or without replacement
    - list, tuple, numpy.ndarray, pandas.DataFrame
    - Row or column sampling for DataFrames

    Parameters
    ----------
    data : list, tuple, numpy.ndarray, or pandas.DataFrame
        Data to sample from. Tuples are converted to lists.
    replace : bool, default=False
        Whether stateless sampling is done with replacement. Ignored when
        ``stateful=True`` (stateful draws never repeat an element).
    stateful : bool, default=False
        If True, sampled elements are removed from future draws until
        ``reset()`` is called.
    seed : int, optional
        Random seed for reproducibility. When given, the sampler uses its
        own private generators, so the process-wide ``random`` and
        ``numpy.random`` state is left untouched. When omitted, the global
        generators are used.

    Raises
    ------
    TypeError
        If ``data`` is not one of the supported container types.
    """

    def __init__(
        self,
        data: "DataType",
        replace: bool = False,
        stateful: bool = False,
        seed: Optional[int] = None,
    ):
        self.replace = replace
        self.stateful = stateful

        # A seeded sampler owns its generators: reseeding the global RNGs
        # would silently change the behaviour of unrelated code (and of any
        # other Sampler created earlier).
        self._seeded = seed is not None
        if self._seeded:
            self._py_rng = random.Random(seed)
            self._np_rng = np.random.RandomState(seed)
        else:
            self._py_rng = random
            self._np_rng = np.random

        # Detect data type
        if isinstance(data, (list, tuple)):
            self._type = "list"
            self.data = list(data)
        elif isinstance(data, np.ndarray):
            self._type = "array"
            self.data = data
        elif isinstance(data, pd.DataFrame):
            self._type = "df"
            self.data = data
        else:
            raise TypeError(
                "Unsupported data type. Use list, numpy array, or pandas DataFrame."
            )

        if self.stateful:
            self._reset_pool()

    # ---------------- INTERNAL ---------------- #

    def _pandas_random_state(self):
        """Return the ``random_state`` argument to hand to pandas.

        pandas falls back to the global NumPy generator when given ``None``,
        which is the desired behaviour for an unseeded sampler.
        """
        return self._np_rng if self._seeded else None

    def _reset_pool(self):
        """Initialize or reset the internal pool used by stateful sampling."""
        if self._type == "list":
            self.pool = self.data.copy()
            self._py_rng.shuffle(self.pool)
        elif self._type == "array":
            # Shuffles along the first axis only.
            self.pool = self.data.copy()
            self._np_rng.shuffle(self.pool)
        elif self._type == "df":
            # Shuffle the rows so that stateful draws are random, matching
            # the list and array pools (an unshuffled copy would simply hand
            # back the rows in their original order).
            self.pool = self.data.sample(
                frac=1, random_state=self._pandas_random_state()
            )

    def _sample_list(self, n):
        """Draw ``n`` items from list data."""
        if self.stateful:
            if n > len(self.pool):
                raise StopIteration("No items left to sample")
            drawn, self.pool = self.pool[:n], self.pool[n:]
            return drawn

        if self.replace:
            return self._py_rng.choices(self.data, k=n)
        if n > len(self.data):
            raise ValueError("Cannot sample more elements than population")
        return self._py_rng.sample(self.data, n)

    def _sample_array(self, n):
        """Draw ``n`` entries along the first axis of array data."""
        if self.stateful:
            if n > len(self.pool):
                raise StopIteration("No items left to sample")
            drawn, self.pool = self.pool[:n], self.pool[n:]
            return drawn

        # Sample indices rather than the array itself: ``choice`` only
        # accepts 1-D input, so passing a 2-D array directly would raise.
        picked = self._np_rng.choice(
            self.data.shape[0], size=n, replace=self.replace
        )
        return self.data[picked]

    def _sample_rows(self, n):
        """Draw ``n`` rows from DataFrame data."""
        if self.stateful:
            if n > len(self.pool):
                raise StopIteration("No rows left to sample")
            drawn = self.pool.iloc[:n]
            self.pool = self.pool.iloc[n:]
            return drawn

        return self.data.sample(
            n=n, replace=self.replace, random_state=self._pandas_random_state()
        )

    def _sample_columns(self, n):
        """Draw ``n`` columns from DataFrame data."""
        if self.stateful:
            remaining = list(self.pool.columns)
            if n > len(remaining):
                raise StopIteration("No columns left to sample")
            chosen = self._py_rng.sample(remaining, n)
            # Drawn columns leave the pool, so later row draws from the pool
            # no longer contain them.
            self.pool = self.pool.drop(columns=chosen)
        elif self.replace:
            # With replacement any n is valid, even above the column count.
            chosen = self._py_rng.choices(list(self.data.columns), k=n)
        else:
            available = list(self.data.columns)
            if n > len(available):
                # Exception type kept for callers that already catch it.
                raise StopIteration("No columns left to sample")
            chosen = self._py_rng.sample(available, n)

        return self.data[chosen]

    # ---------------- PUBLIC API ---------------- #

    def sample(self, n: int = 1, axis: int = 0):
        """
        Sample data.

        Parameters
        ----------
        n : int, default=1
            Number of items to sample.
        axis : int, default=0
            Axis to sample from when data is a DataFrame.
            0 = rows, 1 = columns. Ignored for lists and arrays.

        Returns
        -------
        Sampled data (same type as input).

        Raises
        ------
        ValueError
            If ``n`` is not positive, ``axis`` is invalid for a DataFrame, or
            more items are requested than exist when sampling without
            replacement from a list or array.
        StopIteration
            If a stateful pool holds fewer than ``n`` items, or more columns
            are requested than exist without replacement.
        """
        if n <= 0:
            raise ValueError("n must be a positive integer")

        if self._type == "list":
            return self._sample_list(n)
        if self._type == "array":
            return self._sample_array(n)

        if axis not in (0, 1):
            raise ValueError("axis must be 0 (rows) or 1 (columns)")
        if axis == 0:
            return self._sample_rows(n)
        return self._sample_columns(n)

    def reset(self):
        """
        Reset internal state (only for stateful sampler).

        Raises
        ------
        RuntimeError
            If the sampler was created with ``stateful=False``.
        """
        if not self.stateful:
            raise RuntimeError("reset() is only available when stateful=True")
        self._reset_pool()