quicklearnkit 0.2.2__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quicklearnkit-0.4.0/PKG-INFO +680 -0
- {quicklearnkit-0.2.2 → quicklearnkit-0.4.0}/pyproject.toml +4 -2
- quicklearnkit-0.4.0/quicklearnkit/pipeline.py +255 -0
- quicklearnkit-0.4.0/quicklearnkit/plotting.py +278 -0
- {quicklearnkit-0.2.2 → quicklearnkit-0.4.0}/quicklearnkit/quickimports.py +11 -1
- quicklearnkit-0.4.0/quicklearnkit.egg-info/PKG-INFO +680 -0
- {quicklearnkit-0.2.2 → quicklearnkit-0.4.0}/quicklearnkit.egg-info/SOURCES.txt +2 -0
- {quicklearnkit-0.2.2 → quicklearnkit-0.4.0}/quicklearnkit.egg-info/requires.txt +2 -0
- quicklearnkit-0.4.0/readme.md +642 -0
- quicklearnkit-0.2.2/PKG-INFO +0 -219
- quicklearnkit-0.2.2/quicklearnkit.egg-info/PKG-INFO +0 -219
- quicklearnkit-0.2.2/readme.md +0 -183
- {quicklearnkit-0.2.2 → quicklearnkit-0.4.0}/LICENSE +0 -0
- {quicklearnkit-0.2.2 → quicklearnkit-0.4.0}/quicklearnkit/__init__.py +0 -0
- {quicklearnkit-0.2.2 → quicklearnkit-0.4.0}/quicklearnkit/classifier.py +0 -0
- {quicklearnkit-0.2.2 → quicklearnkit-0.4.0}/quicklearnkit/randomizer.py +0 -0
- {quicklearnkit-0.2.2 → quicklearnkit-0.4.0}/quicklearnkit/regressor.py +0 -0
- {quicklearnkit-0.2.2 → quicklearnkit-0.4.0}/quicklearnkit/split.py +0 -0
- {quicklearnkit-0.2.2 → quicklearnkit-0.4.0}/quicklearnkit/utils.py +0 -0
- {quicklearnkit-0.2.2 → quicklearnkit-0.4.0}/quicklearnkit.egg-info/dependency_links.txt +0 -0
- {quicklearnkit-0.2.2 → quicklearnkit-0.4.0}/quicklearnkit.egg-info/top_level.txt +0 -0
- {quicklearnkit-0.2.2 → quicklearnkit-0.4.0}/setup.cfg +0 -0
|
@@ -0,0 +1,680 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: quicklearnkit
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary: Learning-first machine learning utilities library for simplified imports, sampling, splitting, and probabilistic preprocessing.
|
|
5
|
+
Author: Hazi Afrid
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025 Masterhazi
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Requires-Python: >=3.8
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Requires-Dist: numpy
|
|
32
|
+
Requires-Dist: pandas
|
|
33
|
+
Requires-Dist: scikit-learn
|
|
34
|
+
Requires-Dist: xgboost
|
|
35
|
+
Requires-Dist: seaborn
|
|
36
|
+
Requires-Dist: matplotlib
|
|
37
|
+
Dynamic: license-file
|
|
38
|
+
|
|
39
|
+
# QuickLearnKit
|
|
40
|
+
|
|
41
|
+
QuickLearnKit is a **learning-first machine learning utilities library** designed to make common ML and data science workflows simple, readable, and beginner-friendly — without blocking advanced users from full customization.
|
|
42
|
+
|
|
43
|
+
The philosophy is:
|
|
44
|
+
|
|
45
|
+
> **Remove mechanical friction so students can focus on concepts, not syntax.**
|
|
46
|
+
|
|
47
|
+
QuickLearnKit provides:
|
|
48
|
+
|
|
49
|
+
* Easy model imports
|
|
50
|
+
* Random sampling utilities
|
|
51
|
+
* Train–test splitting
|
|
52
|
+
* Probabilistic, group-aware imputation
|
|
53
|
+
* Teaching-friendly plotting with optional value labels
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## Installation
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install quicklearnkit
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## Quick Start
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
from quicklearnkit import (
|
|
69
|
+
Sampler,
|
|
70
|
+
train_test_split,
|
|
71
|
+
ProbabilisticImputer,
|
|
72
|
+
bar_plot
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
import seaborn as sns
|
|
76
|
+
|
|
77
|
+
# Load example dataset
|
|
78
|
+
df = sns.load_dataset("titanic")
|
|
79
|
+
|
|
80
|
+
# Sample data
|
|
81
|
+
sampler = Sampler(df, n=5, random_state=42)
|
|
82
|
+
print(sampler.sample())
|
|
83
|
+
|
|
84
|
+
# Split data
|
|
85
|
+
train, test = train_test_split(df, test_size=0.25)
|
|
86
|
+
|
|
87
|
+
# Impute missing values
|
|
88
|
+
imputer = ProbabilisticImputer("pclass", "deck", random_state=42)
|
|
89
|
+
df_imputed = imputer.fit_transform(df)
|
|
90
|
+
|
|
91
|
+
# Plot
|
|
92
|
+
bar_plot(df, x="class", y="fare", show_values="yes")
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
# Model Imports
|
|
98
|
+
|
|
99
|
+
QuickLearnKit allows you to import commonly used machine learning models without navigating deep module paths.
|
|
100
|
+
|
|
101
|
+
### Example
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
from quicklearnkit import (
|
|
105
|
+
LinearRegressionmodel,
|
|
106
|
+
RandomForestRegressionmodel,
|
|
107
|
+
XGBoostRegressionmodel,
|
|
108
|
+
KNeighborsClassifiermodel,
|
|
109
|
+
GradientBoostingClassifiermodel
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
lr_model = LinearRegressionmodel()
|
|
113
|
+
rf_model = RandomForestRegressionmodel()
|
|
114
|
+
xgb_model = XGBoostRegressionmodel()
|
|
115
|
+
|
|
116
|
+
knn_classifier = KNeighborsClassifiermodel()
|
|
117
|
+
gb_classifier = GradientBoostingClassifiermodel()
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
## Supported Models
|
|
123
|
+
|
|
124
|
+
### Regression Models
|
|
125
|
+
|
|
126
|
+
* `LinearRegressionmodel()`
|
|
127
|
+
* `KNNRegressionmodel()`
|
|
128
|
+
* `DecisionTreeRegressionmodel()`
|
|
129
|
+
* `RandomForestRegressionmodel()`
|
|
130
|
+
* `GradientBoostingRegressionmodel()`
|
|
131
|
+
* `AdaBoostRegressionmodel()`
|
|
132
|
+
* `XGBoostRegressionmodel()`
|
|
133
|
+
* `ElasticNetRegressionmodel()`
|
|
134
|
+
|
|
135
|
+
### Classification Models
|
|
136
|
+
|
|
137
|
+
* `LogisticRegressionmodel()`
|
|
138
|
+
* `KNeighborsClassifiermodel()`
|
|
139
|
+
* `DecisionTreeClassifiermodel()`
|
|
140
|
+
* `RandomForestClassifiermodel()`
|
|
141
|
+
* `AdaBoostClassifiermodel()`
|
|
142
|
+
* `GradientBoostingClassifiermodel()`
|
|
143
|
+
* `XGBClassifiermodel()`
|
|
144
|
+
* `SVClassifiermodel()`
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
|
|
148
|
+
# Utilities
|
|
149
|
+
|
|
150
|
+
## 1. Sampler
|
|
151
|
+
|
|
152
|
+
The `Sampler` class allows you to randomly select elements from:
|
|
153
|
+
|
|
154
|
+
* Python lists
|
|
155
|
+
* NumPy arrays
|
|
156
|
+
* pandas DataFrames
|
|
157
|
+
|
|
158
|
+
It supports both **stateless** (reproducible) and **stateful** (streaming/simulation) modes.
|
|
159
|
+
|
|
160
|
+
### Initialization
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
Sampler(data, n=1, random_state=None, replace=False, axis=0, stateful=False)
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Parameters Explained
|
|
167
|
+
|
|
168
|
+
| Parameter | Type | Description |
|
|
169
|
+
| -------------- | ------------------------------------- | ------------------------------------------------------------------------------------------------ |
|
|
170
|
+
| `data` | list, numpy.ndarray, pandas.DataFrame | The dataset to sample from |
|
|
171
|
+
| `n` | int | Number of samples to return |
|
|
172
|
+
| `random_state` | int or None | Seed for reproducibility. Same seed = same result |
|
|
173
|
+
| `replace` | bool | If `True`, sampling is done **with replacement** (duplicates allowed). If `False`, no duplicates |
|
|
174
|
+
| `axis` | int | Only applies to DataFrames. `0` = sample rows, `1` = sample columns |
|
|
175
|
+
| `stateful` | bool | If `True`, RNG state continues across calls. If `False`, sampling is reproducible on every call |
|
|
176
|
+
|
|
177
|
+
### Example
|
|
178
|
+
|
|
179
|
+
```python
|
|
180
|
+
from quicklearnkit import Sampler
|
|
181
|
+
import seaborn as sns
|
|
182
|
+
|
|
183
|
+
df = sns.load_dataset("tips")
|
|
184
|
+
|
|
185
|
+
sampler = Sampler(df, n=3, random_state=42, replace=False)
|
|
186
|
+
|
|
187
|
+
sample1 = sampler.sample()
|
|
188
|
+
sample2 = sampler.sample() # Same output if stateful=False
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
## 2. Train–Test Split
|
|
194
|
+
|
|
195
|
+
Split datasets into training and testing sets with support for:
|
|
196
|
+
|
|
197
|
+
* Shuffling
|
|
198
|
+
* Stratification
|
|
199
|
+
* NumPy arrays
|
|
200
|
+
* pandas DataFrames
|
|
201
|
+
|
|
202
|
+
### Function
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
train_test_split(data, test_size=0.25, shuffle=True, stratify=None, random_state=None)
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
### Parameters Explained
|
|
209
|
+
|
|
210
|
+
| Parameter | Type | Description |
|
|
211
|
+
| -------------- | ------------------------------ | ---------------------------------------------------------- |
|
|
212
|
+
| `data` | array-like or pandas.DataFrame | Dataset to split |
|
|
213
|
+
| `test_size` | float | Proportion of data to use as test set (e.g., `0.25` = 25%) |
|
|
214
|
+
| `shuffle` | bool | If `True`, data is shuffled before splitting |
|
|
215
|
+
| `stratify` | array-like or str | Column or labels to preserve class distribution in splits |
|
|
216
|
+
| `random_state` | int or None | Seed for reproducibility |
|
|
217
|
+
|
|
218
|
+
### Example
|
|
219
|
+
|
|
220
|
+
```python
|
|
221
|
+
from quicklearnkit import train_test_split
|
|
222
|
+
|
|
223
|
+
train, test = train_test_split(df, test_size=0.3, shuffle=True, random_state=42)
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
---
|
|
227
|
+
|
|
228
|
+
## 3. ProbabilisticImputer
|
|
229
|
+
|
|
230
|
+
A **group-aware, probabilistic categorical imputer**. It learns probability distributions from observed data and fills missing values by sampling from those distributions.
|
|
231
|
+
|
|
232
|
+
This allows:
|
|
233
|
+
|
|
234
|
+
* Realistic missing data handling
|
|
235
|
+
* Teaching probability-based imputation
|
|
236
|
+
* Reproducible preprocessing
|
|
237
|
+
|
|
238
|
+
### Initialization
|
|
239
|
+
|
|
240
|
+
```python
|
|
241
|
+
ProbabilisticImputer(group_col, target_col, random_state=None, stateful=False)
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
### Parameters Explained
|
|
245
|
+
|
|
246
|
+
| Parameter | Type | Description |
|
|
247
|
+
| -------------- | ----------- | ------------------------------------------------------------------------------------------------------ |
|
|
248
|
+
| `group_col` | str | Column used to group data (e.g., class, category) |
|
|
249
|
+
| `target_col` | str | Column where missing values will be imputed |
|
|
250
|
+
| `random_state` | int or None | Seed for reproducibility |
|
|
251
|
+
| `stateful` | bool | If `True`, RNG state advances across calls (useful for simulation). If `False`, output is reproducible |
|
|
252
|
+
|
|
253
|
+
### Methods
|
|
254
|
+
|
|
255
|
+
| Method | Description |
|
|
256
|
+
| ------------------- | -------------------------------------------------- |
|
|
257
|
+
| `fit(df)` | Learns probability distributions from known values |
|
|
258
|
+
| `transform(df)` | Imputes missing values using learned distributions |
|
|
259
|
+
| `fit_transform(df)` | Fit and transform in one step |
|
|
260
|
+
|
|
261
|
+
### Example
|
|
262
|
+
|
|
263
|
+
```python
|
|
264
|
+
from quicklearnkit import ProbabilisticImputer
|
|
265
|
+
import seaborn as sns
|
|
266
|
+
|
|
267
|
+
df = sns.load_dataset("titanic")
|
|
268
|
+
|
|
269
|
+
imputer = ProbabilisticImputer(
|
|
270
|
+
group_col="pclass",
|
|
271
|
+
target_col="deck",
|
|
272
|
+
random_state=42
|
|
273
|
+
)
|
|
274
|
+
|
|
275
|
+
imputed_df = imputer.fit_transform(df)
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
---
|
|
279
|
+
|
|
280
|
+
# Plotting (Teaching-Friendly Visualization)
|
|
281
|
+
|
|
282
|
+
QuickLearnKit provides wrappers around **seaborn + matplotlib** that allow students to display values on plots using a simple switch:
|
|
283
|
+
|
|
284
|
+
```python
|
|
285
|
+
show_values="yes"
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
All plotting functions:
|
|
289
|
+
|
|
290
|
+
* Return a **matplotlib Axes object**
|
|
291
|
+
* Allow full customization (labels, limits, grids, styles)
|
|
292
|
+
* Automatically display the plot by default
|
|
293
|
+
|
|
294
|
+
---
|
|
295
|
+
|
|
296
|
+
## Common Parameters (All Plot Functions)
|
|
297
|
+
|
|
298
|
+
| Parameter | Type | Description |
|
|
299
|
+
| ------------- | ---------------- | -------------------------------------------------------------------------------- |
|
|
300
|
+
| `data` | pandas.DataFrame | Dataset used for plotting |
|
|
301
|
+
| `x` | str | Column for x-axis |
|
|
302
|
+
| `y` | str | Column for y-axis (if applicable) |
|
|
303
|
+
| `title` | str or None | Plot title |
|
|
304
|
+
| `show_values` | str | "yes" or "no" — whether to display numeric values |
|
|
305
|
+
| `fmt` | str | Format string for value labels (e.g. `{:.2f}`) |
|
|
306
|
+
| `show` | bool | If `True`, displays plot immediately. If `False`, returns Axes for customization |
|
|
307
|
+
|
|
308
|
+
---
|
|
309
|
+
|
|
310
|
+
## bar_plot
|
|
311
|
+
|
|
312
|
+
```python
|
|
313
|
+
bar_plot(data, x, y, title=None, show_values="no", fmt="{:.1f}", show=True)
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
### Example
|
|
317
|
+
|
|
318
|
+
```python
|
|
319
|
+
from quicklearnkit import bar_plot
|
|
320
|
+
import seaborn as sns
|
|
321
|
+
|
|
322
|
+
_df = sns.load_dataset("tips")
|
|
323
|
+
|
|
324
|
+
ax = bar_plot(
|
|
325
|
+
_df,
|
|
326
|
+
x="day",
|
|
327
|
+
y="total_bill",
|
|
328
|
+
show_values="yes",
|
|
329
|
+
show=False
|
|
330
|
+
)
|
|
331
|
+
|
|
332
|
+
ax.set_xlabel("Day of Week")
|
|
333
|
+
ax.set_ylabel("Average Bill")
|
|
334
|
+
ax.set_ylim(0, 40)
|
|
335
|
+
|
|
336
|
+
import matplotlib.pyplot as plt
|
|
337
|
+
plt.show()
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
---
|
|
341
|
+
|
|
342
|
+
## line_plot
|
|
343
|
+
|
|
344
|
+
```python
|
|
345
|
+
line_plot(data, x, y, title=None, show_values="no", fmt="{:.2f}", show=True)
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
---
|
|
349
|
+
|
|
350
|
+
## scatter_plot
|
|
351
|
+
|
|
352
|
+
```python
|
|
353
|
+
scatter_plot(data, x, y, title=None, show_values="no", fmt="{:.2f}", show=True)
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
---
|
|
357
|
+
|
|
358
|
+
## count_plot
|
|
359
|
+
|
|
360
|
+
```python
|
|
361
|
+
count_plot(data, x, title=None, show_values="no", show=True)
|
|
362
|
+
```
|
|
363
|
+
|
|
364
|
+
---
|
|
365
|
+
|
|
366
|
+
## box_plot
|
|
367
|
+
|
|
368
|
+
Displays **mean values** when `show_values="yes"`.
|
|
369
|
+
|
|
370
|
+
```python
|
|
371
|
+
box_plot(data, x=None, y=None, title=None, show_values="no", fmt="{:.2f}", show=True)
|
|
372
|
+
```
|
|
373
|
+
|
|
374
|
+
---
|
|
375
|
+
|
|
376
|
+
## hist_plot
|
|
377
|
+
|
|
378
|
+
Displays **bin counts** when `show_values="yes"`.
|
|
379
|
+
|
|
380
|
+
```python
|
|
381
|
+
hist_plot(data, x, bins=10, title=None, show_values="no", fmt="{:.0f}", show=True)
|
|
382
|
+
```
|
|
383
|
+
|
|
384
|
+
---
|
|
385
|
+
|
|
386
|
+
# Random Data Generation
|
|
387
|
+
|
|
388
|
+
Generate random numerical arrays for experiments and demonstrations.
|
|
389
|
+
|
|
390
|
+
```python
|
|
391
|
+
from quicklearnkit import create_random
|
|
392
|
+
|
|
393
|
+
random_data = create_random(mean=0, std_dev=1, size=100)
|
|
394
|
+
```
|
|
395
|
+
|
|
396
|
+
---
|
|
397
|
+
|
|
398
|
+
# Pipeline (Notebook → Script Bridge)
|
|
399
|
+
|
|
400
|
+
QuickLearnKit now includes a **Pipeline system** that helps you move from interactive notebook experimentation to clean, structured Python scripts.
|
|
401
|
+
|
|
402
|
+
The goal is to make transitioning from:
|
|
403
|
+
|
|
404
|
+
> 🧪 Exploration in notebooks → 🧾 Reproducible `.py` pipelines
|
|
405
|
+
|
|
406
|
+
simple, explicit, and disciplined.
|
|
407
|
+
|
|
408
|
+
---
|
|
409
|
+
|
|
410
|
+
## Creating a Pipeline
|
|
411
|
+
|
|
412
|
+
```python
|
|
413
|
+
from quicklearnkit import Pipeline
|
|
414
|
+
|
|
415
|
+
pipe = Pipeline()
|
|
416
|
+
```
|
|
417
|
+
|
|
418
|
+
Each `Pipeline()` instance is isolated and independent.
|
|
419
|
+
|
|
420
|
+
---
|
|
421
|
+
|
|
422
|
+
## 1. Manual Commit
|
|
423
|
+
|
|
424
|
+
Commit top-level functions explicitly:
|
|
425
|
+
|
|
426
|
+
```python
|
|
427
|
+
def preprocess(X):
|
|
428
|
+
return X
|
|
429
|
+
|
|
430
|
+
def train(X):
|
|
431
|
+
return "model"
|
|
432
|
+
|
|
433
|
+
pipe.commit(preprocess, outputs=["X_scaled"], stage="Preprocessing")
|
|
434
|
+
pipe.commit(train, inputs=["X_scaled"], outputs=["model"], stage="Training")
|
|
435
|
+
```
|
|
436
|
+
|
|
437
|
+
### Parameters Explained
|
|
438
|
+
|
|
439
|
+
| Parameter | Type | Description |
|
|
440
|
+
| --------- | ----------- | ------------------------------------------------ |
|
|
441
|
+
| `func` | Callable | Top-level function to include in the pipeline |
|
|
442
|
+
| `inputs` | list of str | Expected input variable names (for validation) |
|
|
443
|
+
| `outputs` | list of str | Output variable names produced by the function |
|
|
444
|
+
| `stage` | str | Optional grouping label for script organization |
|
|
445
|
+
| `mode` | str | `"functions"` enables semi-automatic commit mode |
|
|
446
|
+
|
|
447
|
+
---
|
|
448
|
+
|
|
449
|
+
## 2. Semi-Automatic Commit
|
|
450
|
+
|
|
451
|
+
Capture all user-defined top-level functions in the current notebook:
|
|
452
|
+
|
|
453
|
+
```python
|
|
454
|
+
pipe.commit(mode="functions")
|
|
455
|
+
```
|
|
456
|
+
|
|
457
|
+
Semi-auto mode:
|
|
458
|
+
|
|
459
|
+
* Detects user-defined functions
|
|
460
|
+
* Skips built-in functions
|
|
461
|
+
* Skips private functions (`_helper`)
|
|
462
|
+
* Skips nested functions
|
|
463
|
+
* Avoids duplicates
|
|
464
|
+
|
|
465
|
+
Metadata can later be updated:
|
|
466
|
+
|
|
467
|
+
```python
|
|
468
|
+
pipe.commit(train, inputs=["X_scaled"], outputs=["model"])
|
|
469
|
+
```
|
|
470
|
+
|
|
471
|
+
---
|
|
472
|
+
|
|
473
|
+
## 3. Pipeline Summary
|
|
474
|
+
|
|
475
|
+
Inspect committed functions and metadata:
|
|
476
|
+
|
|
477
|
+
```python
|
|
478
|
+
pipe.summary()
|
|
479
|
+
```
|
|
480
|
+
|
|
481
|
+
Example output:
|
|
482
|
+
|
|
483
|
+
```
|
|
484
|
+
[QuickLearn] 📦 Pipeline Summary
|
|
485
|
+
|
|
486
|
+
1. preprocess
|
|
487
|
+
Stage: Preprocessing
|
|
488
|
+
Inputs: []
|
|
489
|
+
Outputs: ['X_scaled']
|
|
490
|
+
|
|
491
|
+
2. train
|
|
492
|
+
Stage: Training
|
|
493
|
+
Inputs: ['X_scaled']
|
|
494
|
+
Outputs: ['model']
|
|
495
|
+
```
|
|
496
|
+
|
|
497
|
+
---
|
|
498
|
+
|
|
499
|
+
## 4. Register Imports
|
|
500
|
+
|
|
501
|
+
QuickLearnKit does **not** automatically capture notebook imports.
|
|
502
|
+
Imports must be registered explicitly to be included in the compiled script.
|
|
503
|
+
|
|
504
|
+
### Multiline string:
|
|
505
|
+
|
|
506
|
+
```python
|
|
507
|
+
pipe.add_import("""
|
|
508
|
+
import pandas as pd
|
|
509
|
+
import numpy as np
|
|
510
|
+
from quicklearnkit import RandomForestClassifiermodel
|
|
511
|
+
""")
|
|
512
|
+
```
|
|
513
|
+
|
|
514
|
+
### List format:
|
|
515
|
+
|
|
516
|
+
```python
|
|
517
|
+
pipe.add_import([
|
|
518
|
+
"import pandas as pd",
|
|
519
|
+
"import numpy as np"
|
|
520
|
+
])
|
|
521
|
+
```
|
|
522
|
+
|
|
523
|
+
Imports are:
|
|
524
|
+
|
|
525
|
+
* Validated
|
|
526
|
+
* Deduplicated
|
|
527
|
+
* Inserted at the top of the generated script
|
|
528
|
+
|
|
529
|
+
---
|
|
530
|
+
|
|
531
|
+
## 5. Dependency Validation
|
|
532
|
+
|
|
533
|
+
If `inputs` and `outputs` metadata are provided, QuickLearnKit can validate logical ordering.
|
|
534
|
+
|
|
535
|
+
```python
|
|
536
|
+
pipe.compile("pipeline.py", validate=True)
|
|
537
|
+
```
|
|
538
|
+
|
|
539
|
+
Validation checks:
|
|
540
|
+
|
|
541
|
+
* Missing inputs
|
|
542
|
+
* Duplicate outputs
|
|
543
|
+
* Incorrect dependency flow
|
|
544
|
+
|
|
545
|
+
### Strict Mode
|
|
546
|
+
|
|
547
|
+
```python
|
|
548
|
+
pipe.compile("pipeline.py", validate="strict")
|
|
549
|
+
```
|
|
550
|
+
|
|
551
|
+
Strict mode raises an error instead of warning.
|
|
552
|
+
|
|
553
|
+
---
|
|
554
|
+
|
|
555
|
+
## 6. Compile to Script
|
|
556
|
+
|
|
557
|
+
Generate a clean Python script:
|
|
558
|
+
|
|
559
|
+
```python
|
|
560
|
+
pipe.compile("pipeline.py")
|
|
561
|
+
```
|
|
562
|
+
|
|
563
|
+
Generated file includes:
|
|
564
|
+
|
|
565
|
+
* Registered imports
|
|
566
|
+
* Stage-based comment grouping
|
|
567
|
+
* Ordered function definitions
|
|
568
|
+
* A clean execution block
|
|
569
|
+
|
|
570
|
+
Example structure:
|
|
571
|
+
|
|
572
|
+
```python
|
|
573
|
+
import pandas as pd
|
|
574
|
+
|
|
575
|
+
# ==============================
|
|
576
|
+
# Preprocessing
|
|
577
|
+
# ==============================
|
|
578
|
+
|
|
579
|
+
def preprocess(X):
|
|
580
|
+
return X
|
|
581
|
+
|
|
582
|
+
# ==============================
|
|
583
|
+
# Training
|
|
584
|
+
# ==============================
|
|
585
|
+
|
|
586
|
+
def train(X):
|
|
587
|
+
return "model"
|
|
588
|
+
|
|
589
|
+
if __name__ == '__main__':
|
|
590
|
+
print('Pipeline ready.')
|
|
591
|
+
```
|
|
592
|
+
|
|
593
|
+
---
|
|
594
|
+
|
|
595
|
+
## 7. Reset Pipeline
|
|
596
|
+
|
|
597
|
+
Clear committed functions and imports:
|
|
598
|
+
|
|
599
|
+
```python
|
|
600
|
+
pipe.reset()
|
|
601
|
+
```
|
|
602
|
+
|
|
603
|
+
This resets:
|
|
604
|
+
|
|
605
|
+
* Committed functions
|
|
606
|
+
* Metadata
|
|
607
|
+
* Imports
|
|
608
|
+
* Compile lock
|
|
609
|
+
|
|
610
|
+
---
|
|
611
|
+
|
|
612
|
+
## Guard Rails
|
|
613
|
+
|
|
614
|
+
To ensure clean compilation, QuickLearnKit prevents committing:
|
|
615
|
+
|
|
616
|
+
* Lambda functions
|
|
617
|
+
* Built-in functions
|
|
618
|
+
* Class methods
|
|
619
|
+
* Nested functions
|
|
620
|
+
* Non-user-defined callables
|
|
621
|
+
|
|
622
|
+
Only top-level Python functions can be committed.
|
|
623
|
+
|
|
624
|
+
---
|
|
625
|
+
|
|
626
|
+
## Example: Full Hybrid Workflow
|
|
627
|
+
|
|
628
|
+
```python
|
|
629
|
+
from quicklearnkit import Pipeline
|
|
630
|
+
|
|
631
|
+
pipe = Pipeline()
|
|
632
|
+
|
|
633
|
+
pipe.add_import("""
|
|
634
|
+
import pandas as pd
|
|
635
|
+
from quicklearnkit import RandomForestClassifiermodel
|
|
636
|
+
""")
|
|
637
|
+
|
|
638
|
+
def preprocess(X):
|
|
639
|
+
return X
|
|
640
|
+
|
|
641
|
+
def train(X):
|
|
642
|
+
model = RandomForestClassifiermodel()
|
|
643
|
+
return model
|
|
644
|
+
|
|
645
|
+
pipe.commit(preprocess, outputs=["X_clean"], stage="Preprocessing")
|
|
646
|
+
pipe.commit(train, inputs=["X_clean"], outputs=["model"], stage="Training")
|
|
647
|
+
|
|
648
|
+
pipe.compile("ml_pipeline.py", validate=True)
|
|
649
|
+
```
|
|
650
|
+
|
|
651
|
+
---
|
|
652
|
+
|
|
653
|
+
This extends QuickLearnKit from a learning utility library into a structured bridge between experimentation and scripting — while keeping full control in the developer’s hands.
|
|
654
|
+
|
|
655
|
+
---
|
|
656
|
+
|
|
657
|
+
Planned improvements for future releases:
|
|
658
|
+
|
|
659
|
+
* Add a **Table of Contents** to aid navigation as the documentation grows
|
|
660
|
+
* Split README into sections for docs hosting
|
|
661
|
+
* Maintain a `CHANGELOG.md` documenting the changes in each release
|
|
662
|
+
|
|
663
|
+
# Contributing
|
|
664
|
+
|
|
665
|
+
Want to improve QuickLearnKit?
|
|
666
|
+
|
|
667
|
+
1. Fork the repository
|
|
668
|
+
2. Create a feature branch
|
|
669
|
+
3. Add tests and documentation
|
|
670
|
+
4. Submit a pull request
|
|
671
|
+
|
|
672
|
+
---
|
|
673
|
+
|
|
674
|
+
# License
|
|
675
|
+
|
|
676
|
+
MIT License
|
|
677
|
+
|
|
678
|
+
---
|
|
679
|
+
|
|
680
|
+
QuickLearnKit helps you move from *learning* to *building* faster — without sacrificing clarity or control. 🚀
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "quicklearnkit"
|
|
7
|
-
version = "0.
|
|
7
|
+
version = "0.4.0"
|
|
8
8
|
description = "Learning-first machine learning utilities library for simplified imports, sampling, splitting, and probabilistic preprocessing."
|
|
9
9
|
readme = "readme.md"
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -16,7 +16,9 @@ dependencies = [
|
|
|
16
16
|
"numpy",
|
|
17
17
|
"pandas",
|
|
18
18
|
"scikit-learn",
|
|
19
|
-
"xgboost"
|
|
19
|
+
"xgboost",
|
|
20
|
+
"seaborn",
|
|
21
|
+
"matplotlib"
|
|
20
22
|
]
|
|
21
23
|
|
|
22
24
|
[tool.setuptools.packages.find]
|