dragon-ml-toolbox 3.12.6__py3-none-any.whl → 4.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic. Click here for more details.
- dragon_ml_toolbox-4.1.0.dist-info/METADATA +253 -0
- dragon_ml_toolbox-4.1.0.dist-info/RECORD +30 -0
- ml_tools/ETL_engineering.py +2 -2
- ml_tools/GUI_tools.py +2 -2
- ml_tools/MICE_imputation.py +4 -3
- ml_tools/ML_callbacks.py +8 -4
- ml_tools/ML_evaluation.py +11 -6
- ml_tools/ML_inference.py +131 -0
- ml_tools/ML_trainer.py +17 -8
- ml_tools/PSO_optimization.py +116 -62
- ml_tools/RNN_forecast.py +5 -0
- ml_tools/SQL.py +272 -0
- ml_tools/VIF_factor.py +4 -3
- ml_tools/_logger.py +36 -0
- ml_tools/_pytorch_models.py +1 -1
- ml_tools/_script_info.py +8 -0
- ml_tools/{logger.py → custom_logger.py} +4 -66
- ml_tools/data_exploration.py +2 -66
- ml_tools/datasetmaster.py +3 -2
- ml_tools/ensemble_inference.py +249 -0
- ml_tools/ensemble_learning.py +40 -294
- ml_tools/handle_excel.py +3 -2
- ml_tools/keys.py +13 -2
- ml_tools/path_manager.py +194 -31
- ml_tools/utilities.py +2 -180
- dragon_ml_toolbox-3.12.6.dist-info/METADATA +0 -137
- dragon_ml_toolbox-3.12.6.dist-info/RECORD +0 -26
- ml_tools/ML_tutorial.py +0 -300
- {dragon_ml_toolbox-3.12.6.dist-info → dragon_ml_toolbox-4.1.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-3.12.6.dist-info → dragon_ml_toolbox-4.1.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-3.12.6.dist-info → dragon_ml_toolbox-4.1.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-3.12.6.dist-info → dragon_ml_toolbox-4.1.0.dist-info}/top_level.txt +0 -0
ml_tools/ML_tutorial.py
DELETED
|
@@ -1,300 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
from typing import Literal, Optional, Union
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
from .logger import _LOGGER
|
|
5
|
-
from .utilities import make_fullpath, sanitize_filename
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
__all__ = [
|
|
9
|
-
"generate_notebook"
|
|
10
|
-
]
|
|
11
|
-
|
|
12
|
-
def _get_notebook_content(kind: str):
    """Build the ordered list of cells for the tutorial notebook.

    Every cell's ``source`` is a list of strings that the notebook format
    joins verbatim, so each entry except the last one in a cell ends with a
    real newline character.

    Args:
        kind: Which tutorial to build, ``'classification'`` or
            ``'regression'``. The value is also embedded in the generated
            ``MyTrainer(...)`` call.

    Returns:
        A list of notebook cell dictionaries (markdown and code) in display
        order. New dictionaries are created on every call.

    Raises:
        ValueError: If ``kind`` is neither ``'classification'`` nor
            ``'regression'``.
    """
    # --- Task-Specific Content ---
    if kind == 'classification':
        title = "Classification Tutorial"
        data_prep_source = [
            "### 2. Prepare the Data\n",
            "For this example, we'll generate some simple, linearly separable mock data for a binary classification task. We'll then wrap it in PyTorch `TensorDataset` objects.",
        ]
        data_creation_source = [
            "from sklearn.datasets import make_classification\n",
            "from sklearn.model_selection import train_test_split\n",
            "\n",
            "X, y = make_classification(n_samples=200, n_features=10, n_informative=5, n_redundant=0, random_state=42)\n",
            "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
            "\n",
            "# Convert to PyTorch tensors\n",
            "X_train = torch.FloatTensor(X_train)\n",
            "y_train = torch.LongTensor(y_train)\n",
            "X_test = torch.FloatTensor(X_test)\n",
            "y_test = torch.LongTensor(y_test)\n",
            "\n",
            "# Create TensorDatasets\n",
            "train_dataset = TensorDataset(X_train, y_train)\n",
            "test_dataset = TensorDataset(X_test, y_test)",
        ]
        model_creation_source = [
            "class SimpleClassifier(nn.Module):\n",
            "    def __init__(self, input_features, num_classes):\n",
            "        super().__init__()\n",
            "        self.layer_1 = nn.Linear(input_features, 32)\n",
            "        self.layer_2 = nn.Linear(32, num_classes)\n",
            "        self.relu = nn.ReLU()\n",
            "    \n",
            "    def forward(self, x):\n",
            "        return self.layer_2(self.relu(self.layer_1(x)))\n",
            "\n",
            "model = SimpleClassifier(input_features=10, num_classes=2)\n",
            "criterion = nn.CrossEntropyLoss()\n",
            "optimizer = torch.optim.Adam(model.parameters(), lr=0.001)",
        ]
    elif kind == 'regression':
        title = "Regression Tutorial"
        data_prep_source = [
            "### 2. Prepare the Data\n",
            "For this example, we'll generate some simple mock data for a regression task. We'll then wrap it in PyTorch `TensorDataset` objects.",
        ]
        data_creation_source = [
            "from sklearn.datasets import make_regression\n",
            "from sklearn.model_selection import train_test_split\n",
            "\n",
            "X, y = make_regression(n_samples=200, n_features=5, noise=15, random_state=42)\n",
            "y = y.reshape(-1, 1)  # Reshape for compatibility with MSELoss\n",
            "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
            "\n",
            "# Convert to PyTorch tensors\n",
            "X_train = torch.FloatTensor(X_train)\n",
            "y_train = torch.FloatTensor(y_train)\n",
            "X_test = torch.FloatTensor(X_test)\n",
            "y_test = torch.FloatTensor(y_test)\n",
            "\n",
            "# Create TensorDatasets\n",
            "train_dataset = TensorDataset(X_train, y_train)\n",
            "test_dataset = TensorDataset(X_test, y_test)",
        ]
        model_creation_source = [
            "class SimpleRegressor(nn.Module):\n",
            "    def __init__(self, input_features, output_features):\n",
            "        super().__init__()\n",
            "        self.layer_1 = nn.Linear(input_features, 32)\n",
            "        self.layer_2 = nn.Linear(32, output_features)\n",
            "        self.relu = nn.ReLU()\n",
            "    \n",
            "    def forward(self, x):\n",
            "        return self.layer_2(self.relu(self.layer_1(x)))\n",
            "\n",
            "model = SimpleRegressor(input_features=5, output_features=1)\n",
            "criterion = nn.MSELoss()\n",
            "optimizer = torch.optim.Adam(model.parameters(), lr=0.001)",
        ]
    else:
        raise ValueError("kind must be 'classification' or 'regression'")

    # --- Common Cells ---
    # Each import sits on its own line: a missing comma between two adjacent
    # string literals would silently merge them into one invalid statement.
    imports_source = [
        "import torch\n",
        "from torch import nn\n",
        "from torch.utils.data import TensorDataset, DataLoader\n",
        "import numpy as np\n",
        "from pathlib import Path\n",
        "\n",
        "# Import from dragon_ml_toolbox\n",
        "from ml_tools.ML_trainer import MyTrainer\n",
        "from ml_tools.ML_callbacks import EarlyStopping, ModelCheckpoint\n",
        "from ml_tools.keys import LogKeys",
    ]

    device_source = [
        "import torch\n",
        "if torch.cuda.is_available():\n",
        "    device = 'cuda'\n",
        "elif torch.backends.mps.is_available():\n",
        "    device = 'mps'\n",
        "else:\n",
        "    device = 'cpu'\n",
        "\n",
        "print(f'Using device: {device}')",
    ]

    model_definition_source = [
        "### 3. Define the Model, Criterion, and Optimizer\n",
        "Next, we define a simple neural network for our task. We also need to choose a loss function (`criterion`) and an `optimizer`.",
    ]

    callbacks_source = [
        "# Define callbacks for training\n",
        "model_filepath = 'best_model.pth'\n",
        "monitor_metric = LogKeys.VAL_LOSS\n",
        "\n",
        "model_checkpoint = ModelCheckpoint(\n",
        "    filepath=model_filepath, \n",
        "    save_best_only=True, \n",
        "    monitor=monitor_metric, \n",
        "    mode='min'\n",
        ")\n",
        "\n",
        "early_stopping = EarlyStopping(\n",
        "    patience=10, \n",
        "    monitor=monitor_metric, \n",
        "    mode='min'\n",
        ")",
    ]

    trainer_source = [
        "trainer = MyTrainer(\n",
        "    model=model,\n",
        "    train_dataset=train_dataset,\n",
        "    test_dataset=test_dataset,\n",
        f"    kind='{kind}',\n",
        "    criterion=criterion,\n",
        "    optimizer=optimizer,\n",
        "    device=device,\n",
        "    callbacks=[model_checkpoint, early_stopping]\n",
        ")",
    ]

    fit_source = [
        "history = trainer.fit(epochs=100, batch_size=16)",
    ]

    # The doubled backslash inside print(...) is intentional: the generated
    # cell must contain the two characters "\n" within its own string literal.
    evaluation_source = [
        "save_dir = Path('tutorial_results')\n",
        "\n",
        "# The evaluate method will automatically use the test_loader.\n",
        "# First, we load the best weights saved by ModelCheckpoint.\n",
        "model_path = Path(model_filepath)\n",
        "if model_path.exists():\n",
        "    print(f'Loading best model from {model_path}')\n",
        "    trainer.model.load_state_dict(torch.load(model_path))\n",
        "\n",
        "print('\\n--- Evaluating Model ---')\n",
        "# All evaluation artifacts will be saved in the 'evaluation' subdirectory.\n",
        "trainer.evaluate(save_dir=save_dir / 'evaluation')",
    ]

    explanation_source = [
        "print('\\n--- Explaining Model ---')\n",
        "# We can also generate SHAP plots to explain the model's predictions.\n",
        "# All SHAP artifacts will be saved in the 'explanation' subdirectory.\n",
        "trainer.explain(\n",
        "    background_loader=trainer.train_loader,\n",
        "    explain_loader=trainer.test_loader,\n",
        "    save_dir=save_dir / 'explanation'\n",
        ")",
    ]

    # --- Assemble Notebook ---
    return [
        _markdown_cell([f"# Dragon ML Toolbox - {title}\n", "This notebook demonstrates how to use the `MyTrainer` class for a complete training and evaluation workflow."]),
        _markdown_cell(["### 1. Imports\n", "First, let's import all the necessary components."]),
        _code_cell(imports_source),
        _markdown_cell(data_prep_source),
        _code_cell(data_creation_source),
        _markdown_cell(model_definition_source),
        _code_cell(model_creation_source),
        _markdown_cell(["### 4. Configure Callbacks\n", "We'll set up `ModelCheckpoint` to save the best model and `EarlyStopping` to prevent overfitting."]),
        _code_cell(callbacks_source),
        _markdown_cell(["### 5. Initialize the Trainer\n", "First, we'll determine the best device to run on. Then, we can instantiate `MyTrainer` with all our components."]),
        _code_cell(device_source),
        _code_cell(trainer_source),
        _markdown_cell(["### 6. Train the Model\n", "Call the `.fit()` method to start training."]),
        _code_cell(fit_source),
        _markdown_cell(["### 7. Evaluate the Model\n", "Finally, call the `.evaluate()` method to see the performance report and save all plots and metrics."]),
        _code_cell(evaluation_source),
        _markdown_cell(["### 8. Explain the Model\n", "We can also use the `.explain()` method to generate and save SHAP plots for model interpretability."]),
        _code_cell(explanation_source),
    ]


def _markdown_cell(source):
    """Return a markdown cell dict wrapping the given list of source lines."""
    # nbformat 4 expects a ``metadata`` object on every cell, not only on
    # code cells.
    return {"cell_type": "markdown", "source": source, "metadata": {}}


def _code_cell(source):
    """Return an unexecuted code cell dict wrapping the given source lines."""
    return {
        "cell_type": "code",
        "source": source,
        "execution_count": None,
        "metadata": {},
        "outputs": [],
    }
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
def generate_notebook(kind: Literal['classification', 'regression'] = 'classification', filepath: Optional[Union[str,Path]] = None):
    """
    Generates a tutorial Jupyter Notebook (.ipynb) file.

    This function creates a notebook with mock data, a simple model, and a
    full training/evaluation cycle using MyTrainer, then writes it to disk
    as JSON.

    Args:
        kind (str): The type of tutorial to generate, either 'classification' or 'regression'.
        filepath (str | Path | None): The path to save the notebook file.
            If None, defaults to 'classification_tutorial.ipynb' or
            'regression_tutorial.ipynb'. A '.ipynb' suffix is appended when
            the sanitized name does not already end with it.

    Returns:
        None. A failure while writing the file is logged rather than raised.

    Raises:
        ValueError: If `kind` is not 'classification' or 'regression'.
    """
    if kind not in ("classification", "regression"):
        raise ValueError("kind must be 'classification' or 'regression'")

    if filepath is None:
        sanitized_filepath = f"{kind}_tutorial.ipynb"
    else:
        # NOTE(review): the whole path string (not just its final component)
        # is passed to sanitize_filename; if that helper strips path
        # separators, directory parts of `filepath` are flattened - confirm.
        sanitized_filepath = sanitize_filename(str(filepath))

    # check suffix
    if not sanitized_filepath.endswith(".ipynb"):
        sanitized_filepath = sanitized_filepath + ".ipynb"

    new_filepath = make_fullpath(sanitized_filepath, make=True)

    # Log the resolved destination; the raw argument is None when defaulted.
    _LOGGER.info(f"Generating {kind} tutorial notebook at: {new_filepath}")

    notebook = {
        "cells": _get_notebook_content(kind),
        "metadata": {
            "kernelspec": {
                "display_name": "Python 3",
                "language": "python",
                "name": "python3"
            },
            "language_info": {
                "name": "python",
                "version": "3.10.0"
            }
        },
        "nbformat": 4,
        "nbformat_minor": 2
    }

    try:
        # Explicit encoding so the output does not depend on the platform default.
        with open(new_filepath, 'w', encoding='utf-8') as f:
            json.dump(notebook, f, indent=2)
    except OSError as e:
        # Best effort: report the I/O failure instead of propagating it.
        _LOGGER.error(f"Error generating notebook: {e}")
    else:
        _LOGGER.info("Notebook generated successfully.")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|