dragon-ml-toolbox 6.4.1__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic. Click here for more details.
- {dragon_ml_toolbox-6.4.1.dist-info → dragon_ml_toolbox-8.0.0.dist-info}/METADATA +4 -1
- {dragon_ml_toolbox-6.4.1.dist-info → dragon_ml_toolbox-8.0.0.dist-info}/RECORD +14 -11
- ml_tools/ML_datasetmaster.py +285 -438
- ml_tools/ML_evaluation.py +119 -51
- ml_tools/ML_evaluation_multi.py +296 -0
- ml_tools/ML_inference.py +251 -31
- ml_tools/ML_models.py +468 -47
- ml_tools/ML_scaler.py +197 -0
- ml_tools/ML_trainer.py +246 -73
- ml_tools/_ML_optimization_multi.py +231 -0
- {dragon_ml_toolbox-6.4.1.dist-info → dragon_ml_toolbox-8.0.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-6.4.1.dist-info → dragon_ml_toolbox-8.0.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-6.4.1.dist-info → dragon_ml_toolbox-8.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-6.4.1.dist-info → dragon_ml_toolbox-8.0.0.dist-info}/top_level.txt +0 -0
ml_tools/ML_inference.py
CHANGED
|
@@ -3,7 +3,9 @@ from torch import nn
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import Union, Literal, Dict, Any, Optional
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
6
7
|
|
|
8
|
+
from .ML_scaler import PytorchScaler
|
|
7
9
|
from ._script_info import _script_info
|
|
8
10
|
from ._logger import _LOGGER
|
|
9
11
|
from .path_manager import make_fullpath
|
|
@@ -11,35 +13,44 @@ from .keys import PyTorchInferenceKeys
|
|
|
11
13
|
|
|
12
14
|
__all__ = [
|
|
13
15
|
"PyTorchInferenceHandler",
|
|
16
|
+
"PyTorchInferenceHandlerMulti",
|
|
14
17
|
"multi_inference_regression",
|
|
15
18
|
"multi_inference_classification"
|
|
16
19
|
]
|
|
17
20
|
|
|
18
|
-
|
|
21
|
+
|
|
22
|
+
class _BaseInferenceHandler(ABC):
|
|
19
23
|
"""
|
|
20
|
-
|
|
21
|
-
|
|
24
|
+
Abstract base class for PyTorch inference handlers.
|
|
25
|
+
|
|
26
|
+
Manages common tasks like loading a model's state dictionary, validating
|
|
27
|
+
the target device, and preprocessing input features.
|
|
22
28
|
"""
|
|
23
29
|
def __init__(self,
|
|
24
30
|
model: nn.Module,
|
|
25
31
|
state_dict: Union[str, Path],
|
|
26
|
-
task: Literal["classification", "regression"],
|
|
27
32
|
device: str = 'cpu',
|
|
28
|
-
|
|
33
|
+
scaler: Optional[Union[PytorchScaler, str, Path]] = None):
|
|
29
34
|
"""
|
|
30
|
-
Initializes the handler
|
|
35
|
+
Initializes the handler.
|
|
31
36
|
|
|
32
37
|
Args:
|
|
33
|
-
model (nn.Module): An instantiated PyTorch model
|
|
34
|
-
state_dict (str | Path):
|
|
35
|
-
task (str): The type of task, 'regression' or 'classification'.
|
|
38
|
+
model (nn.Module): An instantiated PyTorch model.
|
|
39
|
+
state_dict (str | Path): Path to the saved .pth model state_dict file.
|
|
36
40
|
device (str): The device to run inference on ('cpu', 'cuda', 'mps').
|
|
37
|
-
|
|
41
|
+
scaler (PytorchScaler | str | Path | None): An optional scaler or path to a saved scaler state.
|
|
38
42
|
"""
|
|
39
43
|
self.model = model
|
|
40
|
-
self.task = task
|
|
41
44
|
self.device = self._validate_device(device)
|
|
42
|
-
|
|
45
|
+
|
|
46
|
+
# Load the scaler if a path is provided
|
|
47
|
+
if scaler is not None:
|
|
48
|
+
if isinstance(scaler, (str, Path)):
|
|
49
|
+
self.scaler = PytorchScaler.load(scaler)
|
|
50
|
+
else:
|
|
51
|
+
self.scaler = scaler
|
|
52
|
+
else:
|
|
53
|
+
self.scaler = None
|
|
43
54
|
|
|
44
55
|
model_p = make_fullpath(state_dict, enforce="file")
|
|
45
56
|
|
|
@@ -60,53 +71,116 @@ class PyTorchInferenceHandler:
|
|
|
60
71
|
_LOGGER.warning("⚠️ CUDA not available, switching to CPU.")
|
|
61
72
|
device_lower = "cpu"
|
|
62
73
|
elif device_lower == "mps" and not torch.backends.mps.is_available():
|
|
74
|
+
# Your M-series Mac will appreciate this check!
|
|
63
75
|
_LOGGER.warning("⚠️ Apple Metal Performance Shaders (MPS) not available, switching to CPU.")
|
|
64
76
|
device_lower = "cpu"
|
|
65
77
|
return torch.device(device_lower)
|
|
66
78
|
|
|
67
79
|
def _preprocess_input(self, features: Union[np.ndarray, torch.Tensor]) -> torch.Tensor:
|
|
68
|
-
"""
|
|
80
|
+
"""
|
|
81
|
+
Converts input to a torch.Tensor, applies scaling if a scaler is
|
|
82
|
+
present, and moves it to the correct device.
|
|
83
|
+
"""
|
|
69
84
|
if isinstance(features, np.ndarray):
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
85
|
+
features_tensor = torch.from_numpy(features).float()
|
|
86
|
+
else:
|
|
87
|
+
features_tensor = features.float()
|
|
88
|
+
|
|
89
|
+
if self.scaler:
|
|
90
|
+
features_tensor = self.scaler.transform(features_tensor)
|
|
91
|
+
|
|
92
|
+
return features_tensor.to(self.device)
|
|
93
|
+
|
|
94
|
+
@abstractmethod
|
|
95
|
+
def predict_batch(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, torch.Tensor]:
|
|
96
|
+
"""Core batch prediction method. Must be implemented by subclasses."""
|
|
97
|
+
pass
|
|
98
|
+
|
|
99
|
+
@abstractmethod
|
|
100
|
+
def predict(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, torch.Tensor]:
|
|
101
|
+
"""Core single-sample prediction method. Must be implemented by subclasses."""
|
|
102
|
+
pass
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class PyTorchInferenceHandler(_BaseInferenceHandler):
|
|
106
|
+
"""
|
|
107
|
+
Handles loading a PyTorch model's state dictionary and performing inference
|
|
108
|
+
for single-target regression or classification tasks.
|
|
109
|
+
"""
|
|
110
|
+
def __init__(self,
|
|
111
|
+
model: nn.Module,
|
|
112
|
+
state_dict: Union[str, Path],
|
|
113
|
+
task: Literal["classification", "regression"],
|
|
114
|
+
device: str = 'cpu',
|
|
115
|
+
target_id: Optional[str] = None,
|
|
116
|
+
scaler: Optional[Union[PytorchScaler, str, Path]] = None):
|
|
117
|
+
"""
|
|
118
|
+
Initializes the handler for single-target tasks.
|
|
119
|
+
|
|
120
|
+
Args:
|
|
121
|
+
model (nn.Module): An instantiated PyTorch model architecture.
|
|
122
|
+
state_dict (str | Path): Path to the saved .pth model state_dict file.
|
|
123
|
+
task (str): The type of task, 'regression' or 'classification'.
|
|
124
|
+
device (str): The device to run inference on ('cpu', 'cuda', 'mps').
|
|
125
|
+
target_id (str | None): An optional identifier for the target.
|
|
126
|
+
scaler (PytorchScaler | str | Path | None): A PytorchScaler instance or the file path to a saved PytorchScaler state.
|
|
127
|
+
"""
|
|
128
|
+
# Call the parent constructor to handle model loading, device, and scaler
|
|
129
|
+
super().__init__(model, state_dict, device, scaler)
|
|
130
|
+
|
|
131
|
+
if task not in ["classification", "regression"]:
|
|
132
|
+
raise ValueError("`task` must be 'classification' or 'regression'.")
|
|
133
|
+
self.task = task
|
|
134
|
+
self.target_id = target_id
|
|
135
|
+
|
|
75
136
|
def predict_batch(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, torch.Tensor]:
|
|
76
137
|
"""
|
|
77
|
-
Core batch prediction method
|
|
138
|
+
Core batch prediction method for single-target models.
|
|
139
|
+
|
|
140
|
+
Args:
|
|
141
|
+
features (np.ndarray | torch.Tensor): A 2D array/tensor of input features.
|
|
142
|
+
|
|
143
|
+
Returns:
|
|
144
|
+
A dictionary containing the raw output tensors from the model.
|
|
78
145
|
"""
|
|
79
146
|
if features.ndim != 2:
|
|
80
147
|
raise ValueError("Input for batch prediction must be a 2D array or tensor.")
|
|
81
148
|
|
|
82
149
|
input_tensor = self._preprocess_input(features)
|
|
83
|
-
|
|
150
|
+
|
|
84
151
|
with torch.no_grad():
|
|
85
|
-
# Output tensor remains on the model's device (e.g., 'mps' or 'cuda')
|
|
86
152
|
output = self.model(input_tensor)
|
|
87
153
|
|
|
88
154
|
if self.task == "classification":
|
|
89
|
-
probs =
|
|
155
|
+
probs = torch.softmax(output, dim=1)
|
|
90
156
|
labels = torch.argmax(probs, dim=1)
|
|
91
157
|
return {
|
|
92
158
|
PyTorchInferenceKeys.LABELS: labels,
|
|
93
159
|
PyTorchInferenceKeys.PROBABILITIES: probs
|
|
94
160
|
}
|
|
95
161
|
else: # regression
|
|
96
|
-
|
|
162
|
+
# For single-target regression, ensure output is flattened
|
|
163
|
+
return {PyTorchInferenceKeys.PREDICTIONS: output.flatten()}
|
|
97
164
|
|
|
98
165
|
def predict(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, torch.Tensor]:
|
|
99
166
|
"""
|
|
100
|
-
Core single-sample prediction
|
|
167
|
+
Core single-sample prediction method for single-target models.
|
|
168
|
+
|
|
169
|
+
Args:
|
|
170
|
+
features (np.ndarray | torch.Tensor): A 1D array/tensor of input features.
|
|
171
|
+
|
|
172
|
+
Returns:
|
|
173
|
+
A dictionary containing the raw output tensors for a single sample.
|
|
101
174
|
"""
|
|
102
175
|
if features.ndim == 1:
|
|
103
|
-
features = features.reshape(1, -1)
|
|
104
|
-
|
|
176
|
+
features = features.reshape(1, -1) # Reshape to a batch of one
|
|
177
|
+
|
|
105
178
|
if features.shape[0] != 1:
|
|
106
179
|
raise ValueError("The predict() method is for a single sample. Use predict_batch() for multiple samples.")
|
|
107
180
|
|
|
108
181
|
batch_results = self.predict_batch(features)
|
|
109
|
-
|
|
182
|
+
|
|
183
|
+
# Extract the first (and only) result from the batch output
|
|
110
184
|
single_results = {key: value[0] for key, value in batch_results.items()}
|
|
111
185
|
return single_results
|
|
112
186
|
|
|
@@ -117,7 +191,6 @@ class PyTorchInferenceHandler:
|
|
|
117
191
|
Convenience wrapper for predict_batch that returns NumPy arrays.
|
|
118
192
|
"""
|
|
119
193
|
tensor_results = self.predict_batch(features)
|
|
120
|
-
# Move tensor to CPU before converting to NumPy
|
|
121
194
|
numpy_results = {key: value.cpu().numpy() for key, value in tensor_results.items()}
|
|
122
195
|
return numpy_results
|
|
123
196
|
|
|
@@ -126,16 +199,163 @@ class PyTorchInferenceHandler:
|
|
|
126
199
|
Convenience wrapper for predict that returns NumPy arrays or scalars.
|
|
127
200
|
"""
|
|
128
201
|
tensor_results = self.predict(features)
|
|
129
|
-
|
|
202
|
+
|
|
130
203
|
if self.task == "regression":
|
|
131
|
-
# .item() implicitly moves to CPU
|
|
204
|
+
# .item() implicitly moves to CPU and returns a Python scalar
|
|
132
205
|
return {PyTorchInferenceKeys.PREDICTIONS: tensor_results[PyTorchInferenceKeys.PREDICTIONS].item()}
|
|
133
206
|
else: # classification
|
|
134
207
|
return {
|
|
135
208
|
PyTorchInferenceKeys.LABELS: tensor_results[PyTorchInferenceKeys.LABELS].item(),
|
|
136
|
-
# Move tensor to CPU before converting to NumPy
|
|
137
209
|
PyTorchInferenceKeys.PROBABILITIES: tensor_results[PyTorchInferenceKeys.PROBABILITIES].cpu().numpy()
|
|
138
210
|
}
|
|
211
|
+
|
|
212
|
+
def quick_predict(self, features: Union[np.ndarray, torch.Tensor]) -> Dict[str, Any]:
|
|
213
|
+
"""
|
|
214
|
+
Convenience wrapper to get the mapping {target_name: prediction} or {target_name: label}
|
|
215
|
+
|
|
216
|
+
`target_id` must be implemented.
|
|
217
|
+
"""
|
|
218
|
+
if self.target_id is None:
|
|
219
|
+
raise AttributeError(f"'target_id' has not been implemented.")
|
|
220
|
+
|
|
221
|
+
if self.task == "regression":
|
|
222
|
+
result = self.predict_numpy(features)[PyTorchInferenceKeys.PREDICTIONS]
|
|
223
|
+
else:
|
|
224
|
+
result = self.predict_numpy(features)[PyTorchInferenceKeys.LABELS]
|
|
225
|
+
|
|
226
|
+
return {self.target_id: result}
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class PyTorchInferenceHandlerMulti(_BaseInferenceHandler):
    """
    Inference handler for multi-target regression or multi-label
    classification tasks.

    Model/state-dict loading, device validation and the optional scaler are
    delegated to the parent constructor; this class adds the task-specific
    post-processing of the model output.
    """
    def __init__(self,
                 model: nn.Module,
                 state_dict: Union[str, Path],
                 task: Literal["multi_target_regression", "multi_label_classification"],
                 device: str = 'cpu',
                 target_ids: Optional[list[str]] = None,
                 scaler: Optional[Union[PytorchScaler, str, Path]] = None):
        """
        Initializes the handler for multi-target tasks.

        Args:
            model (nn.Module): An instantiated PyTorch model.
            state_dict (str | Path): Path to the saved .pth model state_dict file.
            task (str): The type of task, 'multi_target_regression' or 'multi_label_classification'.
            device (str): The device to run inference on ('cpu', 'cuda', 'mps').
            target_ids (list[str] | None): Optional names of the targets, in model
                output order. Required only by `quick_predict()`.
            scaler (PytorchScaler | str | Path | None): A PytorchScaler instance or the file path to a saved PytorchScaler state.

        Raises:
            ValueError: If `task` is not one of the two supported values.
        """
        super().__init__(model, state_dict, device, scaler)

        if task not in ["multi_target_regression", "multi_label_classification"]:
            raise ValueError("`task` must be 'multi_target_regression' or 'multi_label_classification'.")
        self.task = task
        self.target_ids = target_ids

    def predict_batch(self,
                      features: Union[np.ndarray, torch.Tensor],
                      classification_threshold: float = 0.5
                      ) -> Dict[str, torch.Tensor]:
        """
        Core batch prediction method for multi-target models.

        Args:
            features (np.ndarray | torch.Tensor): A 2D array/tensor of input features.
            classification_threshold (float): The threshold to convert probabilities
                into binary predictions for multi-label classification.

        Returns:
            For 'multi_label_classification': a dictionary with the thresholded
            integer labels and the sigmoid probabilities.
            For 'multi_target_regression': a dictionary with the unmodified
            model output as predictions.

        Raises:
            ValueError: If `features` is not 2-dimensional.
        """
        if features.ndim != 2:
            raise ValueError("Input for batch prediction must be a 2D array or tensor.")

        input_tensor = self._preprocess_input(features)

        with torch.no_grad():
            output = self.model(input_tensor)

            if self.task == "multi_label_classification":
                # Independent per-label probabilities (sigmoid, not softmax)
                probs = torch.sigmoid(output)
                # A label is active when its probability reaches the threshold
                labels = (probs >= classification_threshold).int()
                return {
                    PyTorchInferenceKeys.LABELS: labels,
                    PyTorchInferenceKeys.PROBABILITIES: probs
                }
            else:  # multi_target_regression
                # The model output is returned as-is, without reshaping
                return {PyTorchInferenceKeys.PREDICTIONS: output}

    def predict(self,
                features: Union[np.ndarray, torch.Tensor],
                classification_threshold: float = 0.5
                ) -> Dict[str, torch.Tensor]:
        """
        Core single-sample prediction method for multi-target models.

        Args:
            features (np.ndarray | torch.Tensor): A 1D array/tensor of input features
                (a batch whose first dimension is 1 is also accepted).
            classification_threshold (float): The threshold for multi-label tasks.

        Returns:
            The same keys as `predict_batch()`, each holding the first
            (and only) row of the batch result.

        Raises:
            ValueError: If more than one sample is supplied.
        """
        if features.ndim == 1:
            features = features.reshape(1, -1)  # Promote to a batch of one

        if features.shape[0] != 1:
            raise ValueError("The predict() method is for a single sample. Use predict_batch() for multiple samples.")

        batch_results = self.predict_batch(features, classification_threshold)

        # Drop the batch dimension from every returned tensor
        single_results = {key: value[0] for key, value in batch_results.items()}
        return single_results

    # --- NumPy Convenience Wrappers (on CPU) ---

    def predict_batch_numpy(self,
                            features: Union[np.ndarray, torch.Tensor],
                            classification_threshold: float = 0.5
                            ) -> Dict[str, np.ndarray]:
        """
        Convenience wrapper for predict_batch that returns NumPy arrays.

        Each result tensor is moved to the CPU before conversion.
        """
        tensor_results = self.predict_batch(features, classification_threshold)
        numpy_results = {key: value.cpu().numpy() for key, value in tensor_results.items()}
        return numpy_results

    def predict_numpy(self,
                      features: Union[np.ndarray, torch.Tensor],
                      classification_threshold: float = 0.5
                      ) -> Dict[str, np.ndarray]:
        """
        Convenience wrapper for predict that returns NumPy arrays for a single sample.
        Note: For multi-target models, the output is always an array.
        """
        tensor_results = self.predict(features, classification_threshold)
        numpy_results = {key: value.cpu().numpy() for key, value in tensor_results.items()}
        return numpy_results

    def quick_predict(self,
                      features: Union[np.ndarray, torch.Tensor],
                      classification_threshold: float = 0.5
                      ) -> Dict[str, Any]:
        """
        Convenience wrapper to get the mapping {target_name: prediction} or {target_name: label}
        for a single sample.

        `target_ids` must have been provided at construction time.

        Args:
            features (np.ndarray | torch.Tensor): Input features for one sample.
            classification_threshold (float): The threshold for multi-label tasks;
                forwarded to `predict_numpy()`.

        Returns:
            A dictionary pairing each entry of `target_ids`, in order, with the
            corresponding flattened output value. Pairing stops at the shorter
            of the two sequences.

        Raises:
            AttributeError: If `target_ids` was not set.
        """
        if self.target_ids is None:
            raise AttributeError("'target_ids' has not been implemented.")

        # Regression exposes the predictions; classification exposes the binary labels
        if self.task == "multi_target_regression":
            result_key = PyTorchInferenceKeys.PREDICTIONS
        else:
            result_key = PyTorchInferenceKeys.LABELS

        values = self.predict_numpy(features, classification_threshold)[result_key].flatten().tolist()

        return dict(zip(self.target_ids, values))
|
|
139
359
|
|
|
140
360
|
|
|
141
361
|
def multi_inference_regression(handlers: list[PyTorchInferenceHandler],
|