dragon-ml-toolbox 14.7.0__py3-none-any.whl → 16.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.1.dist-info}/METADATA +9 -5
- dragon_ml_toolbox-16.2.1.dist-info/RECORD +51 -0
- ml_tools/ETL_cleaning.py +20 -20
- ml_tools/ETL_engineering.py +23 -25
- ml_tools/GUI_tools.py +20 -20
- ml_tools/MICE_imputation.py +3 -3
- ml_tools/ML_callbacks.py +43 -26
- ml_tools/ML_configuration.py +726 -32
- ml_tools/ML_datasetmaster.py +235 -280
- ml_tools/ML_evaluation.py +160 -42
- ml_tools/ML_evaluation_multi.py +103 -35
- ml_tools/ML_inference.py +290 -208
- ml_tools/ML_models.py +13 -102
- ml_tools/ML_models_advanced.py +1 -1
- ml_tools/ML_optimization.py +12 -12
- ml_tools/ML_scaler.py +11 -11
- ml_tools/ML_sequence_datasetmaster.py +341 -0
- ml_tools/ML_sequence_evaluation.py +219 -0
- ml_tools/ML_sequence_inference.py +391 -0
- ml_tools/ML_sequence_models.py +139 -0
- ml_tools/ML_trainer.py +1342 -386
- ml_tools/ML_utilities.py +1 -1
- ml_tools/ML_vision_datasetmaster.py +120 -72
- ml_tools/ML_vision_evaluation.py +30 -6
- ml_tools/ML_vision_inference.py +129 -152
- ml_tools/ML_vision_models.py +1 -1
- ml_tools/ML_vision_transformers.py +121 -40
- ml_tools/PSO_optimization.py +6 -6
- ml_tools/SQL.py +4 -4
- ml_tools/{keys.py → _keys.py} +45 -0
- ml_tools/_schema.py +1 -1
- ml_tools/ensemble_evaluation.py +1 -1
- ml_tools/ensemble_inference.py +7 -33
- ml_tools/ensemble_learning.py +1 -1
- ml_tools/optimization_tools.py +2 -2
- ml_tools/path_manager.py +5 -5
- ml_tools/utilities.py +1 -2
- dragon_ml_toolbox-14.7.0.dist-info/RECORD +0 -49
- ml_tools/RNN_forecast.py +0 -56
- ml_tools/_ML_vision_recipe.py +0 -88
- {dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.1.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.1.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.1.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.1.dist-info}/top_level.txt +0 -0
ml_tools/ML_vision_transformers.py
CHANGED

@@ -2,10 +2,11 @@ from typing import Union, Dict, Type, Callable, Optional, Any, List, Literal
 from PIL import ImageOps, Image
 from torchvision import transforms
 from pathlib import Path
+import json
 
 from ._logger import _LOGGER
 from ._script_info import _script_info
-from .keys import VisionTransformRecipeKeys
+from ._keys import VisionTransformRecipeKeys
 from .path_manager import make_fullpath
 
 
@@ -52,49 +53,12 @@ class ResizeAspectFill:
         return ImageOps.expand(image, padding, fill=self.pad_color)
 
 
+#############################################################
 #NOTE: Add custom transforms.
 TRANSFORM_REGISTRY: Dict[str, Type[Callable]] = {
     "ResizeAspectFill": ResizeAspectFill,
 }
-
-
-def _build_transform_from_recipe(recipe: Dict[str, Any]) -> transforms.Compose:
-    """Internal helper to build a transform pipeline from a recipe dict."""
-    pipeline_steps: List[Callable] = []
-
-    if VisionTransformRecipeKeys.PIPELINE not in recipe:
-        _LOGGER.error("Recipe dict is invalid: missing 'pipeline' key.")
-        raise ValueError("Invalid recipe format.")
-
-    for step in recipe[VisionTransformRecipeKeys.PIPELINE]:
-        t_name = step.get(VisionTransformRecipeKeys.NAME)
-        t_kwargs = step.get(VisionTransformRecipeKeys.KWARGS, {})
-
-        if not t_name:
-            _LOGGER.error(f"Invalid transform step, missing 'name': {step}")
-            continue
-
-        transform_class: Any = None
-
-        # 1. Check standard torchvision transforms
-        if hasattr(transforms, t_name):
-            transform_class = getattr(transforms, t_name)
-        # 2. Check custom transforms
-        elif t_name in TRANSFORM_REGISTRY:
-            transform_class = TRANSFORM_REGISTRY[t_name]
-        # 3. Not found
-        else:
-            _LOGGER.error(f"Unknown transform '{t_name}' in recipe. Not found in torchvision.transforms or TRANSFORM_REGISTRY.")
-            raise ValueError(f"Unknown transform name: {t_name}")
-
-        # Instantiate the transform
-        try:
-            pipeline_steps.append(transform_class(**t_kwargs))
-        except Exception as e:
-            _LOGGER.error(f"Failed to instantiate transform '{t_name}' with kwargs {t_kwargs}: {e}")
-            raise
-
-    return transforms.Compose(pipeline_steps)
+#############################################################
 
 
 def create_offline_augmentations(
@@ -199,5 +163,122 @@ def create_offline_augmentations(
     _LOGGER.info(f"Offline augmentation complete. Saved {total_saved} new images.")
 
 
+def _build_transform_from_recipe(recipe: Dict[str, Any]) -> transforms.Compose:
+    """Internal helper to build a transform pipeline from a recipe dict."""
+    pipeline_steps: List[Callable] = []
+
+    if VisionTransformRecipeKeys.PIPELINE not in recipe:
+        _LOGGER.error("Recipe dict is invalid: missing 'pipeline' key.")
+        raise ValueError("Invalid recipe format.")
+
+    for step in recipe[VisionTransformRecipeKeys.PIPELINE]:
+        t_name = step.get(VisionTransformRecipeKeys.NAME)
+        t_kwargs = step.get(VisionTransformRecipeKeys.KWARGS, {})
+
+        if not t_name:
+            _LOGGER.error(f"Invalid transform step, missing 'name': {step}")
+            continue
+
+        transform_class: Any = None
+
+        # 1. Check standard torchvision transforms
+        if hasattr(transforms, t_name):
+            transform_class = getattr(transforms, t_name)
+        # 2. Check custom transforms
+        elif t_name in TRANSFORM_REGISTRY:
+            transform_class = TRANSFORM_REGISTRY[t_name]
+        # 3. Not found
+        else:
+            _LOGGER.error(f"Unknown transform '{t_name}' in recipe. Not found in torchvision.transforms or TRANSFORM_REGISTRY.")
+            raise ValueError(f"Unknown transform name: {t_name}")
+
+        # Instantiate the transform
+        try:
+            pipeline_steps.append(transform_class(**t_kwargs))
+        except Exception as e:
+            _LOGGER.error(f"Failed to instantiate transform '{t_name}' with kwargs {t_kwargs}: {e}")
+            raise
+
+    return transforms.Compose(pipeline_steps)
+
+
+def _save_recipe(recipe: Dict[str, Any], filepath: Path) -> None:
+    """
+    Saves a transform recipe dictionary to a JSON file.
+
+    Args:
+        recipe (Dict[str, Any]): The recipe dictionary to save.
+        filepath (str): The path to the output .json file.
+    """
+    final_filepath = filepath.with_suffix(".json")
+
+    try:
+        with open(final_filepath, 'w') as f:
+            json.dump(recipe, f, indent=4)
+        _LOGGER.info(f"Transform recipe saved as '{final_filepath.name}'.")
+    except Exception as e:
+        _LOGGER.error(f"Failed to save recipe to '{final_filepath}': {e}")
+        raise
+
+
+def _load_recipe_and_build_transform(filepath: Union[str,Path]) -> transforms.Compose:
+    """
+    Loads a transform recipe from a .json file and reconstructs the
+    torchvision.transforms.Compose pipeline.
+
+    Args:
+        filepath (str): Path to the saved transform recipe .json file.
+
+    Returns:
+        transforms.Compose: The reconstructed transformation pipeline.
+
+    Raises:
+        ValueError: If a transform name in the recipe is not found in
+            torchvision.transforms or the custom TRANSFORM_REGISTRY.
+    """
+    # validate filepath
+    final_filepath = make_fullpath(filepath, enforce="file")
+
+    try:
+        with open(final_filepath, 'r') as f:
+            recipe = json.load(f)
+    except Exception as e:
+        _LOGGER.error(f"Failed to load recipe from '{final_filepath}': {e}")
+        raise
+
+    pipeline_steps: List[Callable] = []
+
+    if VisionTransformRecipeKeys.PIPELINE not in recipe:
+        _LOGGER.error("Recipe file is invalid: missing 'pipeline' key.")
+        raise ValueError("Invalid recipe format.")
+
+    for step in recipe[VisionTransformRecipeKeys.PIPELINE]:
+        t_name = step[VisionTransformRecipeKeys.NAME]
+        t_kwargs = step[VisionTransformRecipeKeys.KWARGS]
+
+        transform_class: Any = None
+
+        # 1. Check standard torchvision transforms
+        if hasattr(transforms, t_name):
+            transform_class = getattr(transforms, t_name)
+        # 2. Check custom transforms
+        elif t_name in TRANSFORM_REGISTRY:
+            transform_class = TRANSFORM_REGISTRY[t_name]
+        # 3. Not found
+        else:
+            _LOGGER.error(f"Unknown transform '{t_name}' in recipe. Not found in torchvision.transforms or TRANSFORM_REGISTRY.")
+            raise ValueError(f"Unknown transform name: {t_name}")
+
+        # Instantiate the transform
+        try:
+            pipeline_steps.append(transform_class(**t_kwargs))
+        except Exception as e:
+            _LOGGER.error(f"Failed to instantiate transform '{t_name}' with kwargs {t_kwargs}: {e}")
+            raise
+
+    _LOGGER.info(f"Successfully loaded and built transform pipeline from '{final_filepath.name}'.")
+    return transforms.Compose(pipeline_steps)
+
+
 def info():
     _script_info(__all__)
ml_tools/PSO_optimization.py
CHANGED

@@ -12,9 +12,9 @@ from .serde import deserialize_object
 from .math_utilities import threshold_binary_values, threshold_binary_values_batch
 from .path_manager import sanitize_filename, make_fullpath, list_files_by_extension
 from ._logger import _LOGGER
-from .keys import EnsembleKeys
+from ._keys import EnsembleKeys
 from ._script_info import _script_info
-from .SQL import DatabaseManager
+from .SQL import DragonSQL
 from .optimization_tools import _save_result
 
 """
@@ -191,7 +191,7 @@ def _set_feature_names(size: int, names: Union[list[str], None]):
     return names
 
 
-def _run_single_pso(objective_function: ObjectiveFunction, pso_args: dict, feature_names: list[str], target_name: str, random_state: int, save_format: Literal['csv', 'sqlite', 'both'], csv_path: Path, db_manager: Optional[DatabaseManager], db_table_name: str):
+def _run_single_pso(objective_function: ObjectiveFunction, pso_args: dict, feature_names: list[str], target_name: str, random_state: int, save_format: Literal['csv', 'sqlite', 'both'], csv_path: Path, db_manager: Optional[DragonSQL], db_table_name: str):
     """Helper for a single PSO run that also handles saving."""
     pso_args.update({"seed": random_state})
 
@@ -213,7 +213,7 @@ def _run_single_pso(objective_function: ObjectiveFunction, pso_args: dict, featu
     return best_features_named, best_target_named
 
 
-def _run_post_hoc_pso(objective_function: ObjectiveFunction, pso_args: dict, feature_names: list[str], target_name: str, repetitions: int, save_format: Literal['csv', 'sqlite', 'both'], csv_path: Path, db_manager: Optional[DatabaseManager], db_table_name: str):
+def _run_post_hoc_pso(objective_function: ObjectiveFunction, pso_args: dict, feature_names: list[str], target_name: str, repetitions: int, save_format: Literal['csv', 'sqlite', 'both'], csv_path: Path, db_manager: Optional[DragonSQL], db_table_name: str):
     """Helper for post-hoc analysis that saves results incrementally."""
     progress = trange(repetitions, desc="Post-Hoc PSO", unit="run")
     for _ in progress:
@@ -342,7 +342,7 @@ def run_pso(lower_boundaries: list[float],
     schema = {"result_id": "INTEGER PRIMARY KEY AUTOINCREMENT", **schema}
 
     # Create table
-    with DatabaseManager(db_path) as db:
+    with DragonSQL(db_path) as db:
         db.create_table(db_table_name, schema)
 
     pso_arguments = {
@@ -357,7 +357,7 @@ def run_pso(lower_boundaries: list[float],
 
     # --- Dispatcher ---
     # Use a real or dummy context manager to handle the DB connection cleanly
-    db_context = DatabaseManager(db_path) if save_format in ['sqlite', 'both'] else nullcontext()
+    db_context = DragonSQL(db_path) if save_format in ['sqlite', 'both'] else nullcontext()
 
     with db_context as db_manager:
         if post_hoc_analysis is None or post_hoc_analysis <= 1:
ml_tools/SQL.py
CHANGED

@@ -9,11 +9,11 @@ from .path_manager import make_fullpath, sanitize_filename
 
 
 __all__ = [
-    "DatabaseManager",
+    "DragonSQL",
 ]
 
 
-class DatabaseManager:
+class DragonSQL:
     """
     A user-friendly context manager for handling SQLite database operations.
 
@@ -35,7 +35,7 @@ class DatabaseManager:
     ...     "feature_a": "REAL",
     ...     "score": "REAL"
     ... }
-    >>> with DatabaseManager("my_results.db") as db:
+    >>> with DragonSQL("my_results.db") as db:
     ...     db.create_table("experiments", schema)
    ...     data = {"run_name": "first_run", "feature_a": 0.123, "score": 95.5}
     ...     db.insert_row("experiments", data)
@@ -43,7 +43,7 @@ class DatabaseManager:
     ...     print(df)
     """
     def __init__(self, db_path: Union[str, Path]):
-        """Initializes the DatabaseManager with the path to the database file."""
+        """Initializes the DragonSQL with the path to the database file."""
         if isinstance(db_path, str):
             if not db_path.endswith(".db"):
                 db_path = db_path + ".db"
ml_tools/{keys.py → _keys.py}
RENAMED

@@ -1,3 +1,10 @@
+class MagicWords:
+    """General purpose keys"""
+    LATEST = "latest"
+    CURRENT = "current"
+    RENAME = "rename"
+
+
 class PyTorchLogKeys:
     """
     Used internally for ML scripts module.
@@ -7,6 +14,7 @@ class PyTorchLogKeys:
     # --- Epoch Level ---
     TRAIN_LOSS = 'train_loss'
     VAL_LOSS = 'val_loss'
+    LEARNING_RATE = 'lr'
 
     # --- Batch Level ---
     BATCH_LOSS = 'loss'
@@ -79,6 +87,15 @@ class PyTorchCheckpointKeys:
     SCHEDULER_STATE = "scheduler_state_dict"
     EPOCH = "epoch"
     BEST_SCORE = "best_score"
+    HISTORY = "history"
+    CHECKPOINT_NAME = "PyModelCheckpoint"
+    # Finalized config
+    CLASSIFICATION_THRESHOLD = "classification_threshold"
+    CLASS_MAP = "class_map"
+    SEQUENCE_LENGTH = "sequence_length"
+    INITIAL_SEQUENCE = "initial_sequence"
+    TARGET_NAME = "target_name"
+    TARGET_NAMES = "target_names"
 
 
 class UtilityKeys:
@@ -119,6 +136,34 @@ class ObjectDetectionKeys:
     LABELS = "labels"
 
 
+class MLTaskKeys:
+    """Used by the Trainer and InferenceHandlers"""
+    REGRESSION = "regression"
+    MULTITARGET_REGRESSION = "multitarget regression"
+
+    BINARY_CLASSIFICATION = "binary classification"
+    MULTICLASS_CLASSIFICATION = "multiclass classification"
+    MULTILABEL_BINARY_CLASSIFICATION = "multilabel binary classification"
+
+    BINARY_IMAGE_CLASSIFICATION = "binary image classification"
+    MULTICLASS_IMAGE_CLASSIFICATION = "multiclass image classification"
+
+    BINARY_SEGMENTATION = "binary segmentation"
+    MULTICLASS_SEGMENTATION = "multiclass segmentation"
+
+    OBJECT_DETECTION = "object detection"
+
+    SEQUENCE_SEQUENCE = "sequence-to-sequence"
+    SEQUENCE_VALUE = "sequence-to-value"
+
+    ALL_BINARY_TASKS = [BINARY_CLASSIFICATION, MULTILABEL_BINARY_CLASSIFICATION, BINARY_IMAGE_CLASSIFICATION, BINARY_SEGMENTATION]
+
+
+class DragonTrainerKeys:
+    VALIDATION_METRICS_DIR = "Validation_Metrics"
+    TEST_METRICS_DIR = "Test_Metrics"
+
+
 class _OneHotOtherPlaceholder:
     """Used internally by GUI_tools."""
     OTHER_GUI = "OTHER"
ml_tools/_schema.py
CHANGED
ml_tools/ensemble_evaluation.py
CHANGED

@@ -25,7 +25,7 @@ from typing import Union, Optional, Literal
 from .path_manager import sanitize_filename, make_fullpath
 from ._script_info import _script_info
 from ._logger import _LOGGER
-from .keys import SHAPKeys
+from ._keys import SHAPKeys
 
 
 __all__ = [
ml_tools/ensemble_inference.py
CHANGED

@@ -1,7 +1,6 @@
 from typing import Union, Literal, Dict, Any, Optional, List
 from pathlib import Path
 import json
-import joblib
 import numpy as np
 # Inference models
 import xgboost
@@ -10,16 +9,17 @@ import lightgbm
 from ._script_info import _script_info
 from ._logger import _LOGGER
 from .path_manager import make_fullpath, list_files_by_extension
-from .keys import EnsembleKeys
+from ._keys import EnsembleKeys
+from .serde import deserialize_object
 
 
 __all__ = [
-    "InferenceHandler",
+    "DragonEnsembleInferenceHandler",
     "model_report"
 ]
 
 
-class InferenceHandler:
+class DragonEnsembleInferenceHandler:
     """
     Handles loading ensemble models and performing inference for either regression or classification tasks.
     """
@@ -44,9 +44,9 @@ class InferenceHandler:
         for fname, fpath in model_files.items():
             try:
                 full_object: dict
-                full_object = _deserialize_object(filepath=fpath,
+                full_object = deserialize_object(filepath=fpath,
                                                   verbose=self.verbose,
-                                                  raise_on_error=True) # type: ignore
+                                                  expected_type=dict)
 
                 model: Any = full_object[EnsembleKeys.MODEL]
                 target_name: str = full_object[EnsembleKeys.TARGET]
@@ -170,7 +170,7 @@ def model_report(
 
     # --- 2. Deserialize and Extract Info ---
     try:
-        full_object: dict = _deserialize_object(model_p) # type: ignore
+        full_object: dict = deserialize_object(model_p, expected_type=dict, verbose=verbose) # type: ignore
         model = full_object[EnsembleKeys.MODEL]
         target = full_object[EnsembleKeys.TARGET]
         features = full_object[EnsembleKeys.FEATURES]
@@ -218,31 +218,5 @@ def model_report(
     return report_data
 
 
-# Local implementation to avoid calling utilities dependencies
-def _deserialize_object(filepath: Union[str,Path], verbose: bool=True, raise_on_error: bool=True) -> Optional[Any]:
-    """
-    Loads a serialized object from a .joblib file.
-
-    Parameters:
-        filepath (str | Path): Full path to the serialized .joblib file.
-
-    Returns:
-        (Any | None): The deserialized Python object, or None if loading fails.
-    """
-    true_filepath = make_fullpath(filepath)
-
-    try:
-        obj = joblib.load(true_filepath)
-    except (IOError, OSError, EOFError, TypeError, ValueError) as e:
-        _LOGGER.error(f"Failed to deserialize object from '{true_filepath}'.")
-        if raise_on_error:
-            raise e
-        return None
-    else:
-        if verbose:
-            _LOGGER.info(f"Loaded object of type '{type(obj)}'")
-        return obj
-
-
 def info():
     _script_info(__all__)
ml_tools/ensemble_learning.py
CHANGED

@@ -17,7 +17,7 @@ from .utilities import yield_dataframes_from_dir, train_dataset_yielder
 from .serde import serialize_object_filename
 from .path_manager import sanitize_filename, make_fullpath
 from ._script_info import _script_info
-from .keys import EnsembleKeys
+from ._keys import EnsembleKeys
 from ._logger import _LOGGER
 from .ensemble_evaluation import (evaluate_model_classification,
                                   plot_roc_curve,
ml_tools/optimization_tools.py
CHANGED

@@ -8,7 +8,7 @@ from .path_manager import make_fullpath, list_csv_paths, sanitize_filename
 from .utilities import yield_dataframes_from_dir
 from ._logger import _LOGGER
 from ._script_info import _script_info
-from .SQL import DatabaseManager
+from .SQL import DragonSQL
 from ._schema import FeatureSchema
 
 
@@ -262,7 +262,7 @@ def _save_result(
     result_dict: dict,
     save_format: Literal['csv', 'sqlite', 'both'],
     csv_path: Path,
-    db_manager: Optional[DatabaseManager] = None,
+    db_manager: Optional[DragonSQL] = None,
     db_table_name: Optional[str] = None,
     categorical_mappings: Optional[Dict[str, Dict[str, int]]] = None
 ):
ml_tools/path_manager.py
CHANGED

@@ -9,7 +9,7 @@ from ._logger import _LOGGER
 
 
 __all__ = [
-    "PathManager",
+    "DragonPathManager",
     "make_fullpath",
     "sanitize_filename",
     "list_csv_paths",
@@ -18,7 +18,7 @@ __all__ = [
 ]
 
 
-class PathManager:
+class DragonPathManager:
     """
     Manages and stores a project's file paths, acting as a centralized
     "path database". It supports both development mode and applications
@@ -43,7 +43,7 @@ class PathManager:
 
     Args:
         anchor_file (str): The path to a file within your package, typically
-            the `__file__` of the script where PathManager
+            the `__file__` of the script where DragonPathManager
            is instantiated. This is used to locate the
            package root directory.
        base_directories (List[str] | None): An optional list of strings,
@@ -149,7 +149,7 @@ class PathManager:
             if key in self._paths:
                 path_items.append((key, self._paths[key]))
             elif verbose:
-                _LOGGER.warning(f"Key '{key}' not found in PathManager, skipping.")
+                _LOGGER.warning(f"Key '{key}' not found in DragonPathManager, skipping.")
         else:
             path_items = self._paths.items()
 
@@ -194,7 +194,7 @@ class PathManager:
     def __repr__(self) -> str:
         """Provides a string representation of the stored paths."""
         path_list = "\n".join(f"  '{k}': '{v}'" for k, v in self._paths.items())
-        return f"PathManager(\n{path_list}\n)"
+        return f"DragonPathManager(\n{path_list}\n)"
 
     # --- Dictionary-Style Methods ---
     def __getitem__(self, key: str) -> Path:
ml_tools/utilities.py
CHANGED

@@ -10,7 +10,6 @@ from ._logger import _LOGGER
 from ._schema import FeatureSchema
 
 
-# Keep track of available tools
 __all__ = [
     "load_dataframe",
     "load_dataframe_greedy",
@@ -361,7 +360,7 @@ def save_dataframe_filename(df: Union[pd.DataFrame, pl.DataFrame], save_dir: Uni
     elif isinstance(df, pl.DataFrame):
         # Transform empty strings to Null
         df_to_save = df.with_columns(
-            pl.when(pl.col(pl.Utf8).str.strip() == "")
+            pl.when(pl.col(pl.Utf8).str.strip() == "") # type: ignore
             .then(None)
             .otherwise(pl.col(pl.Utf8))
         )
dragon_ml_toolbox-14.7.0.dist-info/RECORD
DELETED

@@ -1,49 +0,0 @@
-dragon_ml_toolbox-14.7.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
-dragon_ml_toolbox-14.7.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=gkOdNDbKYpIJezwSo2CEnISkLeYfYHv9t8b5K2-P69A,2687
-ml_tools/ETL_cleaning.py,sha256=2VBRllV8F-ZiPylPp8Az2gwn5ztgazN0BH5OKnRUhV0,20402
-ml_tools/ETL_engineering.py,sha256=KfYqgsxupAx6e_TxwO1LZXeu5mFkIhVXJrNjP3CzIZc,54927
-ml_tools/GUI_tools.py,sha256=Va6ig-dHULPVRwQYYtH3fvY5XPIoqRcJpRW8oXC55Hw,45413
-ml_tools/MICE_imputation.py,sha256=KLJXGQLKJ6AuWWttAG-LCCaxpS-ygM4dXPiguHDaL6Y,20815
-ml_tools/ML_callbacks.py,sha256=elD2Yr030sv_6gX_m9GVd6HTyrbmt34nFS8lrgS4HtM,15808
-ml_tools/ML_configuration.py,sha256=DaYmm7Yklcu1emLyo-pRQG74SK4YEkCYFRT6_aV3rqA,4417
-ml_tools/ML_datasetmaster.py,sha256=Zi5jBnBI_U6tD8mpCVL5bQcsqsGEMAzMsCVI_wFD2QU,30175
-ml_tools/ML_evaluation.py,sha256=EvlgFeMQeZ1RSEMtNd-nv7W0d0SVcR4n6cwW5UG16DU,25358
-ml_tools/ML_evaluation_multi.py,sha256=bQZ2gJY-dBzKQxvtd-B6wVaGBdFpQGVBr7tQZFokp5E,17166
-ml_tools/ML_inference.py,sha256=YJ953bhNWsdlPRtJQh3h2ACfMIgp8dQ9KtL9Azar-5s,23489
-ml_tools/ML_models.py,sha256=PqOcNlws7vCJMbiVCKqlPuktxvskZVUHG3VfU-Yshf8,31415
-ml_tools/ML_models_advanced.py,sha256=vk3PZBSu3DVso2S1rKTxxdS43XG8Q5FnasIL3-rMajc,12410
-ml_tools/ML_optimization.py,sha256=P0zkhKAwTpkorIBtR0AOIDcyexo5ngmvFUzo3DfNO-E,22692
-ml_tools/ML_scaler.py,sha256=tw6onj9o8_kk3FQYb930HUzvv1zsFZe2YZJdF3LtHkU,7538
-ml_tools/ML_trainer.py,sha256=salZxfv3RWRCiinp5S9xeUsHysMbMQ52EecR8GyEbaM,51461
-ml_tools/ML_utilities.py,sha256=eYe2N-65FTzaOHF5gmiJl-HmicyzhqcdvlDiIivr5_g,22993
-ml_tools/ML_vision_datasetmaster.py,sha256=bmHDC6SsBUxDSFjqQGuyzGfKuf1Imi1Ng6O2-dYF7I4,62607
-ml_tools/ML_vision_evaluation.py,sha256=t12R7i1RkOCt9zu1_lxSBr8OH6A6Get0k8ftDLctn6I,10486
-ml_tools/ML_vision_inference.py,sha256=He3KV3VJAm8PwO-fOq4b9VO8UXFr-GmpuCnoHXf4VZI,20588
-ml_tools/ML_vision_models.py,sha256=WqiRN9JAjv--BcwkDrooXAs4Qo26JHPCHh3JSPm4kMI,26226
-ml_tools/ML_vision_transformers.py,sha256=h332O9BjDMgxrBc0I-bJwJODWlcp7nJHbX1QS2etwBk,7738
-ml_tools/PSO_optimization.py,sha256=T-HWHMRJUnPvPwixdU5jif3_rnnI36TzcL8u3oSCwuA,22960
-ml_tools/RNN_forecast.py,sha256=Qa2KoZfdAvSjZ4yE78N4BFXtr3tTr0Gx7tQJZPotsh0,1967
-ml_tools/SQL.py,sha256=vXLPGfVVg8bfkbBE3HVfyEclVbdJy0TBhuQONtMwSCQ,11234
-ml_tools/VIF_factor.py,sha256=at5IVqPvicja2-DNSTSIIy3SkzDWCmLzo3qTG_qr5n8,10422
-ml_tools/_ML_vision_recipe.py,sha256=zrgxFUvTJqQVuwR7jWlbIC2FD29u6eNFPkTRoJ7yEZI,3178
-ml_tools/__init__.py,sha256=kJiankjz9_qXu7gU92mYqYg_anLvt-B6RtW0mMH8uGo,76
-ml_tools/_logger.py,sha256=dlp5cGbzooK9YSNSZYB4yjZrOaQUGW8PTrM411AOvL8,4717
-ml_tools/_schema.py,sha256=yu6aWmn_2Z4_AxAtJGDDCIa96y6JcUp-vgnCS013Qmw,3908
-ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
-ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
-ml_tools/custom_logger.py,sha256=TGc0Ww2Xlqj2XE3q4bP43hV7T3qnb5ci9f0pYHXF5TY,11226
-ml_tools/data_exploration.py,sha256=bwHzFJ-IAo5GN3T53F-1J_pXUg8VHS91sG_90utAsfg,69911
-ml_tools/ensemble_evaluation.py,sha256=2sJ3jD6yBNPRNwSokyaLKqKHi0QhF13ChoFe5yd4zwg,28368
-ml_tools/ensemble_inference.py,sha256=0yLmLNj45RVVoSCLH1ZYJG9IoAhTkWUqEZmLOQTFGTY,9348
-ml_tools/ensemble_learning.py,sha256=vsIED7nlheYI4w2SBzP6SC1AnNeMfn-2A1Gqw5EfxsM,21964
-ml_tools/handle_excel.py,sha256=pfdAPb9ywegFkM9T54bRssDOsX-K7rSeV0RaMz7lEAo,14006
-ml_tools/keys.py,sha256=-OiL9G0RIOKQk6BwETKIP3LWz2s5-x6lZW2YitJa4mY,3330
-ml_tools/math_utilities.py,sha256=xeKq1quR_3DYLgowcp4Uam_4s3JltUyOnqMOGuAiYWU,8802
-ml_tools/optimization_tools.py,sha256=TYFQ2nSnp7xxs-VyoZISWgnGJghFbsWasHjruegyJRs,12763
-ml_tools/path_manager.py,sha256=CyDU16pOKmC82jPubqJPT6EBt-u-3rGVbxyPIZCvDDY,18432
-ml_tools/serde.py,sha256=c8uDYjYry_VrLvoG4ixqDj5pij88lVn6Tu4NHcPkwDU,6943
-ml_tools/utilities.py,sha256=aWqvYzmxlD74PD5Yqu1VuTekDJeYLQrmPIU_VeVyRp0,22526
-dragon_ml_toolbox-14.7.0.dist-info/METADATA,sha256=NTifVXiC2zr5RhzCUTuUMEcU-wfswXxoYOO6N3UXFmM,6492
-dragon_ml_toolbox-14.7.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-14.7.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-14.7.0.dist-info/RECORD,,
ml_tools/RNN_forecast.py
DELETED

@@ -1,56 +0,0 @@
-import torch
-from torch import nn
-import numpy as np
-
-from ._script_info import _script_info
-
-
-__all__ = [
-    "rnn_forecast"
-]
-
-def rnn_forecast(model: nn.Module, start_sequence: torch.Tensor, steps: int, device: str = 'cpu'):
-    """
-    Runs a sequential forecast for a trained RNN-based model.
-
-    This function iteratively predicts future time steps, where each new prediction
-    is generated by feeding the previous prediction back into the model.
-
-    Args:
-        model (nn.Module): The trained PyTorch RNN model (e.g., LSTM, GRU).
-        start_sequence (torch.Tensor): The initial sequence to start the forecast from.
-            Shape should be (sequence_length, num_features).
-        steps (int): The number of future time steps to predict.
-        device (str, optional): The device to run the forecast on ('cpu', 'cuda', 'mps').
-            Defaults to 'cpu'.
-
-    Returns:
-        np.ndarray: A numpy array containing the forecasted values.
-    """
-    model.eval()
-    model.to(device)
-
-    predictions = []
-    current_sequence = start_sequence.to(device)
-
-    with torch.no_grad():
-        for _ in range(steps):
-            # Get the model's prediction for the current sequence
-            output = model(current_sequence.unsqueeze(0)) # Add batch dimension
-
-            # The prediction is the last element of the output sequence
-            next_pred = output[0, -1, :].view(1, -1)
-
-            # Store the prediction
-            predictions.append(next_pred.cpu().numpy())
-
-            # Update the sequence for the next iteration:
-            # Drop the first element and append the new prediction
-            current_sequence = torch.cat([current_sequence[1:], next_pred], dim=0)
-
-    # Concatenate all predictions and flatten the array for easy use
-    return np.concatenate(predictions).flatten()
-
-
-def info():
-    _script_info(__all__)