dragon-ml-toolbox 19.13.0__py3-none-any.whl → 20.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/METADATA +29 -46
- dragon_ml_toolbox-20.0.0.dist-info/RECORD +178 -0
- ml_tools/{ETL_cleaning.py → ETL_cleaning/__init__.py} +13 -5
- ml_tools/ETL_cleaning/_basic_clean.py +351 -0
- ml_tools/ETL_cleaning/_clean_tools.py +128 -0
- ml_tools/ETL_cleaning/_dragon_cleaner.py +245 -0
- ml_tools/ETL_cleaning/_imprimir.py +13 -0
- ml_tools/{ETL_engineering.py → ETL_engineering/__init__.py} +8 -4
- ml_tools/ETL_engineering/_dragon_engineering.py +261 -0
- ml_tools/ETL_engineering/_imprimir.py +24 -0
- ml_tools/{_core/_ETL_engineering.py → ETL_engineering/_transforms.py} +14 -267
- ml_tools/{_core → GUI_tools}/_GUI_tools.py +37 -40
- ml_tools/{GUI_tools.py → GUI_tools/__init__.py} +7 -5
- ml_tools/GUI_tools/_imprimir.py +12 -0
- ml_tools/IO_tools/_IO_loggers.py +235 -0
- ml_tools/IO_tools/_IO_save_load.py +151 -0
- ml_tools/IO_tools/_IO_utils.py +140 -0
- ml_tools/{IO_tools.py → IO_tools/__init__.py} +13 -5
- ml_tools/IO_tools/_imprimir.py +14 -0
- ml_tools/MICE/_MICE_imputation.py +132 -0
- ml_tools/{MICE_imputation.py → MICE/__init__.py} +6 -7
- ml_tools/{_core/_MICE_imputation.py → MICE/_dragon_mice.py} +243 -322
- ml_tools/MICE/_imprimir.py +11 -0
- ml_tools/{ML_callbacks.py → ML_callbacks/__init__.py} +12 -4
- ml_tools/ML_callbacks/_base.py +101 -0
- ml_tools/ML_callbacks/_checkpoint.py +232 -0
- ml_tools/ML_callbacks/_early_stop.py +208 -0
- ml_tools/ML_callbacks/_imprimir.py +12 -0
- ml_tools/ML_callbacks/_scheduler.py +197 -0
- ml_tools/{ML_chaining_utilities.py → ML_chain/__init__.py} +8 -3
- ml_tools/{_core/_ML_chaining_utilities.py → ML_chain/_chaining_tools.py} +5 -129
- ml_tools/ML_chain/_dragon_chain.py +140 -0
- ml_tools/ML_chain/_imprimir.py +11 -0
- ml_tools/ML_configuration/__init__.py +90 -0
- ml_tools/ML_configuration/_base_model_config.py +69 -0
- ml_tools/ML_configuration/_finalize.py +366 -0
- ml_tools/ML_configuration/_imprimir.py +47 -0
- ml_tools/ML_configuration/_metrics.py +593 -0
- ml_tools/ML_configuration/_models.py +206 -0
- ml_tools/ML_configuration/_training.py +124 -0
- ml_tools/ML_datasetmaster/__init__.py +28 -0
- ml_tools/ML_datasetmaster/_base_datasetmaster.py +337 -0
- ml_tools/{_core/_ML_datasetmaster.py → ML_datasetmaster/_datasetmaster.py} +9 -329
- ml_tools/ML_datasetmaster/_imprimir.py +15 -0
- ml_tools/{_core/_ML_sequence_datasetmaster.py → ML_datasetmaster/_sequence_datasetmaster.py} +13 -15
- ml_tools/{_core/_ML_vision_datasetmaster.py → ML_datasetmaster/_vision_datasetmaster.py} +63 -65
- ml_tools/ML_evaluation/__init__.py +53 -0
- ml_tools/ML_evaluation/_classification.py +629 -0
- ml_tools/ML_evaluation/_feature_importance.py +409 -0
- ml_tools/ML_evaluation/_imprimir.py +25 -0
- ml_tools/ML_evaluation/_loss.py +92 -0
- ml_tools/ML_evaluation/_regression.py +273 -0
- ml_tools/{_core/_ML_sequence_evaluation.py → ML_evaluation/_sequence.py} +8 -11
- ml_tools/{_core/_ML_vision_evaluation.py → ML_evaluation/_vision.py} +12 -17
- ml_tools/{_core → ML_evaluation_captum}/_ML_evaluation_captum.py +11 -38
- ml_tools/{ML_evaluation_captum.py → ML_evaluation_captum/__init__.py} +6 -4
- ml_tools/ML_evaluation_captum/_imprimir.py +10 -0
- ml_tools/{_core → ML_finalize_handler}/_ML_finalize_handler.py +3 -7
- ml_tools/ML_finalize_handler/__init__.py +10 -0
- ml_tools/ML_finalize_handler/_imprimir.py +8 -0
- ml_tools/ML_inference/__init__.py +22 -0
- ml_tools/ML_inference/_base_inference.py +166 -0
- ml_tools/{_core/_ML_chaining_inference.py → ML_inference/_chain_inference.py} +14 -17
- ml_tools/ML_inference/_dragon_inference.py +332 -0
- ml_tools/ML_inference/_imprimir.py +11 -0
- ml_tools/ML_inference/_multi_inference.py +180 -0
- ml_tools/ML_inference_sequence/__init__.py +10 -0
- ml_tools/ML_inference_sequence/_imprimir.py +8 -0
- ml_tools/{_core/_ML_sequence_inference.py → ML_inference_sequence/_sequence_inference.py} +11 -15
- ml_tools/ML_inference_vision/__init__.py +10 -0
- ml_tools/ML_inference_vision/_imprimir.py +8 -0
- ml_tools/{_core/_ML_vision_inference.py → ML_inference_vision/_vision_inference.py} +15 -19
- ml_tools/ML_models/__init__.py +32 -0
- ml_tools/{_core/_ML_models_advanced.py → ML_models/_advanced_models.py} +22 -18
- ml_tools/ML_models/_base_mlp_attention.py +198 -0
- ml_tools/{_core/_models_advanced_base.py → ML_models/_base_save_load.py} +73 -49
- ml_tools/ML_models/_dragon_tabular.py +248 -0
- ml_tools/ML_models/_imprimir.py +18 -0
- ml_tools/ML_models/_mlp_attention.py +134 -0
- ml_tools/{_core → ML_models}/_models_advanced_helpers.py +13 -13
- ml_tools/ML_models_sequence/__init__.py +10 -0
- ml_tools/ML_models_sequence/_imprimir.py +8 -0
- ml_tools/{_core/_ML_sequence_models.py → ML_models_sequence/_sequence_models.py} +5 -8
- ml_tools/ML_models_vision/__init__.py +29 -0
- ml_tools/ML_models_vision/_base_wrapper.py +254 -0
- ml_tools/ML_models_vision/_image_classification.py +182 -0
- ml_tools/ML_models_vision/_image_segmentation.py +108 -0
- ml_tools/ML_models_vision/_imprimir.py +16 -0
- ml_tools/ML_models_vision/_object_detection.py +135 -0
- ml_tools/ML_optimization/__init__.py +21 -0
- ml_tools/ML_optimization/_imprimir.py +13 -0
- ml_tools/{_core/_ML_optimization_pareto.py → ML_optimization/_multi_dragon.py} +18 -24
- ml_tools/ML_optimization/_single_dragon.py +203 -0
- ml_tools/{_core/_ML_optimization.py → ML_optimization/_single_manual.py} +75 -213
- ml_tools/{_core → ML_scaler}/_ML_scaler.py +8 -11
- ml_tools/ML_scaler/__init__.py +10 -0
- ml_tools/ML_scaler/_imprimir.py +8 -0
- ml_tools/ML_trainer/__init__.py +20 -0
- ml_tools/ML_trainer/_base_trainer.py +297 -0
- ml_tools/ML_trainer/_dragon_detection_trainer.py +402 -0
- ml_tools/ML_trainer/_dragon_sequence_trainer.py +540 -0
- ml_tools/ML_trainer/_dragon_trainer.py +1160 -0
- ml_tools/ML_trainer/_imprimir.py +10 -0
- ml_tools/{ML_utilities.py → ML_utilities/__init__.py} +14 -6
- ml_tools/ML_utilities/_artifact_finder.py +382 -0
- ml_tools/ML_utilities/_imprimir.py +16 -0
- ml_tools/ML_utilities/_inspection.py +325 -0
- ml_tools/ML_utilities/_train_tools.py +205 -0
- ml_tools/{ML_vision_transformers.py → ML_vision_transformers/__init__.py} +9 -6
- ml_tools/{_core/_ML_vision_transformers.py → ML_vision_transformers/_core_transforms.py} +11 -155
- ml_tools/ML_vision_transformers/_imprimir.py +14 -0
- ml_tools/ML_vision_transformers/_offline_augmentation.py +159 -0
- ml_tools/{_core/_PSO_optimization.py → PSO_optimization/_PSO.py} +58 -15
- ml_tools/{PSO_optimization.py → PSO_optimization/__init__.py} +5 -3
- ml_tools/PSO_optimization/_imprimir.py +10 -0
- ml_tools/SQL/__init__.py +7 -0
- ml_tools/{_core/_SQL.py → SQL/_dragon_SQL.py} +7 -11
- ml_tools/SQL/_imprimir.py +8 -0
- ml_tools/{_core → VIF}/_VIF_factor.py +5 -8
- ml_tools/{VIF_factor.py → VIF/__init__.py} +4 -2
- ml_tools/VIF/_imprimir.py +10 -0
- ml_tools/_core/__init__.py +7 -1
- ml_tools/_core/_logger.py +8 -18
- ml_tools/_core/_schema_load_ops.py +43 -0
- ml_tools/_core/_script_info.py +2 -2
- ml_tools/{data_exploration.py → data_exploration/__init__.py} +32 -16
- ml_tools/data_exploration/_analysis.py +214 -0
- ml_tools/data_exploration/_cleaning.py +566 -0
- ml_tools/data_exploration/_features.py +583 -0
- ml_tools/data_exploration/_imprimir.py +32 -0
- ml_tools/data_exploration/_plotting.py +487 -0
- ml_tools/data_exploration/_schema_ops.py +176 -0
- ml_tools/{ensemble_evaluation.py → ensemble_evaluation/__init__.py} +6 -4
- ml_tools/{_core → ensemble_evaluation}/_ensemble_evaluation.py +3 -7
- ml_tools/ensemble_evaluation/_imprimir.py +14 -0
- ml_tools/{ensemble_inference.py → ensemble_inference/__init__.py} +5 -3
- ml_tools/{_core → ensemble_inference}/_ensemble_inference.py +15 -18
- ml_tools/ensemble_inference/_imprimir.py +9 -0
- ml_tools/{ensemble_learning.py → ensemble_learning/__init__.py} +4 -6
- ml_tools/{_core → ensemble_learning}/_ensemble_learning.py +7 -10
- ml_tools/ensemble_learning/_imprimir.py +10 -0
- ml_tools/{excel_handler.py → excel_handler/__init__.py} +5 -3
- ml_tools/{_core → excel_handler}/_excel_handler.py +6 -10
- ml_tools/excel_handler/_imprimir.py +13 -0
- ml_tools/{keys.py → keys/__init__.py} +4 -1
- ml_tools/keys/_imprimir.py +11 -0
- ml_tools/{_core → keys}/_keys.py +2 -0
- ml_tools/{math_utilities.py → math_utilities/__init__.py} +5 -2
- ml_tools/math_utilities/_imprimir.py +11 -0
- ml_tools/{_core → math_utilities}/_math_utilities.py +1 -5
- ml_tools/{optimization_tools.py → optimization_tools/__init__.py} +9 -4
- ml_tools/optimization_tools/_imprimir.py +13 -0
- ml_tools/optimization_tools/_optimization_bounds.py +236 -0
- ml_tools/optimization_tools/_optimization_plots.py +218 -0
- ml_tools/{path_manager.py → path_manager/__init__.py} +6 -3
- ml_tools/{_core/_path_manager.py → path_manager/_dragonmanager.py} +11 -347
- ml_tools/path_manager/_imprimir.py +15 -0
- ml_tools/path_manager/_path_tools.py +346 -0
- ml_tools/plot_fonts/__init__.py +8 -0
- ml_tools/plot_fonts/_imprimir.py +8 -0
- ml_tools/{_core → plot_fonts}/_plot_fonts.py +2 -5
- ml_tools/schema/__init__.py +15 -0
- ml_tools/schema/_feature_schema.py +223 -0
- ml_tools/schema/_gui_schema.py +191 -0
- ml_tools/schema/_imprimir.py +10 -0
- ml_tools/{serde.py → serde/__init__.py} +4 -2
- ml_tools/serde/_imprimir.py +10 -0
- ml_tools/{_core → serde}/_serde.py +3 -8
- ml_tools/{utilities.py → utilities/__init__.py} +11 -6
- ml_tools/utilities/_imprimir.py +18 -0
- ml_tools/{_core/_utilities.py → utilities/_utility_save_load.py} +13 -190
- ml_tools/utilities/_utility_tools.py +192 -0
- dragon_ml_toolbox-19.13.0.dist-info/RECORD +0 -111
- ml_tools/ML_chaining_inference.py +0 -8
- ml_tools/ML_configuration.py +0 -86
- ml_tools/ML_configuration_pytab.py +0 -14
- ml_tools/ML_datasetmaster.py +0 -10
- ml_tools/ML_evaluation.py +0 -16
- ml_tools/ML_evaluation_multi.py +0 -12
- ml_tools/ML_finalize_handler.py +0 -8
- ml_tools/ML_inference.py +0 -12
- ml_tools/ML_models.py +0 -14
- ml_tools/ML_models_advanced.py +0 -14
- ml_tools/ML_models_pytab.py +0 -14
- ml_tools/ML_optimization.py +0 -14
- ml_tools/ML_optimization_pareto.py +0 -8
- ml_tools/ML_scaler.py +0 -8
- ml_tools/ML_sequence_datasetmaster.py +0 -8
- ml_tools/ML_sequence_evaluation.py +0 -10
- ml_tools/ML_sequence_inference.py +0 -8
- ml_tools/ML_sequence_models.py +0 -8
- ml_tools/ML_trainer.py +0 -12
- ml_tools/ML_vision_datasetmaster.py +0 -12
- ml_tools/ML_vision_evaluation.py +0 -10
- ml_tools/ML_vision_inference.py +0 -8
- ml_tools/ML_vision_models.py +0 -18
- ml_tools/SQL.py +0 -8
- ml_tools/_core/_ETL_cleaning.py +0 -694
- ml_tools/_core/_IO_tools.py +0 -498
- ml_tools/_core/_ML_callbacks.py +0 -702
- ml_tools/_core/_ML_configuration.py +0 -1332
- ml_tools/_core/_ML_configuration_pytab.py +0 -102
- ml_tools/_core/_ML_evaluation.py +0 -867
- ml_tools/_core/_ML_evaluation_multi.py +0 -544
- ml_tools/_core/_ML_inference.py +0 -646
- ml_tools/_core/_ML_models.py +0 -668
- ml_tools/_core/_ML_models_pytab.py +0 -693
- ml_tools/_core/_ML_trainer.py +0 -2323
- ml_tools/_core/_ML_utilities.py +0 -886
- ml_tools/_core/_ML_vision_models.py +0 -644
- ml_tools/_core/_data_exploration.py +0 -1901
- ml_tools/_core/_optimization_tools.py +0 -493
- ml_tools/_core/_schema.py +0 -359
- ml_tools/plot_fonts.py +0 -8
- ml_tools/schema.py +0 -12
- {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/top_level.txt +0 -0
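The dominant pattern in this release is that flat top-level modules become packages: for example `ETL_cleaning.py` → `ETL_cleaning/__init__.py`, with implementation split into private submodules (`_basic_clean.py`, `_imprimir.py`, etc.). A minimal sketch of verifying that the dotted import paths still resolve after upgrading — it assumes only the module/package names shown in the list above, not any function names inside them:

```python
# Minimal sketch: confirm that modules renamed to packages in 20.0.0 still import
# under the same dotted path. Only paths visible in the diff summary are assumed;
# no symbols from the new private submodules are referenced.
import importlib

for dotted in ("ml_tools.ETL_cleaning", "ml_tools.schema", "ml_tools.path_manager"):
    mod = importlib.import_module(dotted)   # now backed by <name>/__init__.py
    print(dotted, "->", mod.__file__)       # e.g. .../ml_tools/ETL_cleaning/__init__.py
```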
ml_tools/_core/_schema.py
DELETED
@@ -1,359 +0,0 @@
from typing import NamedTuple, Tuple, Optional, Dict, Union, Any
from pathlib import Path
import json

from ._IO_tools import save_list_strings
from ._keys import DatasetKeys, SchemaKeys
from ._logger import get_logger
from ._path_manager import make_fullpath
from ._script_info import _script_info


_LOGGER = get_logger("FeatureSchema")


__all__ = [
    "FeatureSchema",
    "create_guischema_template",
    "make_multibinary_groups",
]


class FeatureSchema(NamedTuple):
    """Holds the final, definitive schema for the model pipeline."""

    # The final, ordered list of all feature names
    feature_names: Tuple[str, ...]

    # List of all continuous feature names
    continuous_feature_names: Tuple[str, ...]

    # List of all categorical feature names
    categorical_feature_names: Tuple[str, ...]

    # Map of {column_index: cardinality} for categorical features
    categorical_index_map: Optional[Dict[int, int]]

    # Map string-to-int category values (e.g., {'color': {'red': 0, 'blue': 1}})
    categorical_mappings: Optional[Dict[str, Dict[str, int]]]

    def to_json(self, directory: Union[str, Path], verbose: bool = True) -> None:
        """
        Saves the schema as 'FeatureSchema.json' to the provided directory.

        Handles conversion of Tuple->List and IntKeys->StrKeys automatically.
        """
        # validate path
        dir_path = make_fullpath(directory, enforce="directory")
        file_path = dir_path / SchemaKeys.SCHEMA_FILENAME

        try:
            # Convert named tuple to dict
            data = self._asdict()

            # Write to disk
            with open(file_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=4)

            if verbose:
                _LOGGER.info(f"FeatureSchema saved to '{dir_path.name}/{SchemaKeys.SCHEMA_FILENAME}'")

        except (IOError, TypeError) as e:
            _LOGGER.error(f"Failed to save FeatureSchema to JSON: {e}")
            raise e

    @classmethod
    def from_json(cls, directory: Union[str, Path], verbose: bool = True) -> 'FeatureSchema':
        """
        Loads a 'FeatureSchema.json' from the provided directory.

        Restores Tuples from Lists and Integer Keys from Strings.
        """
        # validate directory
        dir_path = make_fullpath(directory, enforce="directory")
        file_path = dir_path / SchemaKeys.SCHEMA_FILENAME

        if not file_path.exists():
            _LOGGER.error(f"FeatureSchema file not found at '{directory}'")
            raise FileNotFoundError()

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data: Dict[str, Any] = json.load(f)

            # 1. Restore Tuples (JSON loads them as lists)
            feature_names = tuple(data.get("feature_names", []))
            cont_names = tuple(data.get("continuous_feature_names", []))
            cat_names = tuple(data.get("categorical_feature_names", []))

            # 2. Restore Integer Keys for categorical_index_map
            raw_map = data.get("categorical_index_map")
            cat_index_map: Optional[Dict[int, int]] = None
            if raw_map is not None:
                cat_index_map = {int(k): v for k, v in raw_map.items()}

            # 3. Mappings (keys are strings, no conversion needed)
            cat_mappings = data.get("categorical_mappings")

            schema = cls(
                feature_names=feature_names,
                continuous_feature_names=cont_names,
                categorical_feature_names=cat_names,
                categorical_index_map=cat_index_map,
                categorical_mappings=cat_mappings
            )

            if verbose:
                _LOGGER.info(f"FeatureSchema loaded from '{dir_path.name}'")

            return schema

        except (IOError, ValueError, KeyError) as e:
            _LOGGER.error(f"Failed to load FeatureSchema from '{dir_path}': {e}")
            raise e

    def _save_helper(self, artifact: Tuple[str, ...], directory: Union[str, Path], filename: str, verbose: bool):
        to_save = list(artifact)

        # empty check
        if not to_save:
            _LOGGER.warning(f"Skipping save for '{filename}': The feature list is empty.")
            return

        save_list_strings(list_strings=to_save,
                          directory=directory,
                          filename=filename,
                          verbose=verbose)

    def save_all_features(self, directory: Union[str, Path], verbose: bool = True):
        """
        Saves all feature names to a text file.

        Args:
            directory: The directory where the file will be saved.
            verbose: If True, prints a confirmation message upon saving.
        """
        self._save_helper(artifact=self.feature_names,
                          directory=directory,
                          filename=DatasetKeys.FEATURE_NAMES,
                          verbose=verbose)

    def save_continuous_features(self, directory: Union[str, Path], verbose: bool = True):
        """
        Saves continuous feature names to a text file.

        Args:
            directory: The directory where the file will be saved.
            verbose: If True, prints a confirmation message upon saving.
        """
        self._save_helper(artifact=self.continuous_feature_names,
                          directory=directory,
                          filename=DatasetKeys.CONTINUOUS_NAMES,
                          verbose=verbose)

    def save_categorical_features(self, directory: Union[str, Path], verbose: bool = True):
        """
        Saves categorical feature names to a text file.

        Args:
            directory: The directory where the file will be saved.
            verbose: If True, prints a confirmation message upon saving.
        """
        self._save_helper(artifact=self.categorical_feature_names,
                          directory=directory,
                          filename=DatasetKeys.CATEGORICAL_NAMES,
                          verbose=verbose)

    def save_artifacts(self, directory: Union[str, Path]):
        """
        Saves feature names, categorical feature names, continuous feature names to separate text files.
        """
        self.save_all_features(directory=directory, verbose=True)
        self.save_continuous_features(directory=directory, verbose=True)
        self.save_categorical_features(directory=directory, verbose=True)

    def __repr__(self) -> str:
        """Returns a concise representation of the schema's contents."""
        total = len(self.feature_names)
        cont = len(self.continuous_feature_names)
        cat = len(self.categorical_feature_names)
        index_map = self.categorical_index_map is not None
        cat_map = self.categorical_mappings is not None
        return (
            f"FeatureSchema(total={total}, continuous={cont}, categorical={cat}, index_map={index_map}, categorical_map={cat_map})"
        )


def create_guischema_template(
    directory: Union[str, Path],
    feature_schema: FeatureSchema,
    targets: list[str],
    continuous_ranges: Dict[str, Tuple[float, float]],
    multibinary_groups: Union[Dict[str, list[str]], None] = None,
) -> None:
    """
    Generates a 'GUISchema.json' boilerplate file based on the Model FeatureSchema.

    The generated JSON contains entries with empty "gui_name" fields for manual mapping.
    Leave 'gui_name' empty to use auto-formatted Title Case.

    Args:
        directory (str | Path): Where to save the json file.
        feature_schema (FeatureSchema): The source FeatureSchema object.
        targets (list[str]): List of target names as used in the ML pipeline.
        continuous_ranges (Dict[str, Tuple[float, float]]): Dict {model_name: (min, max)}.
        multibinary_groups (Dict[str, list[str]] | None): Optional Dict {GUI_Group_Name: [model_col_1, model_col_2]}.
            Used to group binary columns into a single multi-select list.
    """
    dir_path = make_fullpath(directory, make=True, enforce="directory")

    schema = feature_schema
    output_data: Dict[str, Any] = {
        SchemaKeys.TARGETS: [],
        SchemaKeys.CONTINUOUS: [],
        SchemaKeys.BINARY: [],
        SchemaKeys.MULTIBINARY: {},  # Structure: GroupName: [{model: x, gui: ""}]
        SchemaKeys.CATEGORICAL: []
    }

    # Track handled columns to prevent duplicates in binary/categorical
    handled_cols = set()

    # 1. Targets
    for t in targets:
        output_data[SchemaKeys.TARGETS].append({
            SchemaKeys.MODEL_NAME: t,
            SchemaKeys.GUI_NAME: ""  # User to fill
        })

    # 2. Continuous
    # Validate ranges against schema
    schema_cont_set = set(schema.continuous_feature_names)
    for name, min_max in continuous_ranges.items():
        if name in schema_cont_set:
            output_data[SchemaKeys.CONTINUOUS].append({
                SchemaKeys.MODEL_NAME: name,
                SchemaKeys.GUI_NAME: "",
                SchemaKeys.MIN_VALUE: min_max[0],
                SchemaKeys.MAX_VALUE: min_max[1]
            })
            handled_cols.add(name)
        else:
            _LOGGER.warning(f"GUISchema: Provided range for '{name}', but it is not in FeatureSchema continuous list.")

    # 3. Multi-Binary Groups
    if multibinary_groups:
        # Check for validity within the generic feature list
        all_feats = set(schema.feature_names)

        for group_name, cols in multibinary_groups.items():
            # Validation: Groups cannot be empty
            if not cols:
                # warn and skip
                _LOGGER.warning(f"GUISchema: Multi-binary group '{group_name}' is empty and will be skipped.")
                continue

            group_options = []
            for col in cols:
                # Validation: Columns must exist in schema
                if col not in all_feats:
                    # warn and skip
                    _LOGGER.warning(f"GUISchema: Multi-binary column '{col}' in group '{group_name}' not found in FeatureSchema. Skipping.")
                    continue
                # else, add to group
                group_options.append({
                    SchemaKeys.MODEL_NAME: col,
                    SchemaKeys.GUI_NAME: ""
                })
                handled_cols.add(col)
            output_data[SchemaKeys.MULTIBINARY][group_name] = group_options

    # 4. Binary & Categorical (Derived from Schema Mappings)
    if schema.categorical_mappings:
        for name, mapping in schema.categorical_mappings.items():
            if name in handled_cols:
                continue

            # Heuristic: Cardinality 2 = Binary, >2 = Categorical
            if len(mapping) == 2:
                output_data[SchemaKeys.BINARY].append({
                    SchemaKeys.MODEL_NAME: name,
                    SchemaKeys.GUI_NAME: ""  # User to fill
                })
            else:
                # For categorical, we also allow renaming the specific options
                options_with_names = {k: "" for k in mapping.keys()}  # Default gui_option = model_option

                output_data[SchemaKeys.CATEGORICAL].append({
                    SchemaKeys.MODEL_NAME: name,
                    SchemaKeys.GUI_NAME: "",  # User to fill feature name
                    SchemaKeys.MAPPING: mapping,  # Original mapping
                    SchemaKeys.OPTIONAL_LABELS: options_with_names  # User can edit keys here
                })

    save_path = dir_path / SchemaKeys.GUI_SCHEMA_FILENAME
    try:
        with open(save_path, 'w', encoding='utf-8') as f:
            json.dump(output_data, f, indent=4)
        _LOGGER.info(f"GUISchema template generated at: '{dir_path.name}/{SchemaKeys.GUI_SCHEMA_FILENAME}'")
    except IOError as e:
        _LOGGER.error(f"Failed to save GUISchema template: {e}")


def make_multibinary_groups(
    feature_schema: FeatureSchema,
    group_prefixes: list[str],
    separator: str = "_"
) -> Dict[str, list[str]]:
    """
    Helper to automate creating the multibinary_groups dictionary for create_guischema_template.

    Iterates through provided prefixes and groups categorical features that contain
    the pattern '{prefix}{separator}'.

    Args:
        feature_schema: The loaded FeatureSchema containing categorical feature names.
        group_prefixes: A list of group prefixes to search for.
        separator: The separator used in Multibinary Encoding (default '_').

    Returns:
        Dict[str, list[str]]: A dictionary mapping group names to their found column names.
    """
    groups: Dict[str, list[str]] = {}

    # check that categorical features exist
    if not feature_schema.categorical_feature_names:
        _LOGGER.error("FeatureSchema has no categorical features defined.")
        raise ValueError()

    # validate separator
    if not separator or not isinstance(separator, str):
        _LOGGER.error(f"Invalid separator '{separator}' of type {type(separator)}.")
        raise ValueError()

    for prefix in group_prefixes:
        if not prefix or not isinstance(prefix, str):
            _LOGGER.error(f"Invalid prefix '{prefix}' of type {type(prefix)}.")
            raise ValueError()

        search_term = f"{prefix}{separator}"

        # check if substring exists in the column name. must begin with prefix+separator
        cols = [
            name for name in feature_schema.categorical_feature_names
            if name.startswith(search_term)
        ]

        if cols:
            groups[prefix] = cols
        else:
            _LOGGER.warning(f"No columns found for group '{prefix}' using search term '{search_term}'")

    # log resulting groups
    _LOGGER.info(f"Multibinary groups created: {list(groups.keys())}")

    return groups


def info():
    _script_info(__all__)
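For reference, a minimal usage sketch of the removed API, based only on the deleted `_schema.py` shown above; the import path is the 19.13.0 layout (the 20.0.0 replacement lives under `ml_tools/schema/` and may differ), and all paths and feature names are illustrative placeholders:

```python
# Sketch of the removed FeatureSchema API (19.13.0 private module path).
# Feature names, cardinalities, and the "artifacts/" directory are hypothetical.
from ml_tools._core._schema import FeatureSchema, make_multibinary_groups

schema = FeatureSchema(
    feature_names=("temp", "pressure", "color_red", "color_blue"),
    continuous_feature_names=("temp", "pressure"),
    categorical_feature_names=("color_red", "color_blue"),
    categorical_index_map={2: 2, 3: 2},                # column index -> cardinality
    categorical_mappings={"color_red": {"no": 0, "yes": 1},
                          "color_blue": {"no": 0, "yes": 1}},
)

schema.to_json("artifacts/")                           # writes artifacts/FeatureSchema.json
restored = FeatureSchema.from_json("artifacts/")       # tuples and int keys are restored

# Group binary one-hot columns by prefix for the GUI template.
groups = make_multibinary_groups(restored, group_prefixes=["color"])
# -> {'color': ['color_red', 'color_blue']}
```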
ml_tools/plot_fonts.py
DELETED
ml_tools/schema.py
DELETED
{dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/WHEEL
File without changes
{dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE
File without changes
{dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md
File without changes
{dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/top_level.txt
File without changes