dragon-ml-toolbox 19.13.0__py3-none-any.whl → 20.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/METADATA +29 -46
- dragon_ml_toolbox-20.0.0.dist-info/RECORD +178 -0
- ml_tools/{ETL_cleaning.py → ETL_cleaning/__init__.py} +13 -5
- ml_tools/ETL_cleaning/_basic_clean.py +351 -0
- ml_tools/ETL_cleaning/_clean_tools.py +128 -0
- ml_tools/ETL_cleaning/_dragon_cleaner.py +245 -0
- ml_tools/ETL_cleaning/_imprimir.py +13 -0
- ml_tools/{ETL_engineering.py → ETL_engineering/__init__.py} +8 -4
- ml_tools/ETL_engineering/_dragon_engineering.py +261 -0
- ml_tools/ETL_engineering/_imprimir.py +24 -0
- ml_tools/{_core/_ETL_engineering.py → ETL_engineering/_transforms.py} +14 -267
- ml_tools/{_core → GUI_tools}/_GUI_tools.py +37 -40
- ml_tools/{GUI_tools.py → GUI_tools/__init__.py} +7 -5
- ml_tools/GUI_tools/_imprimir.py +12 -0
- ml_tools/IO_tools/_IO_loggers.py +235 -0
- ml_tools/IO_tools/_IO_save_load.py +151 -0
- ml_tools/IO_tools/_IO_utils.py +140 -0
- ml_tools/{IO_tools.py → IO_tools/__init__.py} +13 -5
- ml_tools/IO_tools/_imprimir.py +14 -0
- ml_tools/MICE/_MICE_imputation.py +132 -0
- ml_tools/{MICE_imputation.py → MICE/__init__.py} +6 -7
- ml_tools/{_core/_MICE_imputation.py → MICE/_dragon_mice.py} +243 -322
- ml_tools/MICE/_imprimir.py +11 -0
- ml_tools/{ML_callbacks.py → ML_callbacks/__init__.py} +12 -4
- ml_tools/ML_callbacks/_base.py +101 -0
- ml_tools/ML_callbacks/_checkpoint.py +232 -0
- ml_tools/ML_callbacks/_early_stop.py +208 -0
- ml_tools/ML_callbacks/_imprimir.py +12 -0
- ml_tools/ML_callbacks/_scheduler.py +197 -0
- ml_tools/{ML_chaining_utilities.py → ML_chain/__init__.py} +8 -3
- ml_tools/{_core/_ML_chaining_utilities.py → ML_chain/_chaining_tools.py} +5 -129
- ml_tools/ML_chain/_dragon_chain.py +140 -0
- ml_tools/ML_chain/_imprimir.py +11 -0
- ml_tools/ML_configuration/__init__.py +90 -0
- ml_tools/ML_configuration/_base_model_config.py +69 -0
- ml_tools/ML_configuration/_finalize.py +366 -0
- ml_tools/ML_configuration/_imprimir.py +47 -0
- ml_tools/ML_configuration/_metrics.py +593 -0
- ml_tools/ML_configuration/_models.py +206 -0
- ml_tools/ML_configuration/_training.py +124 -0
- ml_tools/ML_datasetmaster/__init__.py +28 -0
- ml_tools/ML_datasetmaster/_base_datasetmaster.py +337 -0
- ml_tools/{_core/_ML_datasetmaster.py → ML_datasetmaster/_datasetmaster.py} +9 -329
- ml_tools/ML_datasetmaster/_imprimir.py +15 -0
- ml_tools/{_core/_ML_sequence_datasetmaster.py → ML_datasetmaster/_sequence_datasetmaster.py} +13 -15
- ml_tools/{_core/_ML_vision_datasetmaster.py → ML_datasetmaster/_vision_datasetmaster.py} +63 -65
- ml_tools/ML_evaluation/__init__.py +53 -0
- ml_tools/ML_evaluation/_classification.py +629 -0
- ml_tools/ML_evaluation/_feature_importance.py +409 -0
- ml_tools/ML_evaluation/_imprimir.py +25 -0
- ml_tools/ML_evaluation/_loss.py +92 -0
- ml_tools/ML_evaluation/_regression.py +273 -0
- ml_tools/{_core/_ML_sequence_evaluation.py → ML_evaluation/_sequence.py} +8 -11
- ml_tools/{_core/_ML_vision_evaluation.py → ML_evaluation/_vision.py} +12 -17
- ml_tools/{_core → ML_evaluation_captum}/_ML_evaluation_captum.py +11 -38
- ml_tools/{ML_evaluation_captum.py → ML_evaluation_captum/__init__.py} +6 -4
- ml_tools/ML_evaluation_captum/_imprimir.py +10 -0
- ml_tools/{_core → ML_finalize_handler}/_ML_finalize_handler.py +3 -7
- ml_tools/ML_finalize_handler/__init__.py +10 -0
- ml_tools/ML_finalize_handler/_imprimir.py +8 -0
- ml_tools/ML_inference/__init__.py +22 -0
- ml_tools/ML_inference/_base_inference.py +166 -0
- ml_tools/{_core/_ML_chaining_inference.py → ML_inference/_chain_inference.py} +14 -17
- ml_tools/ML_inference/_dragon_inference.py +332 -0
- ml_tools/ML_inference/_imprimir.py +11 -0
- ml_tools/ML_inference/_multi_inference.py +180 -0
- ml_tools/ML_inference_sequence/__init__.py +10 -0
- ml_tools/ML_inference_sequence/_imprimir.py +8 -0
- ml_tools/{_core/_ML_sequence_inference.py → ML_inference_sequence/_sequence_inference.py} +11 -15
- ml_tools/ML_inference_vision/__init__.py +10 -0
- ml_tools/ML_inference_vision/_imprimir.py +8 -0
- ml_tools/{_core/_ML_vision_inference.py → ML_inference_vision/_vision_inference.py} +15 -19
- ml_tools/ML_models/__init__.py +32 -0
- ml_tools/{_core/_ML_models_advanced.py → ML_models/_advanced_models.py} +22 -18
- ml_tools/ML_models/_base_mlp_attention.py +198 -0
- ml_tools/{_core/_models_advanced_base.py → ML_models/_base_save_load.py} +73 -49
- ml_tools/ML_models/_dragon_tabular.py +248 -0
- ml_tools/ML_models/_imprimir.py +18 -0
- ml_tools/ML_models/_mlp_attention.py +134 -0
- ml_tools/{_core → ML_models}/_models_advanced_helpers.py +13 -13
- ml_tools/ML_models_sequence/__init__.py +10 -0
- ml_tools/ML_models_sequence/_imprimir.py +8 -0
- ml_tools/{_core/_ML_sequence_models.py → ML_models_sequence/_sequence_models.py} +5 -8
- ml_tools/ML_models_vision/__init__.py +29 -0
- ml_tools/ML_models_vision/_base_wrapper.py +254 -0
- ml_tools/ML_models_vision/_image_classification.py +182 -0
- ml_tools/ML_models_vision/_image_segmentation.py +108 -0
- ml_tools/ML_models_vision/_imprimir.py +16 -0
- ml_tools/ML_models_vision/_object_detection.py +135 -0
- ml_tools/ML_optimization/__init__.py +21 -0
- ml_tools/ML_optimization/_imprimir.py +13 -0
- ml_tools/{_core/_ML_optimization_pareto.py → ML_optimization/_multi_dragon.py} +18 -24
- ml_tools/ML_optimization/_single_dragon.py +203 -0
- ml_tools/{_core/_ML_optimization.py → ML_optimization/_single_manual.py} +75 -213
- ml_tools/{_core → ML_scaler}/_ML_scaler.py +8 -11
- ml_tools/ML_scaler/__init__.py +10 -0
- ml_tools/ML_scaler/_imprimir.py +8 -0
- ml_tools/ML_trainer/__init__.py +20 -0
- ml_tools/ML_trainer/_base_trainer.py +297 -0
- ml_tools/ML_trainer/_dragon_detection_trainer.py +402 -0
- ml_tools/ML_trainer/_dragon_sequence_trainer.py +540 -0
- ml_tools/ML_trainer/_dragon_trainer.py +1160 -0
- ml_tools/ML_trainer/_imprimir.py +10 -0
- ml_tools/{ML_utilities.py → ML_utilities/__init__.py} +14 -6
- ml_tools/ML_utilities/_artifact_finder.py +382 -0
- ml_tools/ML_utilities/_imprimir.py +16 -0
- ml_tools/ML_utilities/_inspection.py +325 -0
- ml_tools/ML_utilities/_train_tools.py +205 -0
- ml_tools/{ML_vision_transformers.py → ML_vision_transformers/__init__.py} +9 -6
- ml_tools/{_core/_ML_vision_transformers.py → ML_vision_transformers/_core_transforms.py} +11 -155
- ml_tools/ML_vision_transformers/_imprimir.py +14 -0
- ml_tools/ML_vision_transformers/_offline_augmentation.py +159 -0
- ml_tools/{_core/_PSO_optimization.py → PSO_optimization/_PSO.py} +58 -15
- ml_tools/{PSO_optimization.py → PSO_optimization/__init__.py} +5 -3
- ml_tools/PSO_optimization/_imprimir.py +10 -0
- ml_tools/SQL/__init__.py +7 -0
- ml_tools/{_core/_SQL.py → SQL/_dragon_SQL.py} +7 -11
- ml_tools/SQL/_imprimir.py +8 -0
- ml_tools/{_core → VIF}/_VIF_factor.py +5 -8
- ml_tools/{VIF_factor.py → VIF/__init__.py} +4 -2
- ml_tools/VIF/_imprimir.py +10 -0
- ml_tools/_core/__init__.py +7 -1
- ml_tools/_core/_logger.py +8 -18
- ml_tools/_core/_schema_load_ops.py +43 -0
- ml_tools/_core/_script_info.py +2 -2
- ml_tools/{data_exploration.py → data_exploration/__init__.py} +32 -16
- ml_tools/data_exploration/_analysis.py +214 -0
- ml_tools/data_exploration/_cleaning.py +566 -0
- ml_tools/data_exploration/_features.py +583 -0
- ml_tools/data_exploration/_imprimir.py +32 -0
- ml_tools/data_exploration/_plotting.py +487 -0
- ml_tools/data_exploration/_schema_ops.py +176 -0
- ml_tools/{ensemble_evaluation.py → ensemble_evaluation/__init__.py} +6 -4
- ml_tools/{_core → ensemble_evaluation}/_ensemble_evaluation.py +3 -7
- ml_tools/ensemble_evaluation/_imprimir.py +14 -0
- ml_tools/{ensemble_inference.py → ensemble_inference/__init__.py} +5 -3
- ml_tools/{_core → ensemble_inference}/_ensemble_inference.py +15 -18
- ml_tools/ensemble_inference/_imprimir.py +9 -0
- ml_tools/{ensemble_learning.py → ensemble_learning/__init__.py} +4 -6
- ml_tools/{_core → ensemble_learning}/_ensemble_learning.py +7 -10
- ml_tools/ensemble_learning/_imprimir.py +10 -0
- ml_tools/{excel_handler.py → excel_handler/__init__.py} +5 -3
- ml_tools/{_core → excel_handler}/_excel_handler.py +6 -10
- ml_tools/excel_handler/_imprimir.py +13 -0
- ml_tools/{keys.py → keys/__init__.py} +4 -1
- ml_tools/keys/_imprimir.py +11 -0
- ml_tools/{_core → keys}/_keys.py +2 -0
- ml_tools/{math_utilities.py → math_utilities/__init__.py} +5 -2
- ml_tools/math_utilities/_imprimir.py +11 -0
- ml_tools/{_core → math_utilities}/_math_utilities.py +1 -5
- ml_tools/{optimization_tools.py → optimization_tools/__init__.py} +9 -4
- ml_tools/optimization_tools/_imprimir.py +13 -0
- ml_tools/optimization_tools/_optimization_bounds.py +236 -0
- ml_tools/optimization_tools/_optimization_plots.py +218 -0
- ml_tools/{path_manager.py → path_manager/__init__.py} +6 -3
- ml_tools/{_core/_path_manager.py → path_manager/_dragonmanager.py} +11 -347
- ml_tools/path_manager/_imprimir.py +15 -0
- ml_tools/path_manager/_path_tools.py +346 -0
- ml_tools/plot_fonts/__init__.py +8 -0
- ml_tools/plot_fonts/_imprimir.py +8 -0
- ml_tools/{_core → plot_fonts}/_plot_fonts.py +2 -5
- ml_tools/schema/__init__.py +15 -0
- ml_tools/schema/_feature_schema.py +223 -0
- ml_tools/schema/_gui_schema.py +191 -0
- ml_tools/schema/_imprimir.py +10 -0
- ml_tools/{serde.py → serde/__init__.py} +4 -2
- ml_tools/serde/_imprimir.py +10 -0
- ml_tools/{_core → serde}/_serde.py +3 -8
- ml_tools/{utilities.py → utilities/__init__.py} +11 -6
- ml_tools/utilities/_imprimir.py +18 -0
- ml_tools/{_core/_utilities.py → utilities/_utility_save_load.py} +13 -190
- ml_tools/utilities/_utility_tools.py +192 -0
- dragon_ml_toolbox-19.13.0.dist-info/RECORD +0 -111
- ml_tools/ML_chaining_inference.py +0 -8
- ml_tools/ML_configuration.py +0 -86
- ml_tools/ML_configuration_pytab.py +0 -14
- ml_tools/ML_datasetmaster.py +0 -10
- ml_tools/ML_evaluation.py +0 -16
- ml_tools/ML_evaluation_multi.py +0 -12
- ml_tools/ML_finalize_handler.py +0 -8
- ml_tools/ML_inference.py +0 -12
- ml_tools/ML_models.py +0 -14
- ml_tools/ML_models_advanced.py +0 -14
- ml_tools/ML_models_pytab.py +0 -14
- ml_tools/ML_optimization.py +0 -14
- ml_tools/ML_optimization_pareto.py +0 -8
- ml_tools/ML_scaler.py +0 -8
- ml_tools/ML_sequence_datasetmaster.py +0 -8
- ml_tools/ML_sequence_evaluation.py +0 -10
- ml_tools/ML_sequence_inference.py +0 -8
- ml_tools/ML_sequence_models.py +0 -8
- ml_tools/ML_trainer.py +0 -12
- ml_tools/ML_vision_datasetmaster.py +0 -12
- ml_tools/ML_vision_evaluation.py +0 -10
- ml_tools/ML_vision_inference.py +0 -8
- ml_tools/ML_vision_models.py +0 -18
- ml_tools/SQL.py +0 -8
- ml_tools/_core/_ETL_cleaning.py +0 -694
- ml_tools/_core/_IO_tools.py +0 -498
- ml_tools/_core/_ML_callbacks.py +0 -702
- ml_tools/_core/_ML_configuration.py +0 -1332
- ml_tools/_core/_ML_configuration_pytab.py +0 -102
- ml_tools/_core/_ML_evaluation.py +0 -867
- ml_tools/_core/_ML_evaluation_multi.py +0 -544
- ml_tools/_core/_ML_inference.py +0 -646
- ml_tools/_core/_ML_models.py +0 -668
- ml_tools/_core/_ML_models_pytab.py +0 -693
- ml_tools/_core/_ML_trainer.py +0 -2323
- ml_tools/_core/_ML_utilities.py +0 -886
- ml_tools/_core/_ML_vision_models.py +0 -644
- ml_tools/_core/_data_exploration.py +0 -1901
- ml_tools/_core/_optimization_tools.py +0 -493
- ml_tools/_core/_schema.py +0 -359
- ml_tools/plot_fonts.py +0 -8
- ml_tools/schema.py +0 -12
- {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-19.13.0.dist-info → dragon_ml_toolbox-20.0.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
from typing import Optional, Union, Literal
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import re
|
|
4
|
+
import shutil
|
|
5
|
+
|
|
6
|
+
from .._core import get_logger
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
_LOGGER = get_logger("Path Ops")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"make_fullpath",
|
|
14
|
+
"sanitize_filename",
|
|
15
|
+
"list_csv_paths",
|
|
16
|
+
"list_files_by_extension",
|
|
17
|
+
"list_subdirectories",
|
|
18
|
+
"clean_directory",
|
|
19
|
+
"safe_move",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def make_fullpath(
        input_path: Union[str, Path],
        make: bool = False,
        verbose: bool = False,
        enforce: Optional[Literal["directory", "file"]] = None
) -> Path:
    """
    Resolves a string or Path into an absolute Path, optionally creating it.

    Behavior:
    - An existing path is resolved and returned as-is.
    - A missing path with `make=True` is created: as a file when it carries a
      suffix, as a directory otherwise.
    - A missing path with `make=False` raises FileNotFoundError.
    - When `enforce` is set, the resolved path must be of that kind.

    Parameters:
        input_path (str | Path):
            Path to resolve.
        make (bool):
            If True, attempt to create file or directory.
        verbose (bool):
            Print classification after resolution.
        enforce ("directory" | "file" | None):
            Raises an error if the resolved path is not what was enforced.

    Returns:
        Path: Resolved absolute path.

    Raises:
        ValueError: If the path doesn't exist and can't be created.
        TypeError: If the final path does not match the `enforce` parameter.

    ## 🗒️ Note:

    Directories with dots will be treated as files.

    Files without extension will be treated as directories.
    """
    candidate = Path(input_path).expanduser()

    # Heuristic: a suffix means "file". See the docstring note for caveats.
    treat_as_file = candidate.suffix != ""

    try:
        final = candidate.resolve(strict=True)
    except FileNotFoundError:
        if not make:
            _LOGGER.error(f"Path does not exist: '{candidate}'.")
            raise FileNotFoundError()

        try:
            if treat_as_file:
                # Ensure the parent chain exists before touching the file.
                candidate.parent.mkdir(parents=True, exist_ok=True)
                candidate.touch(exist_ok=False)
            else:
                candidate.mkdir(parents=True, exist_ok=True)
            final = candidate.resolve(strict=True)
        except Exception:
            _LOGGER.exception(f"Failed to create {'file' if treat_as_file else 'directory'} '{candidate}'.")
            raise IOError()

    if enforce == "file" and not final.is_file():
        _LOGGER.error(f"Path was enforced as a file, but it is not: '{final}'")
        raise TypeError()

    if enforce == "directory" and not final.is_dir():
        _LOGGER.error(f"Path was enforced as a directory, but it is not: '{final}'")
        raise TypeError()

    if verbose:
        if final.is_file():
            print("📄 Path is a File")
        elif final.is_dir():
            print("📁 Path is a Directory")
        else:
            print("❓ Path exists but is neither file nor directory")

    return final
|
+
|
|
105
|
+
def sanitize_filename(filename: str) -> str:
    """
    Produces a cross-platform-safe filename from an arbitrary string.

    Steps applied, in order:
    - Leading/trailing whitespace is stripped.
    - Any internal whitespace run becomes a single underscore.
    - Characters outside [word, dash, dot] are dropped.

    Args:
        filename (str): Base filename.

    Returns:
        str: A sanitized string suitable to use as a filename.
    """
    # Collapse whitespace runs into underscores after trimming the ends.
    result = re.sub(r'\s+', '_', filename.strip())

    # Drop anything that is not alphanumeric/underscore, dash, or dot.
    result = re.sub(r'[^\w\-.]', '', result)

    # Everything may have been stripped away — that is an error, not a "".
    if not result:
        _LOGGER.error("The sanitized filename is empty. The original input may have contained only invalid characters.")
        raise ValueError()

    return result
134
|
+
|
|
135
|
+
def list_csv_paths(directory: Union[str, Path], verbose: bool = True, raise_on_empty: bool = True) -> dict[str, Path]:
    """
    Convenience wrapper that lists all `.csv` files in `directory`.

    Parameters:
        directory (str | Path): Path to the directory containing `.csv` files.
        verbose (bool): If True, prints found files.
        raise_on_empty (bool): If True, raises IOError if no files are found.

    Returns:
        (dict[str, Path]): Dictionary mapping {filename: filepath}.
    """
    # Delegate to the generic extension-based lister with 'csv' fixed.
    return list_files_by_extension(
        directory=directory,
        extension="csv",
        verbose=verbose,
        raise_on_empty=raise_on_empty,
    )
|
151
|
+
def list_files_by_extension(
        directory: Union[str, Path],
        extension: str,
        verbose: bool = True,
        raise_on_empty: bool = True
) -> dict[str, Path]:
    """
    Collects every file with the given extension inside a directory.

    Parameters:
        directory (str | Path): Directory to search in.
        extension (str): Extension to match, with or without a leading dot
            (e.g., 'json', '.txt').
        verbose (bool): If True, logs the files found.
        raise_on_empty (bool): If True, raises IOError when nothing matches.

    Returns:
        (dict[str, Path]): Dictionary mapping {filename: filepath}. Returns an
        empty dict if none are found and `raise_on_empty` is False.
    """
    search_dir = make_fullpath(directory, enforce="directory")

    # Accept 'csv' and '.csv' alike; match lowercase extensions only.
    ext = extension.lstrip(".").lower()

    found = list(search_dir.glob(f"*.{ext}"))

    if not found:
        msg = f"No '.{ext}' files found in directory: {search_dir}."
        if raise_on_empty:
            _LOGGER.error(msg)
            raise IOError()
        if verbose:
            _LOGGER.warning(msg)
        return {}

    # Keyed by stem (filename without extension).
    mapping = {file_path.stem: file_path for file_path in found}

    if verbose:
        _LOGGER.info(f"📂 '{ext.upper()}' files found:")
        for stem in mapping:
            print(f"\t{stem}")

    return mapping
+
|
|
198
|
+
def list_subdirectories(
        root_dir: Union[str, Path],
        verbose: bool = True,
        raise_on_empty: bool = True
) -> dict[str, Path]:
    """
    Returns the immediate subdirectories of a directory as a name→path map.

    Args:
        root_dir (str | Path): The path to the directory to scan.
        verbose (bool): If True, prints the number of directories found.
        raise_on_empty (bool): If True, raises IOError if no subdirectories are found.

    Returns:
        dict[str, Path]: Mapping of subdirectory name to its full Path object.
    """
    root_path = make_fullpath(root_dir, enforce="directory")

    # Only direct children that are directories; resolve to absolute paths.
    subdirs = [entry.resolve() for entry in root_path.iterdir() if entry.is_dir()]

    if not subdirs:
        msg = f"No subdirectories found inside '{root_path}'"
        if raise_on_empty:
            _LOGGER.error(msg)
            raise IOError()
        if verbose:
            _LOGGER.warning(msg)
        return {}

    if verbose:
        total = len(subdirs)
        # "directory" vs "directories" depending on the count.
        suffix = 'ies' if total != 1 else 'y'
        print(f"Found {total} subdirector{suffix} in '{root_path.name}'.")

    # Key each entry by its final path component (the directory name).
    return {entry.name: entry for entry in subdirs}
|
|
241
|
+
def clean_directory(directory: Union[str, Path], verbose: bool = False) -> None:
    """
    ⚠️ DANGER: DESTRUCTIVE OPERATION ⚠️

    Empties the given directory: every non-hidden file and subdirectory inside
    it is deleted. The directory itself is left in place.

    Safety: items whose name starts with a period '.' are skipped, covering
    macOS/Linux hidden files and dot-config folders on Windows.

    Args:
        directory (str | Path): The directory path to clean.
        verbose (bool): If True, prints the name of each top-level item deleted.
    """
    target_dir = make_fullpath(directory, enforce="directory")

    if verbose:
        _LOGGER.warning(f"Starting cleanup of directory: {target_dir}")

    for entry in target_dir.iterdir():
        # Never touch hidden entries.
        if entry.name.startswith("."):
            continue

        try:
            if entry.is_file() or entry.is_symlink():
                entry.unlink()
                if verbose:
                    print(f"  🗑️ Deleted file: {entry.name}")
            elif entry.is_dir():
                shutil.rmtree(entry)
                if verbose:
                    print(f"  🗑️ Deleted directory: {entry.name}")
        except Exception as e:
            # Best effort: report and keep going with the remaining items.
            _LOGGER.warning(f"Failed to delete item '{entry.name}': {e}")
            continue
276
|
+
|
|
277
|
+
def safe_move(
        source: Union[str, Path],
        final_destination: Union[str, Path],
        rename: Optional[str] = None,
        overwrite: bool = False
) -> Path:
    """
    Relocates a file or directory into a destination directory with guards
    against accidental data loss.

    Features:
    - Optional renaming, sanitized automatically.
    - File extensions are strictly PRESERVED when renaming.
    - Existing destinations are never replaced unless `overwrite=True`.

    Args:
        source (str | Path): The file or directory to move.
        final_destination (str | Path): The destination DIRECTORY where the item will be moved. It will be created if it does not exist.
        rename (Optional[str]): If provided, the moved item will be renamed to this. Note: For files, the extension is strictly preserved.
        overwrite (bool): If True, overwrites the destination path if it exists.

    Returns:
        Path: The new absolute path of the moved item.
    """
    # Resolve the source (must exist) and the destination directory
    # (created on demand).
    origin = make_fullpath(source, make=False)
    target_dir = make_fullpath(final_destination, make=True, enforce="directory")

    # Work out the name the item will carry at the destination.
    if rename:
        cleaned = sanitize_filename(rename)
        # Files always keep their original suffix, regardless of `rename`.
        new_name = f"{cleaned}{origin.suffix}" if origin.is_file() else cleaned
    else:
        new_name = origin.name

    destination = target_dir / new_name

    # Collision handling.
    if destination.exists():
        if not overwrite:
            _LOGGER.error(f"Destination already exists: '{destination}'. Use overwrite=True to force.")
            raise FileExistsError()

        if destination.is_dir():
            if origin.is_file():
                _LOGGER.error(f"Cannot overwrite directory '{destination}' with file '{origin}'")
                raise IsADirectoryError()
            # Remove the stale directory first so the move does not nest
            # the source inside it.
            shutil.rmtree(destination)
        else:
            # Destination is an existing file.
            if origin.is_dir():
                _LOGGER.error(f"Cannot overwrite file '{destination}' with directory '{origin}'")
                raise FileExistsError()
            destination.unlink()

    # Perform the actual move.
    try:
        shutil.move(str(origin), str(destination))
        return destination
    except Exception as e:
        _LOGGER.exception(f"Failed to move '{origin}' to '{destination}'")
        raise e
|
@@ -2,8 +2,8 @@ import matplotlib.pyplot as plt
|
|
|
2
2
|
from matplotlib import font_manager as fm
|
|
3
3
|
import platform
|
|
4
4
|
|
|
5
|
-
from
|
|
6
|
-
|
|
5
|
+
from .._core import get_logger
|
|
6
|
+
|
|
7
7
|
|
|
8
8
|
_LOGGER = get_logger("Plot Fonts")
|
|
9
9
|
|
|
@@ -62,6 +62,3 @@ def configure_cjk_fonts(verbose: bool = True) -> None:
|
|
|
62
62
|
if verbose:
|
|
63
63
|
_LOGGER.warning(f"No suitable Simplified Chinese fonts found for {system}. Text may not render correctly.")
|
|
64
64
|
|
|
65
|
-
|
|
66
|
-
def info():
|
|
67
|
-
_script_info(__all__)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from ._feature_schema import FeatureSchema
|
|
2
|
+
|
|
3
|
+
from ._gui_schema import (
|
|
4
|
+
create_guischema_template,
|
|
5
|
+
make_multibinary_groups
|
|
6
|
+
)
|
|
7
|
+
|
|
8
|
+
from ._imprimir import info
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"FeatureSchema",
|
|
13
|
+
"create_guischema_template",
|
|
14
|
+
"make_multibinary_groups",
|
|
15
|
+
]
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
from typing import NamedTuple, Optional, Union, Any
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
from ..IO_tools import save_list_strings
|
|
6
|
+
|
|
7
|
+
from .._core import get_logger
|
|
8
|
+
from ..path_manager import make_fullpath
|
|
9
|
+
from ..keys._keys import SchemaKeys, DatasetKeys, PytorchModelArchitectureKeys
|
|
10
|
+
|
|
11
|
+
from .._core._schema_load_ops import prepare_schema_from_json
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
_LOGGER = get_logger("FeatureSchema")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"FeatureSchema",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class FeatureSchema(NamedTuple):
    """
    Holds the final, definitive schema for the model pipeline.

    An immutable (NamedTuple) container describing every feature the model
    consumes, split into continuous and categorical groups, together with
    the metadata needed to encode categorical values.
    """

    # The final, ordered list of all feature names
    feature_names: tuple[str, ...]

    # List of all continuous feature names
    continuous_feature_names: tuple[str, ...]

    # List of all categorical feature names
    categorical_feature_names: tuple[str, ...]

    # Map of {column_index: cardinality} for categorical features
    # (Optional: may be None — presumably when no categorical features exist;
    # verify against the schema-building code.)
    categorical_index_map: Optional[dict[int, int]]

    # Map string-to-int category values (e.g., {'color': {'red': 0, 'blue': 1}})
    # (Optional: may be None when no string-valued categoricals are present.)
    categorical_mappings: Optional[dict[str, dict[str, int]]]
|
|
40
|
+
def to_json(self, directory: Union[str, Path], verbose: bool = True) -> None:
|
|
41
|
+
"""
|
|
42
|
+
Saves the schema as 'FeatureSchema.json' to the provided directory.
|
|
43
|
+
|
|
44
|
+
Handles conversion of Tuple->List and IntKeys->StrKeys automatically.
|
|
45
|
+
"""
|
|
46
|
+
# validate path
|
|
47
|
+
dir_path = make_fullpath(directory, enforce="directory")
|
|
48
|
+
file_path = dir_path / SchemaKeys.SCHEMA_FILENAME
|
|
49
|
+
|
|
50
|
+
try:
|
|
51
|
+
# Convert named tuple to dict
|
|
52
|
+
data = self._asdict()
|
|
53
|
+
|
|
54
|
+
# Write to disk
|
|
55
|
+
with open(file_path, 'w', encoding='utf-8') as f:
|
|
56
|
+
json.dump(data, f, indent=4)
|
|
57
|
+
|
|
58
|
+
if verbose:
|
|
59
|
+
_LOGGER.info(f"FeatureSchema saved to '{dir_path.name}/{SchemaKeys.SCHEMA_FILENAME}'")
|
|
60
|
+
|
|
61
|
+
except (IOError, TypeError) as e:
|
|
62
|
+
_LOGGER.error(f"Failed to save FeatureSchema to JSON: {e}")
|
|
63
|
+
raise e
|
|
64
|
+
|
|
65
|
+
@classmethod
|
|
66
|
+
def from_json(cls, directory: Union[str, Path], verbose: bool = True) -> 'FeatureSchema':
|
|
67
|
+
"""
|
|
68
|
+
Loads a 'FeatureSchema.json' from the provided directory.
|
|
69
|
+
|
|
70
|
+
Restores Tuples from Lists and Integer Keys from Strings.
|
|
71
|
+
"""
|
|
72
|
+
# validate directory
|
|
73
|
+
dir_path = make_fullpath(directory, enforce="directory")
|
|
74
|
+
file_path = dir_path / SchemaKeys.SCHEMA_FILENAME
|
|
75
|
+
|
|
76
|
+
if not file_path.exists():
|
|
77
|
+
_LOGGER.error(f"FeatureSchema file not found at '{directory}'")
|
|
78
|
+
raise FileNotFoundError()
|
|
79
|
+
|
|
80
|
+
try:
|
|
81
|
+
with open(file_path, 'r', encoding='utf-8') as f:
|
|
82
|
+
data: dict[str, Any] = json.load(f)
|
|
83
|
+
|
|
84
|
+
# Logic delegated to shared helper
|
|
85
|
+
schema_kwargs = prepare_schema_from_json(data)
|
|
86
|
+
schema = cls(**schema_kwargs)
|
|
87
|
+
|
|
88
|
+
if verbose:
|
|
89
|
+
_LOGGER.info(f"FeatureSchema loaded from '{dir_path.name}'")
|
|
90
|
+
|
|
91
|
+
return schema
|
|
92
|
+
|
|
93
|
+
except (IOError, ValueError, KeyError) as e:
|
|
94
|
+
_LOGGER.error(f"Failed to load FeatureSchema from '{dir_path}': {e}")
|
|
95
|
+
raise e
|
|
96
|
+
|
|
97
|
+
    @classmethod
    def from_model_architecture(cls, file_or_dir: Union[str, Path], verbose: bool = True) -> 'FeatureSchema':
        """
        Extracts and loads the FeatureSchema embedded within a model's 'architecture.json' file.

        Args:
            file_or_dir: Path to the JSON file or the directory containing 'architecture.json'.
            verbose: If True, prints a confirmation message upon loading.

        Raises:
            KeyError: If the architecture file does not contain a FeatureSchema configuration.
        """
        user_path = make_fullpath(file_or_dir)

        # 1. Resolve Path: accept either the JSON file itself or its parent
        # directory (in which case the conventional filename is appended).
        if user_path.is_dir():
            json_filename = PytorchModelArchitectureKeys.SAVENAME + ".json"
            target_path = make_fullpath(user_path / json_filename, enforce="file")
        elif user_path.is_file():
            target_path = user_path
        else:
            # Neither file nor directory (e.g. special filesystem entry).
            _LOGGER.error(f"Invalid path: '{file_or_dir}'")
            raise IOError()

        # 2. Load Architecture JSON
        try:
            with open(target_path, 'r', encoding='utf-8') as f:
                arch_data: dict[str, Any] = json.load(f)
        except (IOError, json.JSONDecodeError) as e:
            _LOGGER.error(f"Failed to load architecture file from '{target_path}': {e}")
            raise e

        # 3. Validate and Extract Schema Dict
        # Missing CONFIG section degrades to an empty dict so the check below
        # produces the schema-specific error instead of a raw KeyError.
        config = arch_data.get(PytorchModelArchitectureKeys.CONFIG, {})

        if SchemaKeys.SCHEMA_DICT not in config:
            error_msg = f"The model architecture at '{target_path.name}' does not contain a '{SchemaKeys.SCHEMA_DICT}' key. This model might not use a FeatureSchema."
            _LOGGER.error(error_msg)
            raise KeyError()

        data = config[SchemaKeys.SCHEMA_DICT]

        # 4. Reconstruct Schema (Restore Types)
        try:
            # Shared helper restores tuples and integer keys from JSON types.
            schema_kwargs = prepare_schema_from_json(data)
            schema = cls(**schema_kwargs)

            if verbose:
                _LOGGER.info(f"FeatureSchema extracted from architecture '{target_path.name}'")

            return schema

        except (ValueError, KeyError) as e:
            _LOGGER.error(f"Failed to parse FeatureSchema from architecture file: {e}")
            raise e
|
|
153
|
+
def _save_helper(self, artifact: tuple[str, ...], directory: Union[str,Path], filename: str, verbose: bool):
    """
    Writes a tuple of feature names to a text file via `save_list_strings`.

    Args:
        artifact: Tuple of feature-name strings to persist.
        directory: The directory where the file will be saved.
        filename: Base name for the output file.
        verbose: If True, prints a confirmation message upon saving.
    """
    to_save = list(artifact)

    # empty check — warn (naming the intended file) and skip instead of writing an empty file.
    # Fix: the original warning was an f-string with no placeholder; interpolate the filename.
    if not to_save:
        _LOGGER.warning(f"Skipping save for '{filename}': The feature list is empty.")
        return

    save_list_strings(list_strings=to_save,
                      directory=directory,
                      filename=filename,
                      verbose=verbose)
|
|
165
|
+
|
|
166
|
+
def save_all_features(self, directory: Union[str,Path], verbose: bool=True):
    """
    Writes every feature name in the schema to a text file.

    Args:
        directory: The directory where the file will be saved.
        verbose: If True, prints a confirmation message upon saving.
    """
    # Delegate to the shared helper with the canonical filename for the full feature list.
    self._save_helper(
        artifact=self.feature_names,
        directory=directory,
        filename=DatasetKeys.FEATURE_NAMES,
        verbose=verbose,
    )
|
|
178
|
+
|
|
179
|
+
def save_continuous_features(self, directory: Union[str,Path], verbose: bool=True):
    """
    Writes the continuous feature names to a text file.

    Args:
        directory: The directory where the file will be saved.
        verbose: If True, prints a confirmation message upon saving.
    """
    # Delegate to the shared helper with the canonical filename for continuous features.
    self._save_helper(
        artifact=self.continuous_feature_names,
        directory=directory,
        filename=DatasetKeys.CONTINUOUS_NAMES,
        verbose=verbose,
    )
|
|
191
|
+
|
|
192
|
+
def save_categorical_features(self, directory: Union[str,Path], verbose: bool=True):
    """
    Writes the categorical feature names to a text file.

    Args:
        directory: The directory where the file will be saved.
        verbose: If True, prints a confirmation message upon saving.
    """
    # Delegate to the shared helper with the canonical filename for categorical features.
    self._save_helper(
        artifact=self.categorical_feature_names,
        directory=directory,
        filename=DatasetKeys.CATEGORICAL_NAMES,
        verbose=verbose,
    )
|
|
204
|
+
|
|
205
|
+
def save_artifacts(self, directory: Union[str,Path]):
    """
    Saves feature names, categorical feature names, continuous feature names to separate text files.
    """
    # Run each saver in order with confirmation messages enabled.
    for saver in (self.save_all_features,
                  self.save_continuous_features,
                  self.save_categorical_features):
        saver(directory=directory, verbose=True)
|
|
212
|
+
|
|
213
|
+
def __repr__(self) -> str:
|
|
214
|
+
"""Returns a concise representation of the schema's contents."""
|
|
215
|
+
total = len(self.feature_names)
|
|
216
|
+
cont = len(self.continuous_feature_names)
|
|
217
|
+
cat = len(self.categorical_feature_names)
|
|
218
|
+
index_map = self.categorical_index_map is not None
|
|
219
|
+
cat_map = self.categorical_mappings is not None
|
|
220
|
+
return (
|
|
221
|
+
f"FeatureSchema(total={total}, continuous={cont}, categorical={cat}, index_map={index_map}, categorical_map={cat_map})"
|
|
222
|
+
)
|
|
223
|
+
|