dragon-ml-toolbox 10.1.1__py3-none-any.whl → 14.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this version of dragon-ml-toolbox has been flagged as a potentially problematic release.
- {dragon_ml_toolbox-10.1.1.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/METADATA +38 -63
- dragon_ml_toolbox-14.2.0.dist-info/RECORD +48 -0
- {dragon_ml_toolbox-10.1.1.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/licenses/LICENSE +1 -1
- {dragon_ml_toolbox-10.1.1.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +11 -0
- ml_tools/ETL_cleaning.py +175 -59
- ml_tools/ETL_engineering.py +506 -70
- ml_tools/GUI_tools.py +2 -1
- ml_tools/MICE_imputation.py +212 -7
- ml_tools/ML_callbacks.py +73 -40
- ml_tools/ML_datasetmaster.py +267 -284
- ml_tools/ML_evaluation.py +119 -58
- ml_tools/ML_evaluation_multi.py +107 -32
- ml_tools/ML_inference.py +15 -5
- ml_tools/ML_models.py +234 -170
- ml_tools/ML_models_advanced.py +323 -0
- ml_tools/ML_optimization.py +321 -97
- ml_tools/ML_scaler.py +10 -5
- ml_tools/ML_trainer.py +585 -40
- ml_tools/ML_utilities.py +528 -0
- ml_tools/ML_vision_datasetmaster.py +1315 -0
- ml_tools/ML_vision_evaluation.py +260 -0
- ml_tools/ML_vision_inference.py +428 -0
- ml_tools/ML_vision_models.py +627 -0
- ml_tools/ML_vision_transformers.py +58 -0
- ml_tools/PSO_optimization.py +10 -7
- ml_tools/RNN_forecast.py +2 -0
- ml_tools/SQL.py +22 -9
- ml_tools/VIF_factor.py +4 -3
- ml_tools/_ML_vision_recipe.py +88 -0
- ml_tools/__init__.py +1 -0
- ml_tools/_logger.py +0 -2
- ml_tools/_schema.py +96 -0
- ml_tools/constants.py +79 -0
- ml_tools/custom_logger.py +164 -16
- ml_tools/data_exploration.py +1092 -109
- ml_tools/ensemble_evaluation.py +48 -1
- ml_tools/ensemble_inference.py +6 -7
- ml_tools/ensemble_learning.py +4 -3
- ml_tools/handle_excel.py +1 -0
- ml_tools/keys.py +80 -0
- ml_tools/math_utilities.py +259 -0
- ml_tools/optimization_tools.py +198 -24
- ml_tools/path_manager.py +144 -45
- ml_tools/serde.py +192 -0
- ml_tools/utilities.py +287 -227
- dragon_ml_toolbox-10.1.1.dist-info/RECORD +0 -36
- {dragon_ml_toolbox-10.1.1.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-10.1.1.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/top_level.txt +0 -0
ml_tools/optimization_tools.py CHANGED
```diff
@@ -1,6 +1,6 @@
 import matplotlib.pyplot as plt
 import seaborn as sns
-from typing import Union, Any, Literal, Optional
+from typing import Union, Any, Literal, Optional, Dict, List, Tuple
 from pathlib import Path
 import pandas as pd
 
@@ -9,14 +9,135 @@ from .utilities import yield_dataframes_from_dir
 from ._logger import _LOGGER
 from ._script_info import _script_info
 from .SQL import DatabaseManager
+from ._schema import FeatureSchema
 
 
 __all__ = [
+    "create_optimization_bounds",
     "parse_lower_upper_bounds",
-    "plot_optimal_feature_distributions"
+    "plot_optimal_feature_distributions",
 ]
 
 
+def create_optimization_bounds(
+    schema: FeatureSchema,
+    continuous_bounds_map: Dict[str, Tuple[float, float]],
+    start_at_zero: bool = True
+) -> Tuple[List[float], List[float]]:
+    """
+    Generates the lower and upper bounds lists for the optimizer from a FeatureSchema.
+
+    This helper function automates the creation of unbiased bounds for
+    categorical features and combines them with user-defined bounds for
+    continuous features, using the schema as the single source of truth
+    for feature order and type.
+
+    Args:
+        schema (FeatureSchema):
+            The definitive schema object created by
+            `data_exploration.finalize_feature_schema()`.
+        continuous_bounds_map (Dict[str, Tuple[float, float]]):
+            A dictionary mapping the *name* of each **continuous** feature
+            to its (min_bound, max_bound) tuple.
+        start_at_zero (bool):
+            - If True, assumes categorical encoding is [0, 1, ..., k-1].
+              Bounds will be set as [-0.5, k - 0.5].
+            - If False, assumes encoding is [1, 2, ..., k].
+              Bounds will be set as [0.5, k + 0.5].
+
+    Returns:
+        Tuple[List[float], List[float]]:
+            A tuple containing two lists: (lower_bounds, upper_bounds).
+
+    Raises:
+        ValueError: If a feature is missing from `continuous_bounds_map`
+                    or if a feature name in the map is not a
+                    continuous feature according to the schema.
+    """
+    # 1. Get feature names and map from schema
+    feature_names = schema.feature_names
+    categorical_index_map = schema.categorical_index_map
+    total_features = len(feature_names)
+
+    if total_features <= 0:
+        _LOGGER.error("Schema contains no features.")
+        raise ValueError()
+
+    _LOGGER.info(f"Generating bounds for {total_features} total features...")
+
+    # 2. Initialize bound lists
+    lower_bounds: List[Optional[float]] = [None] * total_features
+    upper_bounds: List[Optional[float]] = [None] * total_features
+
+    # 3. Populate categorical bounds (Index-based)
+    if categorical_index_map:
+        for index, cardinality in categorical_index_map.items():
+            if not (0 <= index < total_features):
+                _LOGGER.error(f"Categorical index {index} is out of range for the {total_features} features.")
+                raise ValueError()
+
+            if start_at_zero:
+                # Rule for [0, k-1]: bounds are [-0.5, k - 0.5]
+                low = -0.5
+                high = float(cardinality) - 0.5
+            else:
+                # Rule for [1, k]: bounds are [0.5, k + 0.5]
+                low = 0.5
+                high = float(cardinality) + 0.5
+
+            lower_bounds[index] = low
+            upper_bounds[index] = high
+
+        _LOGGER.info(f"Automatically set bounds for {len(categorical_index_map)} categorical features.")
+    else:
+        _LOGGER.info("No categorical features found in schema.")
+
+    # 4. Populate continuous bounds (Name-based)
+    # Use schema.continuous_feature_names for robust checking
+    continuous_names_set = set(schema.continuous_feature_names)
+
+    if continuous_names_set != set(continuous_bounds_map.keys()):
+        missing_in_map = continuous_names_set - set(continuous_bounds_map.keys())
+        if missing_in_map:
+            _LOGGER.error(f"The following continuous features are missing from 'continuous_bounds_map': {list(missing_in_map)}")
+
+        extra_in_map = set(continuous_bounds_map.keys()) - continuous_names_set
+        if extra_in_map:
+            _LOGGER.error(f"The following features in 'continuous_bounds_map' are not defined as continuous in the schema: {list(extra_in_map)}")
+
+        raise ValueError("Mismatch between 'continuous_bounds_map' and schema's continuous features.")
+
+    count_continuous = 0
+    for name, (low, high) in continuous_bounds_map.items():
+        # Map name to its index in the *feature-only* list
+        # This is guaranteed to be correct by the schema
+        index = feature_names.index(name)
+
+        if lower_bounds[index] is not None:
+            # This should be impossible if schema is correct, but good to check
+            _LOGGER.error(f"Schema conflict: Feature '{name}' (at index {index}) is defined as both continuous and categorical.")
+            raise ValueError()
+
+        lower_bounds[index] = float(low)
+        upper_bounds[index] = float(high)
+        count_continuous += 1
+
+    _LOGGER.info(f"Manually set bounds for {count_continuous} continuous features.")
+
+    # 5. Final Validation (all Nones should be filled)
+    if None in lower_bounds:
+        missing_indices = [i for i, b in enumerate(lower_bounds) if b is None]
+        missing_names = [feature_names[i] for i in missing_indices]
+        _LOGGER.error(f"Failed to create all bounds. This indicates an internal logic error. Missing: {missing_names}")
+        raise RuntimeError("Internal error: Not all bounds were populated.")
+
+    # Cast to float lists, as 'None' sentinels are gone
+    return (
+        [float(b) for b in lower_bounds],  # type: ignore
+        [float(b) for b in upper_bounds]  # type: ignore
+    )
+
+
 def parse_lower_upper_bounds(source: dict[str,tuple[Any,Any]]):
     """
     Parse lower and upper boundaries, returning 2 lists:
@@ -29,13 +150,16 @@ def parse_lower_upper_bounds(source: dict[str,tuple[Any,Any]]):
     return lower, upper
 
 
-def plot_optimal_feature_distributions(results_dir: Union[str, Path]):
+def plot_optimal_feature_distributions(results_dir: Union[str, Path], verbose: bool=False):
     """
-    Analyzes optimization results and plots the distribution of optimal values
+    Analyzes optimization results and plots the distribution of optimal values.
 
-
-
-
+    This function is compatible with mixed-type CSVs (strings for
+    categorical features, numbers for continuous). It automatically
+    detects the data type for each feature and generates:
+
+    - A Bar Plot for categorical (string) features.
+    - A KDE Plot for continuous (numeric) features.
 
     Plots are saved in a subdirectory inside the source directory.
 
@@ -55,10 +179,17 @@ def plot_optimal_feature_distributions(results_dir: Union[str, Path]):
     _LOGGER.info(f"📁 Starting analysis from results in: '{results_dir}'")
     data_to_plot = []
     for df, df_name in yield_dataframes_from_dir(results_path):
+        if df.shape[1] < 2:
+            _LOGGER.warning(f"Skipping '{df_name}': must have at least 2 columns (feature + target).")
+            continue
         melted_df = df.iloc[:, :-1].melt(var_name='feature', value_name='value')
-        melted_df['target'] = df_name
+        melted_df['target'] = df_name
         data_to_plot.append(melted_df)
 
+    if not data_to_plot:
+        _LOGGER.error("No valid data to plot after processing all CSVs.")
+        return
+
     long_df = pd.concat(data_to_plot, ignore_index=True)
     features = long_df['feature'].unique()
     _LOGGER.info(f"Found data for {len(features)} features across {len(long_df['target'].unique())} targets. Generating plots...")
@@ -66,12 +197,23 @@ def plot_optimal_feature_distributions(results_dir: Union[str, Path]):
     # --- Plotting Loop ---
     for feature_name in features:
         plt.figure(figsize=(12, 7))
+        # Use .copy() to avoid SettingWithCopyWarning
+        # feature_df = long_df[long_df['feature'] == feature_name].copy()
         feature_df = long_df[long_df['feature'] == feature_name]
 
-        #
-
-
-
+        # --- Type-checking logic ---
+        # Attempt to convert 'value' column to numeric.
+        # errors='coerce' turns non-numeric strings (e.g., 'Category_A') into NaN
+        feature_df['numeric_value'] = pd.to_numeric(feature_df['value'], errors='coerce')
+
+        # If *any* value failed conversion (is NaN), treat it as categorical.
+        if feature_df['numeric_value'].isna().any():
+
+            # --- PLOT 1: CATEGORICAL (String-based) ---
+            if verbose:
+                _LOGGER.info(f"Plotting '{feature_name}' as categorical (bar plot).")
+
+            # Calculate percentages for a clean bar plot
             norm_df = (feature_df.groupby('target')['value']
                        .value_counts(normalize=True)
                        .mul(100)
@@ -79,21 +221,29 @@ def plot_optimal_feature_distributions(results_dir: Union[str, Path]):
                        .reset_index())
 
             ax = sns.barplot(data=norm_df, x='value', y='percent', hue='target')
-
-            plt.title(f"Optimal Value Distribution for '{feature_name}'", fontsize=16)
             plt.ylabel("Frequency (%)", fontsize=12)
             ax.set_ylim(0, 100) # Set Y-axis from 0 to 100
+
+            # Rotate x-labels if there are many categories
+            if norm_df['value'].nunique() > 10:
+                plt.xticks(rotation=45, ha='right')
 
         else:
-            # PLOT 2:
-
+            # --- PLOT 2: CONTINUOUS (Numeric-based) ---
+            # All values were successfully converted to numeric.
+            if verbose:
+                _LOGGER.info(f"Plotting '{feature_name}' as continuous (KDE plot).")
+
+            # Use the 'numeric_value' column (which is float type) for the KDE
+            ax = sns.kdeplot(data=feature_df, x='numeric_value', hue='target',
                              fill=True, alpha=0.1, warn_singular=False)
-
-
-            plt.
+
+            # Set the x-axis label back to the original feature name
+            plt.xlabel("Feature Value", fontsize=12)
+            plt.ylabel("Density", fontsize=12)
 
         # --- Common settings for both plot types ---
-        plt.
+        plt.title(f"Optimal Value Distribution for '{feature_name}'", fontsize=16)
         plt.grid(axis='y', alpha=0.5, linestyle='--')
 
         legend = ax.get_legend()
@@ -106,28 +256,52 @@ def plot_optimal_feature_distributions(results_dir: Union[str, Path]):
         plt.close()
 
     _LOGGER.info(f"All plots saved successfully to: '{output_path}'")
-
+
 
 def _save_result(
     result_dict: dict,
     save_format: Literal['csv', 'sqlite', 'both'],
     csv_path: Path,
     db_manager: Optional[DatabaseManager] = None,
-    db_table_name: Optional[str] = None
+    db_table_name: Optional[str] = None,
+    categorical_mappings: Optional[Dict[str, Dict[str, int]]] = None
 ):
     """
     Private helper to handle saving a single result to CSV, SQLite, or both.
+
+    If `categorical_mappings` is provided, it will reverse-map integer values
+    to their string representations before saving.
     """
+    # --- Reverse Mapping Logic ---
+    # Create a copy to hold the values to be saved
+    save_dict = result_dict.copy()
+
+    if categorical_mappings:
+        for feature_name, mapping in categorical_mappings.items():
+            if feature_name in save_dict:
+                # Create a reverse map {0: 'Category_A', 1: 'Category_B'}
+                reverse_map = {idx: name for name, idx in mapping.items()}
+
+                # Get the integer value from the results (e.g., 0)
+                int_value = save_dict[feature_name]
+
+                # Find the corresponding string (e.g., 'Category_A')
+                # Use .get() for safety, defaulting to the original value if not found
+                string_value = reverse_map.get(int_value, int_value)
+
+                # Update the dictionary that will be saved
+                save_dict[feature_name] = string_value
+
     # Save to CSV
     if save_format in ['csv', 'both']:
-        df_row = pd.DataFrame([
+        df_row = pd.DataFrame([save_dict])
        file_exists = csv_path.exists()
        df_row.to_csv(csv_path, mode='a', index=False, header=not file_exists)
 
     # Save to SQLite
     if save_format in ['sqlite', 'both']:
         if db_manager and db_table_name:
-            db_manager.insert_row(db_table_name,
+            db_manager.insert_row(db_table_name, save_dict)
         else:
             _LOGGER.warning("SQLite saving requested but db_manager or table_name not provided.")
 
```
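To make the bounds rule concrete, here is a minimal usage sketch of the new `create_optimization_bounds` helper. Everything in it is hypothetical: the feature names and bounds are invented, and the `SimpleNamespace` stand-in mimics only the three `FeatureSchema` attributes the function reads; in real use the schema comes from `data_exploration.finalize_feature_schema()`.

```python
from types import SimpleNamespace

from ml_tools.optimization_tools import create_optimization_bounds

# Hypothetical stand-in for a FeatureSchema: it provides only the attributes
# the helper reads (feature_names, categorical_index_map,
# continuous_feature_names). "catalyst" is categorical with cardinality k = 3.
schema = SimpleNamespace(
    feature_names=["temperature", "pressure", "catalyst"],
    categorical_index_map={2: 3},  # feature index -> cardinality
    continuous_feature_names=["temperature", "pressure"],
)

lower, upper = create_optimization_bounds(
    schema=schema,  # type: ignore[arg-type]  (duck-typed stand-in)
    continuous_bounds_map={
        "temperature": (20.0, 90.0),  # hypothetical (min, max) per feature
        "pressure": (1.0, 5.0),
    },
    start_at_zero=True,
)

# With start_at_zero=True the categorical slot gets [-0.5, k - 0.5] = [-0.5, 2.5]:
print(lower)  # [20.0, 1.0, -0.5]
print(upper)  # [90.0, 5.0, 2.5]
```

The 0.5 margins give every category an equal-width interval, so simply rounding the optimizer's continuous suggestion recovers a valid code in {0, 1, 2} without biasing the edge categories.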
ml_tools/path_manager.py CHANGED
```diff
@@ -2,9 +2,10 @@ from pprint import pprint
 from typing import Optional, List, Dict, Union, Literal
 from pathlib import Path
 import re
+import sys
+
 from ._script_info import _script_info
 from ._logger import _LOGGER
-import sys
 
 
 __all__ = [
@@ -13,6 +14,7 @@ __all__ = [
     "sanitize_filename",
     "list_csv_paths",
     "list_files_by_extension",
+    "list_subdirectories"
 ]
 
 
@@ -20,15 +22,35 @@ class PathManager:
     """
     Manages and stores a project's file paths, acting as a centralized
     "path database". It supports both development mode and applications
-    bundled with Pyinstaller.
+    bundled with Pyinstaller or Nuitka.
 
-
+    All keys provided to the manager are automatically sanitized to ensure
+    they are valid Python identifiers. This allows for clean, attribute-style
+    access. The sanitization process involves replacing whitespace with
+    underscores and removing special characters.
     """
     def __init__(
         self,
         anchor_file: str,
         base_directories: Optional[List[str]] = None
     ):
+        """
+        Sets up the core paths for a project by anchoring to a specific file.
+
+        The manager automatically registers a 'ROOT' path, which points to the
+        root of the package, and can pre-register common subdirectories found
+        directly within that root.
+
+        Args:
+            anchor_file (str): The path to a file within your package, typically
+                               the `__file__` of the script where PathManager
+                               is instantiated. This is used to locate the
+                               package root directory.
+            base_directories (List[str] | None): An optional list of strings,
+                                                 where each string is the name
+                                                 of a subdirectory to register
+                                                 relative to the package root.
+        """
         resolved_anchor_path = Path(anchor_file).resolve()
         self._package_name = resolved_anchor_path.parent.name
         self._is_bundled, bundle_root = self._get_bundle_root()
@@ -42,13 +64,17 @@ class PathManager:
             package_root = resolved_anchor_path.parent
 
         # Register the root of the package itself
-        self.
+        self.ROOT = package_root
 
         # Register all the base directories
         if base_directories:
             for dir_name in base_directories:
-
-                self.
+                sanitized_dir_name = self._sanitize_key(dir_name)
+                self._check_underscore_key(sanitized_dir_name)
+                setattr(self, sanitized_dir_name, package_root / sanitized_dir_name)
+
+        # Signal that initialization is complete.
+        self._initialized = True
 
     def _get_bundle_root(self) -> tuple[bool, Optional[str]]:
         """
@@ -71,47 +97,35 @@ class PathManager:
         # --- Not Bundled ---
         else:
             return False, None
+
+    def _check_underscore_key(self, key: str) -> None:
+        if key.startswith("_"):
+            _LOGGER.error(f"Path key '{key}' cannot start with underscores.")
+            raise ValueError()
 
-    def
-        """
-        Retrieves a stored path by its key.
-
-        Args:
-            key (str): The key of the path to retrieve.
-
-        Returns:
-            Path: The resolved, absolute Path object.
-
-        Raises:
-            KeyError: If the key is not found in the manager.
-        """
-        try:
-            return self._paths[key]
-        except KeyError:
-            _LOGGER.error(f"Path key '{key}' not found.")
-            raise
-
-    def update(self, new_paths: Dict[str, Union[str, Path]], overwrite: bool = False) -> None:
+    def update(self, new_paths: Dict[str, Union[str, Path]]) -> None:
         """
-        Adds new paths
+        Adds new paths in the manager.
 
         Args:
             new_paths (Dict[str, Union[str, Path]]): A dictionary where keys are
                                                      the identifiers and values are the
-                                                     Path objects
-            overwrite (bool): If False (default), raises a KeyError if any
-                              key in new_paths already exists. If True,
-                              allows overwriting existing keys.
+                                                     Path objects to store.
         """
-
-
-
-
-
-
-
-
-
+        # Pre-check
+        for key in new_paths:
+            sanitized_key = self._sanitize_key(key)
+            self._check_underscore_key(sanitized_key)
+            if hasattr(self, sanitized_key):
+                _LOGGER.error(f"Cannot add path for key '{sanitized_key}' ({key}): an attribute with this name already exists.")
+                raise KeyError()
+
+        # If no conflicts, add new paths
+        for key, value in new_paths.items():
+            self.__setattr__(key, value)
+
+    def _sanitize_key(self, key: str):
+        return sanitize_filename(key)
 
     def make_dirs(self, keys: Optional[List[str]] = None, verbose: bool = False) -> None:
         """
@@ -146,7 +160,7 @@ class PathManager:
             if path.suffix: # It's a file, not a directory
                 continue
 
-            # ---
+            # --- CRITICAL CHECK ---
             # Determine if the path is inside the main application package.
             is_internal_path = package_root and path.is_relative_to(package_root)
 
@@ -185,15 +199,20 @@ class PathManager:
     # --- Dictionary-Style Methods ---
     def __getitem__(self, key: str) -> Path:
         """Allows dictionary-style getting, e.g., PM['my_key']"""
-        return self.
+        return self.__getattr__(key)
 
     def __setitem__(self, key: str, value: Union[str, Path]):
-        """Allows dictionary-style setting,
-        self.
+        """Allows dictionary-style setting, e.g., PM['my_key'] = path"""
+        sanitized_key = self._sanitize_key(key)
+        self._check_underscore_key(sanitized_key)
+        self.__setattr__(sanitized_key, value)
 
     def __contains__(self, key: str) -> bool:
         """Allows checking for a key's existence, e.g., if 'my_key' in PM"""
-
+        sanitized_key = self._sanitize_key(key)
+        true_false = sanitized_key in self._paths
+        # print(f"key {sanitized_key} in current path dictionary keys: {true_false}")
+        return true_false
 
     def __len__(self) -> int:
         """Allows getting the number of paths, e.g., len(PM)"""
@@ -210,6 +229,54 @@ class PathManager:
     def items(self):
         """Returns all registered (key, Path) pairs."""
         return self._paths.items()
+
+    def __getattr__(self, name: str) -> Path:
+        """
+        Allows attribute-style access to paths, e.g., PM.data.
+        """
+        # Block access to private attributes
+        if name.startswith('_'):
+            _LOGGER.error(f"Access to private attribute '{name}' is not allowed, remove leading underscore.")
+            raise AttributeError()
+
+        sanitized_name = self._sanitize_key(name)
+
+        try:
+            # Look for the key in our internal dictionary
+            return self._paths[sanitized_name]
+        except KeyError:
+            # If not found, raise an AttributeError
+            _LOGGER.error(f"'{type(self).__name__}' object has no attribute or path key '{sanitized_name}'")
+            raise AttributeError()
+
+    def __setattr__(self, name: str, value: Union[str, Path, bool, dict, str, int, tuple]):
+        """Allows attribute-style setting of paths, e.g., PM.data = 'path/to/data'."""
+        # Check for internal attributes, which are set directly on the object.
+        if name.startswith('_'):
+            # This check prevents setting new private attributes after __init__ is done.
+            is_initialized = self.__dict__.get('_initialized', False)
+            if is_initialized:
+                _LOGGER.error(f"Cannot set private attribute '{name}' after initialization.")
+                raise AttributeError()
+            super().__setattr__(name, value)
+            return
+
+        # Sanitize the key for the public path.
+        sanitized_name = self._sanitize_key(name)
+        self._check_underscore_key(sanitized_name)
+
+        # Prevent overwriting existing methods (e.g., PM.status = 'foo').
+        # This check looks at the class, not the instance therefore won't trigger __getattr__.
+        if hasattr(self.__class__, sanitized_name):
+            _LOGGER.error(f"Cannot overwrite existing attribute or method '{sanitized_name}' ({name}).")
+            raise AttributeError()
+
+        if not isinstance(value, (str, Path)):
+            _LOGGER.error(f"Cannot assign type '{type(value).__name__}' to a path. Must be str or Path.")
+            raise TypeError
+
+        # If all checks pass, treat it as a public path and store it in the _paths dictionary.
+        self._paths[sanitized_name] = Path(value)
 
 
     def make_fullpath(
@@ -385,5 +452,37 @@ def list_files_by_extension(directory: Union[str,Path], extension: str, verbose:
     return name_path_dict
 
 
+def list_subdirectories(root_dir: Union[str,Path], verbose: bool=True) -> dict[str, Path]:
+    """
+    Scans a directory and returns a dictionary of its immediate subdirectories.
+
+    Args:
+        root_dir (str | Path): The path to the directory to scan.
+        verbose (bool): If True, prints the number of directories found.
+
+    Returns:
+        dict[str, Path]: A dictionary mapping subdirectory names (str) to their full Path objects.
+    """
+    root_path = make_fullpath(root_dir, enforce="directory")
+
+    directories = [p.resolve() for p in root_path.iterdir() if p.is_dir()]
+
+    if len(directories) < 1:
+        _LOGGER.error(f"No subdirectories found inside '{root_path}'")
+        raise IOError()
+
+    if verbose:
+        count = len(directories)
+        # Use pluralization for better readability
+        plural = 'ies' if count != 1 else 'y'
+        print(f"Found {count} subdirector{plural} in '{root_path.name}'.")
+
+    # Create a dictionary where the key is the directory's name (a string)
+    # and the value is the full Path object.
+    dir_map = {p.name: p for p in directories}
+
+    return dir_map
+
+
 def info():
     _script_info(__all__)
```
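For orientation, here is a minimal usage sketch of the reworked attribute-style `PathManager` and the new `list_subdirectories` helper. The directory names are hypothetical, and the snippet assumes it runs as a script inside the package whose paths are being managed, with dragon-ml-toolbox 14.2.0 installed.

```python
from ml_tools.path_manager import PathManager, list_subdirectories

# Anchor to this file's package; "data" and "models" are hypothetical
# subdirectory names registered relative to the package root.
PM = PathManager(anchor_file=__file__, base_directories=["data", "models"])

print(PM.ROOT)   # package root, registered automatically by __init__
print(PM.data)   # <ROOT>/data, resolved through __getattr__

# Keys are sanitized before storage ("raw data" -> 'raw_data' per the class
# docstring), so dictionary-style and attribute-style access are equivalent.
PM["raw data"] = PM.ROOT / "raw_data"
print("raw data" in PM)  # True; __contains__ sanitizes the key first
print(PM.raw_data)       # <ROOT>/raw_data

PM.make_dirs(verbose=True)  # create the registered directories on disk

# list_subdirectories maps immediate child-directory names to Paths and
# raises IOError when the scanned directory has no subdirectories at all.
subdirs = list_subdirectories(PM.ROOT, verbose=True)
```

Because `__setattr__` rejects non-path values and keys that shadow class attributes, a slip like `PM.items = some_path` fails loudly instead of silently replacing a method.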