dragon-ml-toolbox 10.2.0__py3-none-any.whl → 14.2.0__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.

This version of dragon-ml-toolbox has been flagged as potentially problematic.

Files changed (48)
  1. {dragon_ml_toolbox-10.2.0.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/METADATA +38 -63
  2. dragon_ml_toolbox-14.2.0.dist-info/RECORD +48 -0
  3. {dragon_ml_toolbox-10.2.0.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/licenses/LICENSE +1 -1
  4. {dragon_ml_toolbox-10.2.0.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +11 -0
  5. ml_tools/ETL_cleaning.py +72 -34
  6. ml_tools/ETL_engineering.py +506 -70
  7. ml_tools/GUI_tools.py +2 -1
  8. ml_tools/MICE_imputation.py +212 -7
  9. ml_tools/ML_callbacks.py +73 -40
  10. ml_tools/ML_datasetmaster.py +267 -284
  11. ml_tools/ML_evaluation.py +119 -58
  12. ml_tools/ML_evaluation_multi.py +107 -32
  13. ml_tools/ML_inference.py +15 -5
  14. ml_tools/ML_models.py +234 -170
  15. ml_tools/ML_models_advanced.py +323 -0
  16. ml_tools/ML_optimization.py +321 -97
  17. ml_tools/ML_scaler.py +10 -5
  18. ml_tools/ML_trainer.py +585 -40
  19. ml_tools/ML_utilities.py +528 -0
  20. ml_tools/ML_vision_datasetmaster.py +1315 -0
  21. ml_tools/ML_vision_evaluation.py +260 -0
  22. ml_tools/ML_vision_inference.py +428 -0
  23. ml_tools/ML_vision_models.py +627 -0
  24. ml_tools/ML_vision_transformers.py +58 -0
  25. ml_tools/PSO_optimization.py +10 -7
  26. ml_tools/RNN_forecast.py +2 -0
  27. ml_tools/SQL.py +22 -9
  28. ml_tools/VIF_factor.py +4 -3
  29. ml_tools/_ML_vision_recipe.py +88 -0
  30. ml_tools/__init__.py +1 -0
  31. ml_tools/_logger.py +0 -2
  32. ml_tools/_schema.py +96 -0
  33. ml_tools/constants.py +79 -0
  34. ml_tools/custom_logger.py +164 -16
  35. ml_tools/data_exploration.py +1092 -109
  36. ml_tools/ensemble_evaluation.py +48 -1
  37. ml_tools/ensemble_inference.py +6 -7
  38. ml_tools/ensemble_learning.py +4 -3
  39. ml_tools/handle_excel.py +1 -0
  40. ml_tools/keys.py +80 -0
  41. ml_tools/math_utilities.py +259 -0
  42. ml_tools/optimization_tools.py +198 -24
  43. ml_tools/path_manager.py +144 -45
  44. ml_tools/serde.py +192 -0
  45. ml_tools/utilities.py +287 -227
  46. dragon_ml_toolbox-10.2.0.dist-info/RECORD +0 -36
  47. {dragon_ml_toolbox-10.2.0.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/WHEEL +0 -0
  48. {dragon_ml_toolbox-10.2.0.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/top_level.txt +0 -0
ml_tools/optimization_tools.py CHANGED
@@ -1,6 +1,6 @@
  import matplotlib.pyplot as plt
  import seaborn as sns
- from typing import Union, Any, Literal, Optional
+ from typing import Union, Any, Literal, Optional, Dict, List, Tuple
  from pathlib import Path
  import pandas as pd
 
@@ -9,14 +9,135 @@ from .utilities import yield_dataframes_from_dir
  from ._logger import _LOGGER
  from ._script_info import _script_info
  from .SQL import DatabaseManager
+ from ._schema import FeatureSchema
 
 
  __all__ = [
+     "create_optimization_bounds",
      "parse_lower_upper_bounds",
-     "plot_optimal_feature_distributions"
+     "plot_optimal_feature_distributions",
  ]
 
 
+ def create_optimization_bounds(
+     schema: FeatureSchema,
+     continuous_bounds_map: Dict[str, Tuple[float, float]],
+     start_at_zero: bool = True
+ ) -> Tuple[List[float], List[float]]:
+     """
+     Generates the lower and upper bounds lists for the optimizer from a FeatureSchema.
+ 
+     This helper function automates the creation of unbiased bounds for
+     categorical features and combines them with user-defined bounds for
+     continuous features, using the schema as the single source of truth
+     for feature order and type.
+ 
+     Args:
+         schema (FeatureSchema):
+             The definitive schema object created by
+             `data_exploration.finalize_feature_schema()`.
+         continuous_bounds_map (Dict[str, Tuple[float, float]]):
+             A dictionary mapping the *name* of each **continuous** feature
+             to its (min_bound, max_bound) tuple.
+         start_at_zero (bool):
+             - If True, assumes categorical encoding is [0, 1, ..., k-1].
+               Bounds will be set as [-0.5, k - 0.5].
+             - If False, assumes encoding is [1, 2, ..., k].
+               Bounds will be set as [0.5, k + 0.5].
+ 
+     Returns:
+         Tuple[List[float], List[float]]:
+             A tuple containing two lists: (lower_bounds, upper_bounds).
+ 
+     Raises:
+         ValueError: If a feature is missing from `continuous_bounds_map`
+             or if a feature name in the map is not a
+             continuous feature according to the schema.
+     """
+     # 1. Get feature names and map from schema
+     feature_names = schema.feature_names
+     categorical_index_map = schema.categorical_index_map
+     total_features = len(feature_names)
+ 
+     if total_features <= 0:
+         _LOGGER.error("Schema contains no features.")
+         raise ValueError()
+ 
+     _LOGGER.info(f"Generating bounds for {total_features} total features...")
+ 
+     # 2. Initialize bound lists
+     lower_bounds: List[Optional[float]] = [None] * total_features
+     upper_bounds: List[Optional[float]] = [None] * total_features
+ 
+     # 3. Populate categorical bounds (Index-based)
+     if categorical_index_map:
+         for index, cardinality in categorical_index_map.items():
+             if not (0 <= index < total_features):
+                 _LOGGER.error(f"Categorical index {index} is out of range for the {total_features} features.")
+                 raise ValueError()
+ 
+             if start_at_zero:
+                 # Rule for [0, k-1]: bounds are [-0.5, k - 0.5]
+                 low = -0.5
+                 high = float(cardinality) - 0.5
+             else:
+                 # Rule for [1, k]: bounds are [0.5, k + 0.5]
+                 low = 0.5
+                 high = float(cardinality) + 0.5
+ 
+             lower_bounds[index] = low
+             upper_bounds[index] = high
+ 
+         _LOGGER.info(f"Automatically set bounds for {len(categorical_index_map)} categorical features.")
+     else:
+         _LOGGER.info("No categorical features found in schema.")
+ 
+     # 4. Populate continuous bounds (Name-based)
+     # Use schema.continuous_feature_names for robust checking
+     continuous_names_set = set(schema.continuous_feature_names)
+ 
+     if continuous_names_set != set(continuous_bounds_map.keys()):
+         missing_in_map = continuous_names_set - set(continuous_bounds_map.keys())
+         if missing_in_map:
+             _LOGGER.error(f"The following continuous features are missing from 'continuous_bounds_map': {list(missing_in_map)}")
+ 
+         extra_in_map = set(continuous_bounds_map.keys()) - continuous_names_set
+         if extra_in_map:
+             _LOGGER.error(f"The following features in 'continuous_bounds_map' are not defined as continuous in the schema: {list(extra_in_map)}")
+ 
+         raise ValueError("Mismatch between 'continuous_bounds_map' and schema's continuous features.")
+ 
+     count_continuous = 0
+     for name, (low, high) in continuous_bounds_map.items():
+         # Map name to its index in the *feature-only* list
+         # This is guaranteed to be correct by the schema
+         index = feature_names.index(name)
+ 
+         if lower_bounds[index] is not None:
+             # This should be impossible if schema is correct, but good to check
+             _LOGGER.error(f"Schema conflict: Feature '{name}' (at index {index}) is defined as both continuous and categorical.")
+             raise ValueError()
+ 
+         lower_bounds[index] = float(low)
+         upper_bounds[index] = float(high)
+         count_continuous += 1
+ 
+     _LOGGER.info(f"Manually set bounds for {count_continuous} continuous features.")
+ 
+     # 5. Final Validation (all Nones should be filled)
+     if None in lower_bounds:
+         missing_indices = [i for i, b in enumerate(lower_bounds) if b is None]
+         missing_names = [feature_names[i] for i in missing_indices]
+         _LOGGER.error(f"Failed to create all bounds. This indicates an internal logic error. Missing: {missing_names}")
+         raise RuntimeError("Internal error: Not all bounds were populated.")
+ 
+     # Cast to float lists, as 'None' sentinels are gone
+     return (
+         [float(b) for b in lower_bounds],  # type: ignore
+         [float(b) for b in upper_bounds]   # type: ignore
+     )
+ 
+ 
  def parse_lower_upper_bounds(source: dict[str,tuple[Any,Any]]):
      """
      Parse lower and upper boundaries, returning 2 lists:
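
For reference, a minimal usage sketch of the new create_optimization_bounds helper. The schema object and feature names below are invented for illustration; only the call signature and the bound rules come from the diff above, and the schema is assumed to come from data_exploration.finalize_feature_schema().

    from ml_tools.optimization_tools import create_optimization_bounds

    # Hypothetical schema with feature order [temperature, pressure, alloy_type],
    # where alloy_type is categorical with cardinality 4, encoded as [0, 1, 2, 3].
    lower, upper = create_optimization_bounds(
        schema=schema,  # assumed: a FeatureSchema built beforehand
        continuous_bounds_map={
            "temperature": (20.0, 90.0),
            "pressure": (1.0, 5.0),
        },
        start_at_zero=True,  # categorical bounds become [-0.5, 3.5]
    )
    # lower == [20.0, 1.0, -0.5]
    # upper == [90.0, 5.0, 3.5]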
@@ -29,13 +150,16 @@ def parse_lower_upper_bounds(source: dict[str,tuple[Any,Any]]):
      return lower, upper
 
 
- def plot_optimal_feature_distributions(results_dir: Union[str, Path]):
+ def plot_optimal_feature_distributions(results_dir: Union[str, Path], verbose: bool=False):
      """
-     Analyzes optimization results and plots the distribution of optimal values for each feature.
+     Analyzes optimization results and plots the distribution of optimal values.
 
-     For features with more than two unique values, this function generates a color-coded
-     Kernel Density Estimate (KDE) plot. For binary or constant features, it generates a bar plot
-     showing relative frequency.
+     This function is compatible with mixed-type CSVs (strings for
+     categorical features, numbers for continuous). It automatically
+     detects the data type for each feature and generates:
+ 
+     - A Bar Plot for categorical (string) features.
+     - A KDE Plot for continuous (numeric) features.
 
      Plots are saved in a subdirectory inside the source directory.
 
@@ -55,10 +179,17 @@ def plot_optimal_feature_distributions(results_dir: Union[str, Path]):
      _LOGGER.info(f"📁 Starting analysis from results in: '{results_dir}'")
      data_to_plot = []
      for df, df_name in yield_dataframes_from_dir(results_path):
+         if df.shape[1] < 2:
+             _LOGGER.warning(f"Skipping '{df_name}': must have at least 2 columns (feature + target).")
+             continue
          melted_df = df.iloc[:, :-1].melt(var_name='feature', value_name='value')
-         melted_df['target'] = df_name.replace("Optimization_", "")
+         melted_df['target'] = df_name
          data_to_plot.append(melted_df)
 
+     if not data_to_plot:
+         _LOGGER.error("No valid data to plot after processing all CSVs.")
+         return
+ 
      long_df = pd.concat(data_to_plot, ignore_index=True)
      features = long_df['feature'].unique()
      _LOGGER.info(f"Found data for {len(features)} features across {len(long_df['target'].unique())} targets. Generating plots...")
@@ -66,12 +197,23 @@ def plot_optimal_feature_distributions(results_dir: Union[str, Path]):
      # --- Plotting Loop ---
      for feature_name in features:
          plt.figure(figsize=(12, 7))
+         # Use .copy() to avoid SettingWithCopyWarning
+         # feature_df = long_df[long_df['feature'] == feature_name].copy()
          feature_df = long_df[long_df['feature'] == feature_name]
 
-         # Check if the feature is binary or constant
-         if feature_df['value'].nunique() <= 2:
-             # PLOT 1: For discrete values, calculate percentages and use a true bar plot.
-             # This ensures the X-axis is clean (e.g., just 0 and 1).
+         # --- Type-checking logic ---
+         # Attempt to convert 'value' column to numeric.
+         # errors='coerce' turns non-numeric strings (e.g., 'Category_A') into NaN
+         feature_df['numeric_value'] = pd.to_numeric(feature_df['value'], errors='coerce')
+ 
+         # If *any* value failed conversion (is NaN), treat it as categorical.
+         if feature_df['numeric_value'].isna().any():
+ 
+             # --- PLOT 1: CATEGORICAL (String-based) ---
+             if verbose:
+                 _LOGGER.info(f"Plotting '{feature_name}' as categorical (bar plot).")
+ 
+             # Calculate percentages for a clean bar plot
              norm_df = (feature_df.groupby('target')['value']
                         .value_counts(normalize=True)
                         .mul(100)
@@ -79,21 +221,29 @@ def plot_optimal_feature_distributions(results_dir: Union[str, Path]):
                         .reset_index())
 
              ax = sns.barplot(data=norm_df, x='value', y='percent', hue='target')
- 
-             plt.title(f"Optimal Value Distribution for '{feature_name}'", fontsize=16)
              plt.ylabel("Frequency (%)", fontsize=12)
              ax.set_ylim(0, 100)  # Set Y-axis from 0 to 100
+ 
+             # Rotate x-labels if there are many categories
+             if norm_df['value'].nunique() > 10:
+                 plt.xticks(rotation=45, ha='right')
 
          else:
-             # PLOT 2: KDE plot for continuous values.
-             ax = sns.kdeplot(data=feature_df, x='value', hue='target',
+             # --- PLOT 2: CONTINUOUS (Numeric-based) ---
+             # All values were successfully converted to numeric.
+             if verbose:
+                 _LOGGER.info(f"Plotting '{feature_name}' as continuous (KDE plot).")
+ 
+             # Use the 'numeric_value' column (which is float type) for the KDE
+             ax = sns.kdeplot(data=feature_df, x='numeric_value', hue='target',
                               fill=True, alpha=0.1, warn_singular=False)
- 
-             plt.title(f"Optimal Value Distribution for '{feature_name}'", fontsize=16)
-             plt.ylabel("Density", fontsize=12)  # Y-axis is "Density" for KDE plots
+ 
+             # Set the x-axis label back to the original feature name
+             plt.xlabel("Feature Value", fontsize=12)
+             plt.ylabel("Density", fontsize=12)
 
          # --- Common settings for both plot types ---
-         plt.xlabel("Feature Value", fontsize=12)
+         plt.title(f"Optimal Value Distribution for '{feature_name}'", fontsize=16)
          plt.grid(axis='y', alpha=0.5, linestyle='--')
 
          legend = ax.get_legend()
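
The type-detection rule above can be summarized in isolation; a small self-contained sketch (the series values are invented):

    import pandas as pd

    values = pd.Series(["Steel", "Brass", "Steel"])   # string-encoded categories
    numeric = pd.to_numeric(values, errors="coerce")  # -> [NaN, NaN, NaN]
    print(numeric.isna().any())                       # True -> bar plot branch

    values = pd.Series(["1.5", "2.0", "3.25"])        # numeric strings
    numeric = pd.to_numeric(values, errors="coerce")  # -> [1.5, 2.0, 3.25]
    print(numeric.isna().any())                       # False -> KDE branch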
@@ -106,28 +256,52 @@ def plot_optimal_feature_distributions(results_dir: Union[str, Path]):
          plt.close()
 
      _LOGGER.info(f"All plots saved successfully to: '{output_path}'")
- 
+ 
 
  def _save_result(
      result_dict: dict,
      save_format: Literal['csv', 'sqlite', 'both'],
      csv_path: Path,
      db_manager: Optional[DatabaseManager] = None,
-     db_table_name: Optional[str] = None
+     db_table_name: Optional[str] = None,
+     categorical_mappings: Optional[Dict[str, Dict[str, int]]] = None
  ):
      """
      Private helper to handle saving a single result to CSV, SQLite, or both.
+ 
+     If `categorical_mappings` is provided, it will reverse-map integer values
+     to their string representations before saving.
      """
+     # --- Reverse Mapping Logic ---
+     # Create a copy to hold the values to be saved
+     save_dict = result_dict.copy()
+ 
+     if categorical_mappings:
+         for feature_name, mapping in categorical_mappings.items():
+             if feature_name in save_dict:
+                 # Create a reverse map {0: 'Category_A', 1: 'Category_B'}
+                 reverse_map = {idx: name for name, idx in mapping.items()}
+ 
+                 # Get the integer value from the results (e.g., 0)
+                 int_value = save_dict[feature_name]
+ 
+                 # Find the corresponding string (e.g., 'Category_A')
+                 # Use .get() for safety, defaulting to the original value if not found
+                 string_value = reverse_map.get(int_value, int_value)
+ 
+                 # Update the dictionary that will be saved
+                 save_dict[feature_name] = string_value
+ 
      # Save to CSV
      if save_format in ['csv', 'both']:
-         df_row = pd.DataFrame([result_dict])
+         df_row = pd.DataFrame([save_dict])
          file_exists = csv_path.exists()
          df_row.to_csv(csv_path, mode='a', index=False, header=not file_exists)
 
      # Save to SQLite
      if save_format in ['sqlite', 'both']:
          if db_manager and db_table_name:
-             db_manager.insert_row(db_table_name, result_dict)
+             db_manager.insert_row(db_table_name, save_dict)
          else:
              _LOGGER.warning("SQLite saving requested but db_manager or table_name not provided.")
 
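To make the new reverse-mapping step concrete, here is a standalone walk-through. The feature names and categories are invented; the mapping shape {feature: {category_string: int_code}} follows the new categorical_mappings parameter:

    categorical_mappings = {"alloy_type": {"Steel": 0, "Brass": 1, "Copper": 2}}
    result_dict = {"alloy_type": 1, "temperature": 72.5}

    save_dict = result_dict.copy()
    for feature_name, mapping in categorical_mappings.items():
        if feature_name in save_dict:
            # Invert {'Steel': 0, ...} to {0: 'Steel', ...}
            reverse_map = {idx: name for name, idx in mapping.items()}
            # Fall back to the original value if the code is unknown
            save_dict[feature_name] = reverse_map.get(save_dict[feature_name],
                                                      save_dict[feature_name])

    print(save_dict)  # {'alloy_type': 'Brass', 'temperature': 72.5}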
ml_tools/path_manager.py CHANGED
@@ -2,9 +2,10 @@ from pprint import pprint
  from typing import Optional, List, Dict, Union, Literal
  from pathlib import Path
  import re
+ import sys
+ 
  from ._script_info import _script_info
  from ._logger import _LOGGER
- import sys
 
 
  __all__ = [
@@ -13,6 +14,7 @@ __all__ = [
      "sanitize_filename",
      "list_csv_paths",
      "list_files_by_extension",
+     "list_subdirectories"
  ]
 
 
@@ -20,15 +22,35 @@ class PathManager:
      """
      Manages and stores a project's file paths, acting as a centralized
      "path database". It supports both development mode and applications
-     bundled with Pyinstaller.
+     bundled with Pyinstaller or Nuitka.
 
-     Supports python dictionary syntax.
+     All keys provided to the manager are automatically sanitized to ensure
+     they are valid Python identifiers. This allows for clean, attribute-style
+     access. The sanitization process involves replacing whitespace with
+     underscores and removing special characters.
      """
      def __init__(
          self,
          anchor_file: str,
          base_directories: Optional[List[str]] = None
      ):
+         """
+         Sets up the core paths for a project by anchoring to a specific file.
+ 
+         The manager automatically registers a 'ROOT' path, which points to the
+         root of the package, and can pre-register common subdirectories found
+         directly within that root.
+ 
+         Args:
+             anchor_file (str): The path to a file within your package, typically
+                                the `__file__` of the script where PathManager
+                                is instantiated. This is used to locate the
+                                package root directory.
+             base_directories (List[str] | None): An optional list of strings,
+                                                  where each string is the name
+                                                  of a subdirectory to register
+                                                  relative to the package root.
+         """
          resolved_anchor_path = Path(anchor_file).resolve()
          self._package_name = resolved_anchor_path.parent.name
          self._is_bundled, bundle_root = self._get_bundle_root()
@@ -42,13 +64,17 @@ class PathManager:
          package_root = resolved_anchor_path.parent
 
          # Register the root of the package itself
-         self._paths["ROOT"] = package_root
+         self.ROOT = package_root
 
          # Register all the base directories
          if base_directories:
              for dir_name in base_directories:
-                 # This logic works for both dev mode and bundled mode
-                 self._paths[dir_name] = package_root / dir_name
+                 sanitized_dir_name = self._sanitize_key(dir_name)
+                 self._check_underscore_key(sanitized_dir_name)
+                 setattr(self, sanitized_dir_name, package_root / sanitized_dir_name)
+ 
+         # Signal that initialization is complete.
+         self._initialized = True
 
      def _get_bundle_root(self) -> tuple[bool, Optional[str]]:
          """
@@ -71,47 +97,35 @@ class PathManager:
          # --- Not Bundled ---
          else:
              return False, None
+ 
+     def _check_underscore_key(self, key: str) -> None:
+         if key.startswith("_"):
+             _LOGGER.error(f"Path key '{key}' cannot start with underscores.")
+             raise ValueError()
 
-     def get(self, key: str) -> Path:
-         """
-         Retrieves a stored path by its key.
- 
-         Args:
-             key (str): The key of the path to retrieve.
- 
-         Returns:
-             Path: The resolved, absolute Path object.
- 
-         Raises:
-             KeyError: If the key is not found in the manager.
-         """
-         try:
-             return self._paths[key]
-         except KeyError:
-             _LOGGER.error(f"Path key '{key}' not found.")
-             raise
- 
-     def update(self, new_paths: Dict[str, Union[str, Path]], overwrite: bool = False) -> None:
+     def update(self, new_paths: Dict[str, Union[str, Path]]) -> None:
          """
-         Adds new paths or overwrites existing ones in the manager.
+         Adds new paths in the manager.
 
          Args:
              new_paths (Dict[str, Union[str, Path]]): A dictionary where keys are
                                                       the identifiers and values are the
-                                                      Path objects or strings to store.
-             overwrite (bool): If False (default), raises a KeyError if any
-                               key in new_paths already exists. If True,
-                               allows overwriting existing keys.
+                                                      Path objects to store.
          """
-         if not overwrite:
-             for key in new_paths:
-                 if key in self._paths:
-                     _LOGGER.error(f"Path key '{key}' already exists in the manager. To replace it, call update() with overwrite=True.")
-                     raise KeyError
- 
-         # Resolve any string paths to Path objects before storing
-         resolved_new_paths = {k: Path(v) for k, v in new_paths.items()}
-         self._paths.update(resolved_new_paths)
+         # Pre-check
+         for key in new_paths:
+             sanitized_key = self._sanitize_key(key)
+             self._check_underscore_key(sanitized_key)
+             if hasattr(self, sanitized_key):
+                 _LOGGER.error(f"Cannot add path for key '{sanitized_key}' ({key}): an attribute with this name already exists.")
+                 raise KeyError()
+ 
+         # If no conflicts, add new paths
+         for key, value in new_paths.items():
+             self.__setattr__(key, value)
+ 
+     def _sanitize_key(self, key: str):
+         return sanitize_filename(key)
 
      def make_dirs(self, keys: Optional[List[str]] = None, verbose: bool = False) -> None:
          """
@@ -146,7 +160,7 @@ class PathManager:
              if path.suffix:  # It's a file, not a directory
                  continue
 
-             # --- THE CRITICAL CHECK ---
+             # --- CRITICAL CHECK ---
              # Determine if the path is inside the main application package.
              is_internal_path = package_root and path.is_relative_to(package_root)
 
@@ -185,15 +199,20 @@ class PathManager:
      # --- Dictionary-Style Methods ---
      def __getitem__(self, key: str) -> Path:
          """Allows dictionary-style getting, e.g., PM['my_key']"""
-         return self.get(key)
+         return self.__getattr__(key)
 
      def __setitem__(self, key: str, value: Union[str, Path]):
-         """Allows dictionary-style setting, does not allow overwriting, e.g., PM['my_key'] = path"""
-         self.update({key: value}, overwrite=False)
+         """Allows dictionary-style setting, e.g., PM['my_key'] = path"""
+         sanitized_key = self._sanitize_key(key)
+         self._check_underscore_key(sanitized_key)
+         self.__setattr__(sanitized_key, value)
 
      def __contains__(self, key: str) -> bool:
          """Allows checking for a key's existence, e.g., if 'my_key' in PM"""
-         return key in self._paths
+         sanitized_key = self._sanitize_key(key)
+         true_false = sanitized_key in self._paths
+         # print(f"key {sanitized_key} in current path dictionary keys: {true_false}")
+         return true_false
 
      def __len__(self) -> int:
          """Allows getting the number of paths, e.g., len(PM)"""
@@ -210,6 +229,54 @@ class PathManager:
      def items(self):
          """Returns all registered (key, Path) pairs."""
          return self._paths.items()
+ 
+     def __getattr__(self, name: str) -> Path:
+         """
+         Allows attribute-style access to paths, e.g., PM.data.
+         """
+         # Block access to private attributes
+         if name.startswith('_'):
+             _LOGGER.error(f"Access to private attribute '{name}' is not allowed, remove leading underscore.")
+             raise AttributeError()
+ 
+         sanitized_name = self._sanitize_key(name)
+ 
+         try:
+             # Look for the key in our internal dictionary
+             return self._paths[sanitized_name]
+         except KeyError:
+             # If not found, raise an AttributeError
+             _LOGGER.error(f"'{type(self).__name__}' object has no attribute or path key '{sanitized_name}'")
+             raise AttributeError()
+ 
+     def __setattr__(self, name: str, value: Union[str, Path, bool, dict, str, int, tuple]):
+         """Allows attribute-style setting of paths, e.g., PM.data = 'path/to/data'."""
+         # Check for internal attributes, which are set directly on the object.
+         if name.startswith('_'):
+             # This check prevents setting new private attributes after __init__ is done.
+             is_initialized = self.__dict__.get('_initialized', False)
+             if is_initialized:
+                 _LOGGER.error(f"Cannot set private attribute '{name}' after initialization.")
+                 raise AttributeError()
+             super().__setattr__(name, value)
+             return
+ 
+         # Sanitize the key for the public path.
+         sanitized_name = self._sanitize_key(name)
+         self._check_underscore_key(sanitized_name)
+ 
+         # Prevent overwriting existing methods (e.g., PM.status = 'foo').
+         # This check looks at the class, not the instance therefore won't trigger __getattr__.
+         if hasattr(self.__class__, sanitized_name):
+             _LOGGER.error(f"Cannot overwrite existing attribute or method '{sanitized_name}' ({name}).")
+             raise AttributeError()
+ 
+         if not isinstance(value, (str, Path)):
+             _LOGGER.error(f"Cannot assign type '{type(value).__name__}' to a path. Must be str or Path.")
+             raise TypeError
+ 
+         # If all checks pass, treat it as a public path and store it in the _paths dictionary.
+         self._paths[sanitized_name] = Path(value)
 
 
  def make_fullpath(
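
Taken together, these dunder methods give PathManager a uniform attribute/dictionary API. A hedged sketch of the intended usage (directory names are invented; the sanitization behavior follows the class docstring above):

    from ml_tools.path_manager import PathManager

    PM = PathManager(anchor_file=__file__, base_directories=["data", "models"])

    print(PM.ROOT)                       # package root, registered in __init__
    print(PM.data)                       # attribute-style access via __getattr__

    PM.results = "results"               # __setattr__ stores Path('results')
    print(PM["results"])                 # __getitem__ routes through __getattr__

    PM.update({"raw data": "data/raw"})  # key is sanitized to 'raw_data'
    print(PM.raw_data)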
@@ -385,5 +452,37 @@ def list_files_by_extension(directory: Union[str,Path], extension: str, verbose:
      return name_path_dict
 
 
+ def list_subdirectories(root_dir: Union[str,Path], verbose: bool=True) -> dict[str, Path]:
+     """
+     Scans a directory and returns a dictionary of its immediate subdirectories.
+ 
+     Args:
+         root_dir (str | Path): The path to the directory to scan.
+         verbose (bool): If True, prints the number of directories found.
+ 
+     Returns:
+         dict[str, Path]: A dictionary mapping subdirectory names (str) to their full Path objects.
+     """
+     root_path = make_fullpath(root_dir, enforce="directory")
+ 
+     directories = [p.resolve() for p in root_path.iterdir() if p.is_dir()]
+ 
+     if len(directories) < 1:
+         _LOGGER.error(f"No subdirectories found inside '{root_path}'")
+         raise IOError()
+ 
+     if verbose:
+         count = len(directories)
+         # Use pluralization for better readability
+         plural = 'ies' if count != 1 else 'y'
+         print(f"Found {count} subdirector{plural} in '{root_path.name}'.")
+ 
+     # Create a dictionary where the key is the directory's name (a string)
+     # and the value is the full Path object.
+     dir_map = {p.name: p for p in directories}
+ 
+     return dir_map
+ 
+ 
  def info():
      _script_info(__all__)
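
And a short usage sketch for the new list_subdirectories helper (the directory layout is invented; note the function raises IOError when no subdirectories exist):

    from ml_tools.path_manager import list_subdirectories

    runs = list_subdirectories("results", verbose=True)
    # e.g. prints "Found 3 subdirectories in 'results'." and returns
    # {'run_01': Path('results/run_01'), 'run_02': ..., 'run_03': ...}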