dragon-ml-toolbox 10.11.1__py3-none-any.whl → 10.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dragon_ml_toolbox-{10.11.1 → 10.12.0}.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 10.11.1
+Version: 10.12.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
dragon_ml_toolbox-{10.11.1 → 10.12.0}.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
-dragon_ml_toolbox-10.11.1.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
-dragon_ml_toolbox-10.11.1.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
+dragon_ml_toolbox-10.12.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
+dragon_ml_toolbox-10.12.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
 ml_tools/ETL_cleaning.py,sha256=lSP5q6-ukGhJBPV8dlsqJvPXAzj4du_0J-SbtEd0Pjg,19292
 ml_tools/ETL_engineering.py,sha256=a6KCWH6kRatZtjaFEF_o917ApPMK5_vRD-BjfCDAl-E,49400
 ml_tools/GUI_tools.py,sha256=kEQWg-bog3pB5tI22gMGKWaCGHnz9TB2Lvvfhf5F2CI,45412
@@ -9,7 +9,7 @@ ml_tools/ML_datasetmaster.py,sha256=vqKZhCXsvN5yeRJdOKqMPh5OhY1xe6xlNjM3WoH5lys,
 ml_tools/ML_evaluation.py,sha256=6FB6S-aDDpFzQdrp3flBVECzEsHhMbQknYVGhHooEFs,16207
 ml_tools/ML_evaluation_multi.py,sha256=2jTSNFCu8cz5C05EusnrDyffs59M2Fq3UXSHxo2TR1A,12515
 ml_tools/ML_inference.py,sha256=SGDPiPxs_OYDKKRZziFMyaWcC8A37c70W9t-dMP5niI,23066
-ml_tools/ML_models.py,sha256=8UOMg9Qn8qtecUGfgnLRedX-lCWYwEs-C5RJ2m8mZM4,27544
+ml_tools/ML_models.py,sha256=JMFOuw4jtX5RtUFpkQWS8-dzDW0AwqYjbl67XRCVubA,27996
 ml_tools/ML_optimization.py,sha256=a2Uxe1g-y4I-gFa8ENIM8QDS-Pz3hoPRRaVXAWMbyQA,13491
 ml_tools/ML_scaler.py,sha256=h2ymq5u953Lx60Qb38Y0mAWj85x9PbnP0xYNQ3pd8-w,7535
 ml_tools/ML_trainer.py,sha256=_g48w5Ak-wQr5fGHdJqlcpnzv3gWyL1ghkOhy9VOZbo,23930
@@ -21,16 +21,16 @@ ml_tools/__init__.py,sha256=q0y9faQ6e17XCQ7eUiCZ1FJ4Bg5EQqLjZ9f_l5REUUY,41
 ml_tools/_logger.py,sha256=wcImAiXEZKPNcwM30qBh3t7HvoPURonJY0nrgMGF0sM,4719
 ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
 ml_tools/custom_logger.py,sha256=ry43hk54K6xKo8jRAgq1sFxUpOA9T0LIJ7sw0so2BW0,5880
-ml_tools/data_exploration.py,sha256=4McT2BR9muK4JVVTKUfvRyThe0m_o2vpy9RJ1f_1FeY,28692
+ml_tools/data_exploration.py,sha256=-aTi5jmv4AepPgi2k_85qEJsSLx5zPOtTbhorqzUvGQ,38542
 ml_tools/ensemble_evaluation.py,sha256=FGHSe8LBI8_w8LjNeJWOcYQ1UK_mc6fVah8gmSvNVGg,26853
 ml_tools/ensemble_inference.py,sha256=EFHnbjbu31fcVp88NBx8lWAVdu2Gpg9MY9huVZJHFfM,9350
 ml_tools/ensemble_learning.py,sha256=3s0kH4i_naj0IVl_T4knst-Hwg4TScWjEdsXX5KAi7I,21929
 ml_tools/handle_excel.py,sha256=He4UT15sCGhaG-JKfs7uYVAubxWjrqgJ6U7OhMR2fuE,14005
 ml_tools/keys.py,sha256=FDpbS3Jb0pjrVvvp2_8nZi919mbob_-xwuy5OOtKM_A,1848
 ml_tools/optimization_tools.py,sha256=P3I6lIpvZ8Xf2kX5FvvBKBmrK2pB6idBpkTzfUJxTeE,5073
-ml_tools/path_manager.py,sha256=CCZSlHpUiuaHsMAYcmMGZ9GvbHNbbrTqYFicgWz6pRs,17883
+ml_tools/path_manager.py,sha256=ke0MYOhYheRPX599GUbrvRsYHn2JKUmMDldS5LP6LQA,18431
 ml_tools/utilities.py,sha256=uheMUjQJ1zI69gASsE-mCq4KlRPVGgrgqson02rGNYM,30755
-dragon_ml_toolbox-10.11.1.dist-info/METADATA,sha256=x3e66l1-dXkoE6ldWAH77epdEMnqj6YAvSVKYDVFhHU,6969
-dragon_ml_toolbox-10.11.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-10.11.1.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-10.11.1.dist-info/RECORD,,
+dragon_ml_toolbox-10.12.0.dist-info/METADATA,sha256=dgxB7Ad4a5Zf1CPzLZFo5ny2Siotmsm2mWjQ8B7Nsa4,6969
+dragon_ml_toolbox-10.12.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-10.12.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-10.12.0.dist-info/RECORD,,
ml_tools/ML_models.py CHANGED
@@ -300,8 +300,8 @@ class TabularTransformer(nn.Module, _ArchitectureHandlerMixin):
     sequence with a standard Transformer Encoder.
     """
     def __init__(self, *,
+                 in_features: int,
                  out_targets: int,
-                 numerical_indices: List[int],
                  categorical_map: Dict[int, int],
                  embedding_dim: int = 32,
                  num_heads: int = 8,
@@ -309,8 +309,8 @@ class TabularTransformer(nn.Module, _ArchitectureHandlerMixin):
                  dropout: float = 0.1):
         """
         Args:
+            in_features (int): The total number of columns in the input data (features).
            out_targets (int): Number of output targets (1 for regression).
-            numerical_indices (List[int]): Column indices for numerical features.
            categorical_map (Dict[int, int]): Maps categorical column index to its cardinality (number of unique categories).
            embedding_dim (int): The dimension for all feature embeddings. Must be divisible by num_heads.
            num_heads (int): The number of heads in the multi-head attention mechanism.
@@ -330,15 +330,25 @@ class TabularTransformer(nn.Module, _ArchitectureHandlerMixin):
         their cardinality (the number of unique categories) via the `categorical_map` parameter.

         **Ordinal & Binary Features** (e.g., 'Low/Medium/High', 'True/False'): Should be treated as **numerical**. Map them to numbers that
-        represent their state (e.g., `{'Low': 0, 'Medium': 1}` or `{False: 0, True: 1}`). Their column indices should be included in the
-        `numerical_indices` list.
+        represent their state (e.g., `{'Low': 0, 'Medium': 1}` or `{False: 0, True: 1}`). Their column indices should **NOT** be included in the
+        `categorical_map` parameter.

-        **Standard Numerical Features** (e.g., 'Age', 'Price'): Should be included in the `numerical_indices` list. It is highly recommended to
-        scale them before training.
+        **Standard Numerical and Continuous Features** (e.g., 'Age', 'Price'): It is highly recommended to scale them before training.
         """
         super().__init__()
-
+
+        # --- Validation ---
+        if categorical_map and max(categorical_map.keys()) >= in_features:
+            _LOGGER.error(f"A categorical index ({max(categorical_map.keys())}) is out of bounds for the provided input features ({in_features}).")
+            raise ValueError()
+
+        # --- Derive numerical indices ---
+        all_indices = set(range(in_features))
+        categorical_indices_set = set(categorical_map.keys())
+        numerical_indices = sorted(list(all_indices - categorical_indices_set))
+
         # --- Save configuration ---
+        self.in_features = in_features
         self.out_targets = out_targets
         self.numerical_indices = numerical_indices
         self.categorical_map = categorical_map
@@ -405,8 +415,8 @@ class TabularTransformer(nn.Module, _ArchitectureHandlerMixin):
     def get_architecture_config(self) -> Dict[str, Any]:
         """Returns the full configuration of the model."""
         return {
+            'in_features': self.in_features,
             'out_targets': self.out_targets,
-            'numerical_indices': self.numerical_indices,
             'categorical_map': self.categorical_map,
             'embedding_dim': self.embedding_dim,
             'num_heads': self.num_heads,
@@ -416,11 +426,9 @@ class TabularTransformer(nn.Module, _ArchitectureHandlerMixin):

     def __repr__(self) -> str:
         """Returns the developer-friendly string representation of the model."""
-        num_features = len(self.numerical_indices) + len(self.categorical_map)
-
         # Build the architecture string part-by-part
         parts = [
-            f"Tokenizer(features={num_features}, dim={self.embedding_dim})",
+            f"Tokenizer(features={self.in_features}, dim={self.embedding_dim})",
             "[CLS]",
             f"TransformerEncoder(layers={self.num_layers}, heads={self.num_heads})",
             f"PredictionHead(outputs={self.out_targets})"
ml_tools/data_exploration.py CHANGED
@@ -22,6 +22,7 @@ __all__ = [
     "drop_columns_with_missing_data",
     "drop_macro",
     "clean_column_names",
+    "encode_categorical_features",
     "split_features_targets",
     "split_continuous_binary",
     "plot_correlation_heatmap",
@@ -29,7 +30,9 @@ __all__ = [
     "clip_outliers_single",
     "clip_outliers_multi",
     "match_and_filter_columns_by_regex",
-    "standardize_percentages"
+    "standardize_percentages",
+    "create_transformer_categorical_map",
+    "reconstruct_one_hot"
 ]


@@ -337,6 +340,90 @@ def clean_column_names(df: pd.DataFrame, replacement_char: str = '-', replacemen
     return new_df


+def encode_categorical_features(
+    df: pd.DataFrame,
+    columns_to_encode: List[str],
+    encode_nulls: bool,
+    split_resulting_dataset: bool = True,
+    verbose: bool = True
+) -> Tuple[Dict[str, Dict[str, int]], pd.DataFrame, Optional[pd.DataFrame]]:
+    """
+    Finds unique values in specified categorical columns, encodes them into integers,
+    and returns a dictionary containing the mappings for each column.
+
+    This function automates the label encoding process and generates a simple,
+    human-readable dictionary of the mappings.
+
+    Args:
+        df (pd.DataFrame): The input DataFrame.
+        columns_to_encode (List[str]): A list of column names to be encoded.
+        encode_nulls (bool): If True, encodes Null values as a distinct category
+            "Other" with a value of 0. Other categories start from 1.
+            If False, Nulls are ignored.
+        split_resulting_dataset (bool): If True, returns two separate DataFrames:
+            one with non-categorical columns and one with the encoded columns.
+            If False, returns a single DataFrame with all columns.
+        verbose (bool): If True, prints encoding progress.
+
+    Returns:
+        Tuple:
+
+        - Dict[str, Dict[str, int]]: A dictionary where each key is a column name and the value is its category-to-integer mapping.
+
+        - pd.DataFrame: The original dataframe with or without encoded columns (see `split_resulting_dataset`).
+
+        - pd.DataFrame | None: If `split_resulting_dataset` is True, the encoded columns as a new dataframe.
+    """
+    df_encoded = df.copy()
+
+    # Validate columns
+    valid_columns = [col for col in columns_to_encode if col in df_encoded.columns]
+    missing_columns = set(columns_to_encode) - set(valid_columns)
+    if missing_columns:
+        _LOGGER.warning(f"Columns not found and will be skipped: {list(missing_columns)}")
+
+    mappings: Dict[str, Dict[str, int]] = {}
+
+    _LOGGER.info(f"Encoding {len(valid_columns)} categorical column(s).")
+    for col_name in valid_columns:
+        has_nulls = df_encoded[col_name].isnull().any()
+
+        if encode_nulls and has_nulls:
+            # Handle nulls: "Other" -> 0, other categories -> 1, 2, 3...
+            categories = sorted([str(cat) for cat in df_encoded[col_name].dropna().unique()])
+            # Start mapping from 1 for non-null values
+            mapping = {category: i + 1 for i, category in enumerate(categories)}
+
+            # Apply mapping and fill remaining NaNs with 0
+            mapped_series = df_encoded[col_name].astype(str).map(mapping)
+            df_encoded[col_name] = mapped_series.fillna(0).astype(int)
+
+            # Create the complete user-facing map including "Other"
+            user_mapping = {**mapping, "Other": 0}
+            mappings[col_name] = user_mapping
+        else:
+            # Ignore nulls
+            categories = sorted([str(cat) for cat in df_encoded[col_name].dropna().unique()])
+
+            mapping = {category: i for i, category in enumerate(categories)}
+
+            df_encoded[col_name] = df_encoded[col_name].astype(str).map(mapping)
+
+            mappings[col_name] = mapping
+
+        if verbose:
+            cardinality = len(mappings[col_name])
+            print(f" - Encoded '{col_name}' with {cardinality} unique values.")
+
+    # Handle the dataset splitting logic
+    if split_resulting_dataset:
+        # Selecting with a list of columns already returns a DataFrame
+        df_categorical = df_encoded[valid_columns]
+        df_non_categorical = df.drop(columns=valid_columns)
+        return mappings, df_non_categorical, df_categorical
+    else:
+        return mappings, df_encoded, None
+
+
 def split_features_targets(df: pd.DataFrame, targets: list[str]):
     """
     Splits a DataFrame's columns into features and targets.
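A short usage sketch for the new `encode_categorical_features` (toy data; the printed values follow from the sorted-category logic above):

```python
import pandas as pd
from ml_tools.data_exploration import encode_categorical_features

df = pd.DataFrame({
    "color": ["red", "blue", None, "red"],
    "age": [3, 5, 2, 9],
})

# With encode_nulls=True, nulls become the "Other" category (0) and the
# real categories are numbered from 1 in sorted order.
mappings, df_rest, df_cat = encode_categorical_features(
    df, columns_to_encode=["color"], encode_nulls=True
)

print(mappings)                  # {'color': {'blue': 1, 'red': 2, 'Other': 0}}
print(df_cat["color"].tolist())  # [2, 1, 0, 2]
print(list(df_rest.columns))     # ['age']
```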
@@ -766,6 +853,141 @@ def standardize_percentages(
     return df_copy


+def create_transformer_categorical_map(
+    df: pd.DataFrame,
+    mappings: Dict[str, Dict[str, int]],
+    verbose: bool = True
+) -> Dict[int, int]:
+    """
+    Creates the `categorical_map` required by a `TabularTransformer` model.
+
+    This function should be called late in the preprocessing pipeline, after all
+    column additions, deletions, or reordering have occurred. It uses the final
+    DataFrame's column order to map the correct column index to its cardinality.
+
+    Args:
+        df (pd.DataFrame): The final, processed DataFrame.
+        mappings (Dict[str, Dict[str, int]]): The mappings dictionary generated by
+            `encode_categorical_features`, containing the category-to-integer
+            mapping for each categorical column.
+        verbose (bool): If True, prints mapping progress.
+
+    Returns:
+        (Dict[int, int]): The final `categorical_map` for the transformer,
+            mapping each column's current index to its cardinality (e.g., {0: 3}).
+    """
+    transformer_map = {}
+    categorical_column_names = mappings.keys()
+
+    _LOGGER.info("Creating categorical map for TabularTransformer.")
+    for col_name in categorical_column_names:
+        if col_name in df.columns:
+            col_idx = df.columns.get_loc(col_name)
+
+            # Get cardinality directly from the length of the mapping dictionary
+            cardinality = len(mappings[col_name])
+
+            transformer_map[col_idx] = cardinality
+            if verbose:
+                print(f" - Mapping column '{col_name}' at index {col_idx} with cardinality {cardinality}.")
+        else:
+            _LOGGER.warning(f"Categorical column '{col_name}' not found in the final DataFrame. Skipping.")
+
+    return transformer_map
+
+
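The two new helpers are meant to chain: the mappings from `encode_categorical_features` feed `create_transformer_categorical_map`, whose output plugs into the `categorical_map` parameter of `TabularTransformer`. A sketch on toy data:

```python
import pandas as pd
from ml_tools.data_exploration import (
    create_transformer_categorical_map,
    encode_categorical_features,
)

df = pd.DataFrame({
    "age": [3, 5],
    "color": ["red", "blue"],
    "shape": ["round", "square"],
})

# Keep everything in one frame so column indices match the final layout.
mappings, df_encoded, _ = encode_categorical_features(
    df, ["color", "shape"], encode_nulls=False, split_resulting_dataset=False
)

cat_map = create_transformer_categorical_map(df_encoded, mappings)
print(cat_map)  # {1: 2, 2: 2}: column index -> cardinality
```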
+def reconstruct_one_hot(
+    df: pd.DataFrame,
+    base_feature_names: List[str],
+    separator: str = '_',
+    drop_original: bool = True
+) -> pd.DataFrame:
+    """
+    Reconstructs original categorical columns from a one-hot encoded DataFrame.
+
+    This function identifies groups of one-hot encoded columns based on a common
+    prefix (base feature name) and a separator. It then collapses each group
+    into a single column containing the categorical value.
+
+    Args:
+        df (pd.DataFrame):
+            The input DataFrame with one-hot encoded columns.
+        base_feature_names (List[str]):
+            A list of base feature names to reconstruct. For example, if you have
+            columns 'B_a', 'B_b', 'B_c', you would pass `['B']`.
+        separator (str):
+            The character separating the base name from the categorical value in
+            the column names (e.g., '_' in 'B_a').
+        drop_original (bool):
+            If True, the original one-hot encoded columns will be dropped from
+            the returned DataFrame.
+
+    Returns:
+        pd.DataFrame:
+            A new DataFrame with the specified one-hot encoded features
+            reconstructed into single categorical columns.
+
+    <br>
+
+    ## Note:
+
+    This function is designed to be robust, but users should be aware of two key edge cases:
+
+    1. **Ambiguous Base Feature Prefixes**: If the `base_feature_names` list contains names where one is a prefix of another (e.g., `['feat', 'feat_ext']`), the order is critical. The function will match columns greedily. To avoid incorrect grouping, always list the **most specific base names first** (e.g., `['feat_ext', 'feat']`).
+
+    2. **Malformed One-Hot Data**: If a row contains multiple `1`s within the same feature group (e.g., both `B_a` and `B_c` are `1`), the function will not raise an error. It uses `.idxmax()`, which returns the first column that contains the maximum value. This means it will silently select the first category it encounters and ignore the others, potentially masking an upstream data issue.
+    """
+    if not isinstance(df, pd.DataFrame):
+        _LOGGER.error("Input must be a pandas DataFrame.")
+        raise TypeError()
+
+    new_df = df.copy()
+    all_ohe_cols_to_drop = []
+    reconstructed_count = 0
+
+    _LOGGER.info(f"Attempting to reconstruct {len(base_feature_names)} one-hot encoded feature(s).")
+
+    for base_name in base_feature_names:
+        # Regex to find all columns belonging to this base feature.
+        pattern = f"^{re.escape(base_name)}{re.escape(separator)}"
+
+        # Find matching columns
+        ohe_cols = [col for col in df.columns if re.match(pattern, col)]
+
+        if not ohe_cols:
+            _LOGGER.warning(f"No one-hot encoded columns found for base feature '{base_name}'. Skipping.")
+            continue
+
+        # For each row, find the column name with the maximum value (which is 1)
+        reconstructed_series = new_df[ohe_cols].idxmax(axis=1)
+
+        # Extract the categorical value (the suffix) from the column name.
+        # Use n=1 in split to handle cases where the category itself might contain the separator.
+        new_column_values = reconstructed_series.str.split(separator, n=1).str[1]
+
+        # Handle rows where all OHE columns were 0 (e.g., original value was NaN).
+        # In these cases, idxmax returns the first column name, but the sum of values is 0.
+        all_zero_mask = new_df[ohe_cols].sum(axis=1) == 0
+        new_column_values.loc[all_zero_mask] = np.nan
+
+        # Assign the new reconstructed column to the DataFrame
+        new_df[base_name] = new_column_values
+
+        all_ohe_cols_to_drop.extend(ohe_cols)
+        reconstructed_count += 1
+        print(f" - Reconstructed '{base_name}' from {len(ohe_cols)} columns.")
+
+    if drop_original and all_ohe_cols_to_drop:
+        # Drop the original OHE columns, ensuring no duplicates in the drop list
+        unique_cols_to_drop = list(set(all_ohe_cols_to_drop))
+        new_df.drop(columns=unique_cols_to_drop, inplace=True)
+        _LOGGER.info(f"Dropped {len(unique_cols_to_drop)} original one-hot encoded columns.")
+
+    _LOGGER.info(f"Successfully reconstructed {reconstructed_count} feature(s).")
+
+    return new_df
+
+
 def _validate_columns(df: pd.DataFrame, columns: list[str]):
     valid_columns = [column for column in columns if column in df.columns]
     return valid_columns
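A quick sketch of `reconstruct_one_hot` on invented data, including an all-zero row that becomes NaN:

```python
import pandas as pd
from ml_tools.data_exploration import reconstruct_one_hot

df = pd.DataFrame({
    "B_a": [1, 0, 0],
    "B_b": [0, 1, 0],  # third row is all zeros -> reconstructed as NaN
    "x":   [10, 20, 30],
})

out = reconstruct_one_hot(df, base_feature_names=["B"])
print(out["B"].tolist())   # ['a', 'b', nan]
print(list(out.columns))   # ['x', 'B']
```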
ml_tools/path_manager.py CHANGED
@@ -248,26 +248,33 @@ class PathManager:
         _LOGGER.error(f"'{type(self).__name__}' object has no attribute or path key '{sanitized_name}'")
         raise AttributeError()

-    def __setattr__(self, name: str, value: Union[str, Path]):
+    def __setattr__(self, name: str, value: Union[str, Path, bool, dict, int, tuple]):
         """Allows attribute-style setting of paths, e.g., PM.data = 'path/to/data'."""
-        # Check for internal attributes
+        # Check for internal attributes, which are set directly on the object.
         if name.startswith('_'):
-            if hasattr(self, '_initialized') and self._initialized:
-                self._check_underscore_key(name)
-                return
-            else:
-                # During initialization, allow private attributes to be set.
-                super().__setattr__(name, value)
+            # This check prevents setting new private attributes after __init__ is done.
+            is_initialized = self.__dict__.get('_initialized', False)
+            if is_initialized:
+                _LOGGER.error(f"Cannot set private attribute '{name}' after initialization.")
+                raise AttributeError()
+            super().__setattr__(name, value)
             return

-        # Block overwriting of existing methods/attributes
+        # Sanitize the key for the public path.
         sanitized_name = self._sanitize_key(name)
         self._check_underscore_key(sanitized_name)
-        if hasattr(self, sanitized_name):
+
+        # Prevent overwriting existing methods (e.g., PM.status = 'foo').
+        # This check looks at the class, not the instance, so it won't trigger __getattr__.
+        if hasattr(self.__class__, sanitized_name):
             _LOGGER.error(f"Cannot overwrite existing attribute or method '{sanitized_name}' ({name}).")
             raise AttributeError()
+
+        if not isinstance(value, (str, Path)):
+            _LOGGER.error(f"Cannot assign type '{type(value).__name__}' to a path. Must be str or Path.")
+            raise TypeError()

-        # If all checks pass, treat it as a public path.
+        # If all checks pass, treat it as a public path and store it in the _paths dictionary.
         self._paths[sanitized_name] = Path(value)
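The reworked `__setattr__` tightens three behaviors: private attributes are frozen after `__init__`, class attributes and methods cannot be shadowed, and path values must be `str` or `Path`. A hypothetical session (the constructor signature is not part of this diff):

```python
from ml_tools.path_manager import PathManager

pm = PathManager()  # hypothetical: the real constructor arguments are outside this diff

pm.data = "datasets/raw"   # accepted; stored internally as Path("datasets/raw")

try:
    pm.data = 42           # rejected by the new isinstance check
except TypeError:
    print("non-path value rejected")

try:
    pm._cache = {}         # private attributes are frozen after __init__
except AttributeError:
    print("late private attribute rejected")
```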