dragon-ml-toolbox 8.2.0__tar.gz → 9.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. More details are available on the package's registry page (the original link is not preserved in this text export).

Files changed (42):
  1. {dragon_ml_toolbox-8.2.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-9.0.0}/PKG-INFO +5 -1
  2. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0/dragon_ml_toolbox.egg-info}/PKG-INFO +5 -1
  3. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -1
  4. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/dragon_ml_toolbox.egg-info/requires.txt +4 -0
  5. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/ETL_engineering.py +177 -79
  6. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/GUI_tools.py +5 -5
  7. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/MICE_imputation.py +12 -8
  8. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/ML_callbacks.py +6 -3
  9. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/ML_datasetmaster.py +37 -20
  10. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/ML_evaluation.py +4 -4
  11. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/ML_evaluation_multi.py +26 -17
  12. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/ML_inference.py +30 -23
  13. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/ML_models.py +14 -14
  14. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/ML_optimization.py +4 -3
  15. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/ML_scaler.py +7 -7
  16. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/ML_trainer.py +17 -15
  17. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/PSO_optimization.py +16 -8
  18. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/RNN_forecast.py +1 -1
  19. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/SQL.py +22 -13
  20. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/VIF_factor.py +7 -6
  21. dragon_ml_toolbox-9.0.0/ml_tools/_logger.py +134 -0
  22. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/custom_logger.py +12 -8
  23. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/data_exploration.py +20 -15
  24. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/ensemble_evaluation.py +10 -6
  25. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/ensemble_inference.py +18 -18
  26. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/ensemble_learning.py +8 -5
  27. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/handle_excel.py +15 -11
  28. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/optimization_tools.py +3 -4
  29. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/path_manager.py +21 -15
  30. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/utilities.py +35 -26
  31. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/pyproject.toml +7 -3
  32. dragon_ml_toolbox-8.2.0/ml_tools/_ML_optimization_multi.py +0 -231
  33. dragon_ml_toolbox-8.2.0/ml_tools/_logger.py +0 -36
  34. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/LICENSE +0 -0
  35. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/LICENSE-THIRD-PARTY.md +0 -0
  36. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/README.md +0 -0
  37. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  38. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  39. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/__init__.py +0 -0
  40. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/_script_info.py +0 -0
  41. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/ml_tools/keys.py +0 -0
  42. {dragon_ml_toolbox-8.2.0 → dragon_ml_toolbox-9.0.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 8.2.0
3
+ Version: 9.0.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -17,6 +17,7 @@ Requires-Dist: pandas; extra == "base"
17
17
  Requires-Dist: numpy; extra == "base"
18
18
  Requires-Dist: polars; extra == "base"
19
19
  Requires-Dist: joblib; extra == "base"
20
+ Requires-Dist: colorlog; extra == "base"
20
21
  Provides-Extra: ml
21
22
  Requires-Dist: numpy>=2.0; extra == "ml"
22
23
  Requires-Dist: pandas; extra == "ml"
@@ -37,6 +38,7 @@ Requires-Dist: shap; extra == "ml"
37
38
  Requires-Dist: tqdm; extra == "ml"
38
39
  Requires-Dist: Pillow; extra == "ml"
39
40
  Requires-Dist: evotorch; extra == "ml"
41
+ Requires-Dist: colorlog; extra == "ml"
40
42
  Provides-Extra: mice
41
43
  Requires-Dist: numpy<2.0; extra == "mice"
42
44
  Requires-Dist: pandas; extra == "mice"
@@ -48,6 +50,7 @@ Requires-Dist: matplotlib; extra == "mice"
48
50
  Requires-Dist: statsmodels; extra == "mice"
49
51
  Requires-Dist: lightgbm<=4.5.0; extra == "mice"
50
52
  Requires-Dist: shap; extra == "mice"
53
+ Requires-Dist: colorlog; extra == "mice"
51
54
  Provides-Extra: pytorch
52
55
  Requires-Dist: torch; extra == "pytorch"
53
56
  Requires-Dist: torchvision; extra == "pytorch"
@@ -59,6 +62,7 @@ Requires-Dist: ipykernel; extra == "excel"
59
62
  Requires-Dist: notebook; extra == "excel"
60
63
  Requires-Dist: jupyterlab; extra == "excel"
61
64
  Requires-Dist: ipywidgets; extra == "excel"
65
+ Requires-Dist: colorlog; extra == "excel"
62
66
  Provides-Extra: gui-boost
63
67
  Requires-Dist: numpy; extra == "gui-boost"
64
68
  Requires-Dist: joblib; extra == "gui-boost"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 8.2.0
3
+ Version: 9.0.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -17,6 +17,7 @@ Requires-Dist: pandas; extra == "base"
17
17
  Requires-Dist: numpy; extra == "base"
18
18
  Requires-Dist: polars; extra == "base"
19
19
  Requires-Dist: joblib; extra == "base"
20
+ Requires-Dist: colorlog; extra == "base"
20
21
  Provides-Extra: ml
21
22
  Requires-Dist: numpy>=2.0; extra == "ml"
22
23
  Requires-Dist: pandas; extra == "ml"
@@ -37,6 +38,7 @@ Requires-Dist: shap; extra == "ml"
37
38
  Requires-Dist: tqdm; extra == "ml"
38
39
  Requires-Dist: Pillow; extra == "ml"
39
40
  Requires-Dist: evotorch; extra == "ml"
41
+ Requires-Dist: colorlog; extra == "ml"
40
42
  Provides-Extra: mice
41
43
  Requires-Dist: numpy<2.0; extra == "mice"
42
44
  Requires-Dist: pandas; extra == "mice"
@@ -48,6 +50,7 @@ Requires-Dist: matplotlib; extra == "mice"
48
50
  Requires-Dist: statsmodels; extra == "mice"
49
51
  Requires-Dist: lightgbm<=4.5.0; extra == "mice"
50
52
  Requires-Dist: shap; extra == "mice"
53
+ Requires-Dist: colorlog; extra == "mice"
51
54
  Provides-Extra: pytorch
52
55
  Requires-Dist: torch; extra == "pytorch"
53
56
  Requires-Dist: torchvision; extra == "pytorch"
@@ -59,6 +62,7 @@ Requires-Dist: ipykernel; extra == "excel"
59
62
  Requires-Dist: notebook; extra == "excel"
60
63
  Requires-Dist: jupyterlab; extra == "excel"
61
64
  Requires-Dist: ipywidgets; extra == "excel"
65
+ Requires-Dist: colorlog; extra == "excel"
62
66
  Provides-Extra: gui-boost
63
67
  Requires-Dist: numpy; extra == "gui-boost"
64
68
  Requires-Dist: joblib; extra == "gui-boost"
@@ -23,7 +23,6 @@ ml_tools/PSO_optimization.py
23
23
  ml_tools/RNN_forecast.py
24
24
  ml_tools/SQL.py
25
25
  ml_tools/VIF_factor.py
26
- ml_tools/_ML_optimization_multi.py
27
26
  ml_tools/__init__.py
28
27
  ml_tools/_logger.py
29
28
  ml_tools/_script_info.py
@@ -19,12 +19,14 @@ shap
19
19
  tqdm
20
20
  Pillow
21
21
  evotorch
22
+ colorlog
22
23
 
23
24
  [base]
24
25
  pandas
25
26
  numpy
26
27
  polars
27
28
  joblib
29
+ colorlog
28
30
 
29
31
  [excel]
30
32
  pandas
@@ -34,6 +36,7 @@ ipykernel
34
36
  notebook
35
37
  jupyterlab
36
38
  ipywidgets
39
+ colorlog
37
40
 
38
41
  [gui-boost]
39
42
  numpy
@@ -57,6 +60,7 @@ matplotlib
57
60
  statsmodels
58
61
  lightgbm<=4.5.0
59
62
  shap
63
+ colorlog
60
64
 
61
65
  [nuitka]
62
66
  nuitka
@@ -1,11 +1,15 @@
1
1
  import polars as pl
2
+ import pandas as pd
2
3
  import re
4
+ from pathlib import Path
3
5
  from typing import Literal, Union, Optional, Any, Callable, List, Dict, Tuple
6
+ from .path_manager import sanitize_filename, make_fullpath
4
7
  from ._script_info import _script_info
5
8
  from ._logger import _LOGGER
6
9
 
7
10
 
8
11
  __all__ = [
12
+ "save_unique_values",
9
13
  "ColumnCleaner",
10
14
  "DataFrameCleaner",
11
15
  "TransformationRecipe",
@@ -23,6 +27,80 @@ __all__ = [
23
27
  "DateFeatureExtractor"
24
28
  ]
25
29
 
30
+ ################ Unique Values per column #################
31
+ def save_unique_values(csv_path: Union[str, Path], output_dir: Union[str, Path]) -> None:
32
+ """
33
+ Loads a CSV file, then analyzes it and saves the unique non-null values
34
+ from each column into a separate text file exactly as they appear.
35
+
36
+ This is useful for understanding the raw categories or range of values
37
+ within a dataset before cleaning.
38
+
39
+ Args:
40
+ csv_path (Union[str, Path]):
41
+ The file path to the input CSV file.
42
+ output_dir (Union[str, Path]):
43
+ The path to the directory where the .txt files will be saved.
44
+ The directory will be created if it does not exist.
45
+ """
46
+ # --- 1. Input Validation ---
47
+ csv_path = make_fullpath(input_path=csv_path, enforce="file")
48
+ output_dir = make_fullpath(input_path=output_dir, make=True)
49
+
50
+ # --- 2. Load Data ---
51
+ try:
52
+ # Load all columns as strings to preserve original formatting
53
+ df = pd.read_csv(csv_path, dtype=str, encoding='utf-8')
54
+ except FileNotFoundError as e:
55
+ _LOGGER.error(f"The file was not found at '{csv_path}'.")
56
+ raise e
57
+ except Exception as e2:
58
+ _LOGGER.error(f"An error occurred while reading the CSV file.")
59
+ raise e2
60
+ else:
61
+ _LOGGER.info(f"Data loaded from '{csv_path}'")
62
+
63
+ # --- 3. Process Each Column ---
64
+ for i, column_name in enumerate(df.columns):
65
+ _LOGGER.info(f"Processing column: '{column_name}'...")
66
+
67
+ # --- Get unique values AS IS ---
68
+ try:
69
+ # Drop nulls, get unique values, and sort them.
70
+ # The values are preserved exactly as they are in the cells.
71
+ unique_values = df[column_name].dropna().unique()
72
+ sorted_uniques = sorted(unique_values)
73
+ except Exception:
74
+ _LOGGER.exception(f"Could not process column '{column_name}'.")
75
+ continue
76
+
77
+ if not sorted_uniques:
78
+ _LOGGER.warning(f"Column '{column_name}' has no unique non-null values. Skipping.")
79
+ continue
80
+
81
+ # --- Sanitize column name to create a valid filename ---
82
+ sanitized_name = sanitize_filename(column_name)
83
+ if not sanitized_name.strip('_'):
84
+ sanitized_name = f'column_{i}'
85
+ file_path = output_dir / f"{sanitized_name}_unique_values.txt"
86
+
87
+ # --- Write to file ---
88
+ try:
89
+ with open(file_path, 'w', encoding='utf-8') as f:
90
+ f.write(f"# Unique values for column: '{column_name}'\n")
91
+ f.write(f"# Total unique non-null values: {len(sorted_uniques)}\n")
92
+ f.write("-" * 30 + "\n")
93
+ for value in sorted_uniques:
94
+ f.write(f"{value}\n")
95
+ f.write("-" * 30 + "\n")
96
+ except IOError:
97
+ _LOGGER.exception(f"Error writing to file {file_path}.")
98
+ else:
99
+ _LOGGER.info(f"Successfully saved {len(sorted_uniques)} unique values to '{file_path}'")
100
+
101
+ _LOGGER.info("Process complete.")
102
+
103
+
26
104
  ########## EXTRACT and CLEAN ##########
27
105
  class ColumnCleaner:
28
106
  """
@@ -60,16 +138,19 @@ class ColumnCleaner:
60
138
  """
61
139
  def __init__(self, column_name: str, rules: Dict[str, str], case_insensitive: bool = True):
62
140
  if not isinstance(column_name, str) or not column_name:
63
- raise TypeError("The 'column_name' must be a non-empty string.")
141
+ _LOGGER.error("The 'column_name' must be a non-empty string.")
142
+ raise TypeError()
64
143
  if not isinstance(rules, dict):
65
- raise TypeError("The 'rules' argument must be a dictionary.")
144
+ _LOGGER.error("The 'rules' argument must be a dictionary.")
145
+ raise TypeError()
66
146
 
67
147
  # Validate each regex pattern for correctness
68
148
  for pattern in rules.keys():
69
149
  try:
70
150
  re.compile(pattern)
71
- except re.error as e:
72
- raise ValueError(f"Invalid regex pattern '{pattern}': {e}") from e
151
+ except re.error:
152
+ _LOGGER.error(f"Invalid regex pattern '{pattern}'.")
153
+ raise
73
154
 
74
155
  self.column_name = column_name
75
156
  self.rules = rules
@@ -94,20 +175,17 @@ class DataFrameCleaner:
94
175
  """
95
176
  def __init__(self, cleaners: List[ColumnCleaner]):
96
177
  if not isinstance(cleaners, list):
97
- raise TypeError("The 'cleaners' argument must be a list of ColumnCleaner objects.")
178
+ _LOGGER.error("The 'cleaners' argument must be a list of ColumnCleaner objects.")
179
+ raise TypeError()
98
180
 
99
181
  seen_columns = set()
100
182
  for cleaner in cleaners:
101
183
  if not isinstance(cleaner, ColumnCleaner):
102
- raise TypeError(
103
- f"All items in 'cleaners' list must be ColumnCleaner objects, "
104
- f"but found an object of type {type(cleaner).__name__}."
105
- )
184
+ _LOGGER.error(f"All items in 'cleaners' list must be ColumnCleaner objects, but found an object of type {type(cleaner).__name__}.")
185
+ raise TypeError()
106
186
  if cleaner.column_name in seen_columns:
107
- raise ValueError(
108
- f"Duplicate ColumnCleaner found for column '{cleaner.column_name}'. "
109
- "Each column should only have one cleaner."
110
- )
187
+ _LOGGER.error(f"Duplicate ColumnCleaner found for column '{cleaner.column_name}'. Each column should only have one cleaner.")
188
+ raise ValueError()
111
189
  seen_columns.add(cleaner.column_name)
112
190
 
113
191
  self.cleaners = cleaners
@@ -131,10 +209,10 @@ class DataFrameCleaner:
131
209
  missing_columns = rule_columns - df_columns
132
210
 
133
211
  if missing_columns:
134
- raise ValueError(
135
- f"The following columns specified in cleaning rules "
136
- f"were not found in the DataFrame: {sorted(list(missing_columns))}"
137
- )
212
+ _LOGGER.error("The following columns specified in cleaning rules were not found in the DataFrame:")
213
+ for miss_col in sorted(list(missing_columns)):
214
+ print(f"\t- {miss_col}")
215
+ raise ValueError()
138
216
 
139
217
  df_cleaned = df.clone()
140
218
 
@@ -153,7 +231,7 @@ class DataFrameCleaner:
153
231
  # Execute the expression chain for the column
154
232
  df_cleaned = df_cleaned.with_columns(col_expr.alias(col_name))
155
233
 
156
- print(f"Cleaned {len(self.cleaners)} columns.")
234
+ _LOGGER.info(f"Cleaned {len(self.cleaners)} columns.")
157
235
 
158
236
  return df_cleaned
159
237
 
@@ -199,16 +277,20 @@ class TransformationRecipe:
199
277
  """
200
278
  # --- Validation ---
201
279
  if not isinstance(input_col_name, str) or not input_col_name:
202
- raise TypeError("'input_col' must be a non-empty string.")
280
+ _LOGGER.error("'input_col' must be a non-empty string.")
281
+ raise TypeError()
203
282
 
204
283
  if transform == _RENAME:
205
284
  if not isinstance(output_col_names, str):
206
- raise TypeError("For a RENAME operation, 'output_col' must be a string.")
285
+ _LOGGER.error("For a RENAME operation, 'output_col' must be a string.")
286
+ raise TypeError()
207
287
  elif not isinstance(transform, Callable):
208
- raise TypeError(f"'transform' must be a callable function or the string '{_RENAME}'.")
288
+ _LOGGER.error(f"'transform' must be a callable function or the string '{_RENAME}'.")
289
+ raise TypeError()
209
290
 
210
291
  if isinstance(output_col_names, list) and transform == _RENAME:
211
- raise ValueError("A RENAME operation cannot have a list of output columns.")
292
+ _LOGGER.error("A RENAME operation cannot have a list of output columns.")
293
+ raise ValueError()
212
294
 
213
295
  # --- Add Step ---
214
296
  step = {
@@ -243,9 +325,11 @@ class DataProcessor:
243
325
  been populated with transformation steps.
244
326
  """
245
327
  if not isinstance(recipe, TransformationRecipe):
246
- raise TypeError("The recipe must be an instance of TransformationRecipe.")
328
+ _LOGGER.error("The recipe must be an instance of TransformationRecipe.")
329
+ raise TypeError()
247
330
  if len(recipe) == 0:
248
- raise ValueError("The recipe cannot be empty.")
331
+ _LOGGER.error("The recipe cannot be empty.")
332
+ raise ValueError()
249
333
  self._recipe = recipe
250
334
 
251
335
  def transform(self, df: pl.DataFrame) -> pl.DataFrame:
@@ -260,7 +344,8 @@ class DataProcessor:
260
344
  transform_action = step["transform"]
261
345
 
262
346
  if input_col_name not in df.columns:
263
- raise ValueError(f"Input column '{input_col_name}' not found in DataFrame.")
347
+ _LOGGER.error(f"Input column '{input_col_name}' not found in DataFrame.")
348
+ raise ValueError()
264
349
 
265
350
  input_series = df.get_column(input_col_name)
266
351
 
@@ -273,17 +358,16 @@ class DataProcessor:
273
358
 
274
359
  if isinstance(result, pl.Series):
275
360
  if not isinstance(output_col_spec, str):
276
- raise TypeError(f"Function for '{input_col_name}' returned a Series but 'output_col' is not a string.")
361
+ _LOGGER.error(f"Function for '{input_col_name}' returned a Series but 'output_col' is not a string.")
362
+ raise TypeError()
277
363
  processed_columns.append(result.alias(output_col_spec))
278
364
 
279
365
  elif isinstance(result, pl.DataFrame):
280
366
  # 1. Handle list-based renaming
281
367
  if isinstance(output_col_spec, list):
282
368
  if len(result.columns) != len(output_col_spec):
283
- raise ValueError(
284
- f"Mismatch in '{input_col_name}': function produced {len(result.columns)} columns, "
285
- f"but recipe specifies {len(output_col_spec)} output names."
286
- )
369
+ _LOGGER.error(f"Mismatch in '{input_col_name}': function produced {len(result.columns)} columns, but recipe specifies {len(output_col_spec)} output names.")
370
+ raise ValueError()
287
371
 
288
372
  renamed_df = result.rename(dict(zip(result.columns, output_col_spec)))
289
373
  processed_columns.extend(renamed_df.get_columns())
@@ -299,19 +383,19 @@ class DataProcessor:
299
383
  processed_columns.extend(renamed_df.get_columns())
300
384
 
301
385
  else:
302
- raise TypeError(
303
- f"Function for '{input_col_name}' returned a DataFrame, "
304
- f"so 'output_col' must be a list of names or a string prefix."
305
- )
386
+ _LOGGER.error(f"Function for '{input_col_name}' returned a DataFrame, so 'output_col' must be a list of names or a string prefix.")
387
+ raise TypeError()
306
388
 
307
389
  else:
308
- raise TypeError(f"Function for '{input_col_name}' returned an unexpected type: {type(result)}.")
390
+ _LOGGER.error(f"Function for '{input_col_name}' returned an unexpected type: {type(result)}.")
391
+ raise TypeError()
309
392
 
310
- else: # This case is now unlikely due to builder validation.
311
- raise TypeError(f"Invalid 'transform' action for '{input_col_name}': {transform_action}")
393
+ else: # This case is unlikely due to builder validation.
394
+ _LOGGER.error(f"Invalid 'transform' action for '{input_col_name}': {transform_action}")
395
+ raise TypeError()
312
396
 
313
397
  if not processed_columns:
314
- _LOGGER.warning("⚠️ The transformation resulted in an empty DataFrame.")
398
+ _LOGGER.error("The transformation resulted in an empty DataFrame.")
315
399
  return pl.DataFrame()
316
400
 
317
401
  return pl.DataFrame(processed_columns)
@@ -381,18 +465,17 @@ class BinaryTransformer:
381
465
  ):
382
466
  # --- Validation: Enforce one and only one option ---
383
467
  if true_keywords is not None and false_keywords is not None:
384
- raise ValueError(
385
- "Provide either 'true_keywords' or 'false_keywords', but not both."
386
- )
468
+ _LOGGER.error("Provide either 'true_keywords' or 'false_keywords', but not both.")
469
+ raise ValueError()
387
470
  if true_keywords is None and false_keywords is None:
388
- raise ValueError(
389
- "You must provide either 'true_keywords' or 'false_keywords'."
390
- )
471
+ _LOGGER.error("You must provide either 'true_keywords' or 'false_keywords'.")
472
+ raise ValueError()
391
473
 
392
474
  # --- Configuration ---
393
475
  self.keywords: List[str] = true_keywords if true_keywords is not None else false_keywords # type: ignore
394
476
  if not self.keywords:
395
- raise ValueError("Keyword list cannot be empty.")
477
+ _LOGGER.error("Keyword list cannot be empty.")
478
+ raise ValueError()
396
479
 
397
480
  self.mode: str = "true_mode" if true_keywords is not None else "false_mode"
398
481
 
@@ -468,9 +551,11 @@ class MultiBinaryDummifier:
468
551
  """
469
552
  def __init__(self, keywords: List[str], case_insensitive: bool = True):
470
553
  if not isinstance(keywords, list) or not all(isinstance(k, str) for k in keywords):
471
- raise TypeError("The 'keywords' argument must be a list of strings.")
554
+ _LOGGER.error("The 'keywords' argument must be a list of strings.")
555
+ raise TypeError()
472
556
  if not keywords:
473
- raise ValueError("The 'keywords' list cannot be empty.")
557
+ _LOGGER.error("The 'keywords' list cannot be empty.")
558
+ raise ValueError()
474
559
 
475
560
  self.keywords = keywords
476
561
  self.case_insensitive = case_insensitive
@@ -530,7 +615,8 @@ class KeywordDummifier:
530
615
  """
531
616
  def __init__(self, group_names: List[str], group_keywords: List[List[str]], case_insensitive: bool = True):
532
617
  if len(group_names) != len(group_keywords):
533
- raise ValueError("Initialization failed: 'group_names' and 'group_keywords' must have the same length.")
618
+ _LOGGER.error("Initialization failed: 'group_names' and 'group_keywords' must have the same length.")
619
+ raise ValueError()
534
620
 
535
621
  self.group_names = group_names
536
622
  self.group_keywords = group_keywords
@@ -610,23 +696,28 @@ class NumberExtractor:
610
696
  ):
611
697
  # --- Validation ---
612
698
  if not isinstance(regex_pattern, str):
613
- raise TypeError("regex_pattern must be a string.")
699
+ _LOGGER.error("regex_pattern must be a string.")
700
+ raise TypeError()
614
701
 
615
702
  # Validate that the regex has exactly one capturing group
616
703
  try:
617
704
  if re.compile(regex_pattern).groups != 1:
618
- raise ValueError("regex_pattern must contain exactly one capturing group '(...)'")
705
+ _LOGGER.error("regex_pattern must contain exactly one capturing group '(...)'")
706
+ raise ValueError()
619
707
  except re.error as e:
620
- raise ValueError(f"Invalid regex pattern provided: {e}") from e
708
+ _LOGGER.error(f"Invalid regex pattern provided: {e}")
709
+ raise ValueError()
621
710
 
622
711
  if dtype not in ["float", "int"]:
623
- raise ValueError("dtype must be either 'float' or 'int'.")
712
+ _LOGGER.error("dtype must be either 'float' or 'int'.")
713
+ raise ValueError()
624
714
 
625
715
  if round_digits is not None:
626
716
  if not isinstance(round_digits, int):
627
- raise TypeError("round_digits must be an integer.")
717
+ _LOGGER.error("round_digits must be an integer.")
718
+ raise TypeError()
628
719
  if dtype == "int":
629
- _LOGGER.warning(f"⚠️ 'round_digits' is specified but dtype is 'int'. Rounding will be ignored.")
720
+ _LOGGER.warning(f"'round_digits' is specified but dtype is 'int'. Rounding will be ignored.")
630
721
 
631
722
  self.regex_pattern = regex_pattern
632
723
  self.dtype = dtype
@@ -684,21 +775,26 @@ class MultiNumberExtractor:
684
775
  ):
685
776
  # --- Validation ---
686
777
  if not isinstance(num_outputs, int) or num_outputs <= 0:
687
- raise ValueError("num_outputs must be a positive integer.")
778
+ _LOGGER.error("num_outputs must be a positive integer.")
779
+ raise ValueError()
688
780
 
689
781
  if not isinstance(regex_pattern, str):
690
- raise TypeError("regex_pattern must be a string.")
782
+ _LOGGER.error("regex_pattern must be a string.")
783
+ raise TypeError()
691
784
 
692
785
  # Validate that the regex has exactly one capturing group
693
786
  try:
694
787
  if re.compile(regex_pattern).groups != 1:
695
- raise ValueError("regex_pattern must contain exactly one capturing group '(...)'")
788
+ _LOGGER.error("regex_pattern must contain exactly one capturing group '(...)'")
789
+ raise ValueError()
696
790
  except re.error as e:
697
- raise ValueError(f"Invalid regex pattern provided: {e}") from e
791
+ _LOGGER.error(f"Invalid regex pattern provided: {e}")
792
+ raise ValueError()
698
793
 
699
794
  # Validate dtype
700
795
  if dtype not in ["float", "int"]:
701
- raise ValueError("dtype must be either 'float' or 'int'.")
796
+ _LOGGER.error("dtype must be either 'float' or 'int'.")
797
+ raise ValueError()
702
798
 
703
799
  self.num_outputs = num_outputs
704
800
  self.regex_pattern = regex_pattern
@@ -751,17 +847,14 @@ class RatioCalculator:
751
847
  try:
752
848
  compiled_pattern = re.compile(regex_pattern)
753
849
  if compiled_pattern.groups != 2:
754
- raise ValueError(
755
- "RatioCalculator regex_pattern must contain exactly two "
756
- "capturing groups '(...)'."
757
- )
850
+ _LOGGER.error("RatioCalculator regex_pattern must contain exactly two capturing groups '(...)'.")
851
+ raise ValueError()
758
852
  if compiled_pattern.groupindex:
759
- raise ValueError(
760
- "RatioCalculator must be initialized with unnamed capturing groups "
761
- "(e.g., '(\\d+)'), not named groups (e.g., '(?P<name>\\d+)')."
762
- )
853
+ _LOGGER.error("RatioCalculator must be initialized with unnamed capturing groups (e.g., '(\\d+)'), not named groups (e.g., '(?P<name>\\d+)').")
854
+ raise ValueError()
763
855
  except re.error as e:
764
- raise ValueError(f"Invalid regex pattern provided: {e}") from e
856
+ _LOGGER.error(f"Invalid regex pattern provided: {e}")
857
+ raise ValueError()
765
858
 
766
859
  self.regex_pattern = regex_pattern
767
860
 
@@ -805,7 +898,8 @@ class CategoryMapper:
805
898
  unseen_value: Optional[Union[int, float]] = None,
806
899
  ):
807
900
  if not isinstance(mapping, dict):
808
- raise TypeError("The 'mapping' argument must be a dictionary.")
901
+ _LOGGER.error("The 'mapping' argument must be a dictionary.")
902
+ raise TypeError()
809
903
 
810
904
  self.mapping = mapping
811
905
  self.default_value = unseen_value
@@ -866,7 +960,8 @@ class RegexMapper:
866
960
  ):
867
961
  # --- Validation ---
868
962
  if not isinstance(mapping, dict):
869
- raise TypeError("The 'mapping' argument must be a dictionary.")
963
+ _LOGGER.error("The 'mapping' argument must be a dictionary.")
964
+ raise TypeError()
870
965
 
871
966
  self.unseen_value = unseen_value
872
967
 
@@ -880,9 +975,11 @@ class RegexMapper:
880
975
  try:
881
976
  re.compile(final_pattern)
882
977
  except re.error as e:
883
- raise ValueError(f"Invalid regex pattern '{final_pattern}': {e}") from e
978
+ _LOGGER.error(f"Invalid regex pattern '{final_pattern}': {e}")
979
+ raise ValueError()
884
980
  if not isinstance(value, (int, float)):
885
- raise TypeError(f"Mapping values must be int or float, but got {type(value)} for pattern '{pattern}'.")
981
+ _LOGGER.error(f"Mapping values must be int or float, but got {type(value)} for pattern '{pattern}'.")
982
+ raise TypeError()
886
983
 
887
984
  self.processed_mapping.append((final_pattern, value))
888
985
 
@@ -937,11 +1034,13 @@ class ValueBinner:
937
1034
  ):
938
1035
  # --- Validation ---
939
1036
  if not isinstance(breaks, list) or len(breaks) < 2:
940
- raise ValueError("The 'breaks' argument must be a list of at least two numbers.")
1037
+ _LOGGER.error("The 'breaks' argument must be a list of at least two numbers.")
1038
+ raise ValueError()
941
1039
 
942
1040
  # Check if the list is sorted
943
1041
  if not all(breaks[i] <= breaks[i+1] for i in range(len(breaks)-1)):
944
- raise ValueError("The 'breaks' list must be sorted in ascending order.")
1042
+ _LOGGER.error("The 'breaks' list must be sorted in ascending order.")
1043
+ raise ValueError()
945
1044
 
946
1045
  self.breaks = breaks
947
1046
  self.left_closed = left_closed
@@ -1001,14 +1100,13 @@ class DateFeatureExtractor:
1001
1100
  ):
1002
1101
  # --- Validation ---
1003
1102
  if not isinstance(features, list) or not features:
1004
- raise ValueError("'features' must be a non-empty list of strings.")
1103
+ _LOGGER.error("'features' must be a non-empty list of strings.")
1104
+ raise ValueError()
1005
1105
 
1006
1106
  for feature in features:
1007
1107
  if feature not in self.ALLOWED_FEATURES:
1008
- raise ValueError(
1009
- f"Feature '{feature}' is not supported. "
1010
- f"Allowed features are: {self.ALLOWED_FEATURES}"
1011
- )
1108
+ _LOGGER.error(f"Feature '{feature}' is not supported. Allowed features are: {self.ALLOWED_FEATURES}")
1109
+ raise ValueError()
1012
1110
 
1013
1111
  self.features = features
1014
1112
  self.format = format
@@ -88,7 +88,7 @@ class ConfigManager:
88
88
 
89
89
  path = Path(file_path)
90
90
  if path.exists():
91
- _LOGGER.warning(f"⚠️ Configuration file already exists at {path}, or wrong path provided. Aborting.")
91
+ _LOGGER.warning(f"Configuration file already exists at {path}, or wrong path provided. Aborting.")
92
92
  return
93
93
 
94
94
  config = configparser.ConfigParser()
@@ -150,7 +150,7 @@ class ConfigManager:
150
150
 
151
151
  with open(path, 'w') as configfile:
152
152
  config.write(configfile)
153
- _LOGGER.info(f"📝 Successfully generated config template at: '{path}'")
153
+ _LOGGER.info(f"Successfully generated config template at: '{path}'")
154
154
 
155
155
 
156
156
  # --- GUI Factory ---
@@ -442,14 +442,14 @@ def catch_exceptions(show_popup: bool = True):
442
442
  def wrapper(*args, **kwargs):
443
443
  try:
444
444
  return func(*args, **kwargs)
445
- except Exception as e:
445
+ except Exception:
446
446
  # Format the full traceback to give detailed error info
447
- error_msg = traceback.format_exc()
448
447
  if show_popup:
448
+ error_msg = traceback.format_exc()
449
449
  sg.popup_error("An error occurred:", error_msg, title="Error")
450
450
  else:
451
451
  # Fallback for non-GUI contexts or if popup is disabled
452
- _LOGGER.error(error_msg)
452
+ _LOGGER.exception("An error occurred.")
453
453
  return wrapper
454
454
  return decorator
455
455