dragon-ml-toolbox 2.4.0__tar.gz → 3.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic; consult the package registry's advisory page for details.

Files changed (33)
  1. {dragon_ml_toolbox-2.4.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-3.1.0}/PKG-INFO +7 -4
  2. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/README.md +5 -2
  3. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0/dragon_ml_toolbox.egg-info}/PKG-INFO +7 -4
  4. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +7 -4
  5. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/ETL_engineering.py +49 -19
  6. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/GUI_tools.py +24 -25
  7. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/MICE_imputation.py +8 -4
  8. dragon_ml_toolbox-3.1.0/ml_tools/ML_callbacks.py +341 -0
  9. dragon_ml_toolbox-3.1.0/ml_tools/ML_evaluation.py +255 -0
  10. dragon_ml_toolbox-3.1.0/ml_tools/ML_trainer.py +344 -0
  11. dragon_ml_toolbox-3.1.0/ml_tools/ML_tutorial.py +300 -0
  12. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/PSO_optimization.py +27 -20
  13. dragon_ml_toolbox-3.1.0/ml_tools/RNN_forecast.py +49 -0
  14. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/VIF_factor.py +6 -5
  15. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/data_exploration.py +2 -2
  16. dragon_ml_toolbox-3.1.0/ml_tools/datasetmaster.py +681 -0
  17. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/ensemble_learning.py +12 -9
  18. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/handle_excel.py +9 -10
  19. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/logger.py +45 -8
  20. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/utilities.py +18 -1
  21. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/pyproject.toml +2 -2
  22. dragon_ml_toolbox-2.4.0/ml_tools/datasetmaster.py +0 -607
  23. dragon_ml_toolbox-2.4.0/ml_tools/trainer.py +0 -346
  24. dragon_ml_toolbox-2.4.0/ml_tools/vision_helpers.py +0 -231
  25. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/LICENSE +0 -0
  26. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/LICENSE-THIRD-PARTY.md +0 -0
  27. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  28. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  29. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  30. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/__init__.py +0 -0
  31. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/_particle_swarm_optimization.py +0 -0
  32. /dragon_ml_toolbox-2.4.0/ml_tools/pytorch_models.py → /dragon_ml_toolbox-3.1.0/ml_tools/_pytorch_models.py +0 -0
  33. {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/setup.cfg +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 2.4.0
4
- Summary: A collection of tools for data science and machine learning projects
3
+ Version: 3.1.0
4
+ Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://github.com/DrAg0n-BoRn/ML_tools
@@ -125,9 +125,12 @@ GUI_tools
125
125
  handle_excel
126
126
  logger
127
127
  MICE_imputation
128
+ ML_callbacks
129
+ ML_evaluation
130
+ ML_trainer
131
+ ML_tutorial
128
132
  PSO_optimization
129
- trainer
133
+ RNN_forecast
130
134
  utilities
131
135
  VIF_factor
132
- vision_helpers
133
136
  ```
@@ -82,9 +82,12 @@ GUI_tools
82
82
  handle_excel
83
83
  logger
84
84
  MICE_imputation
85
+ ML_callbacks
86
+ ML_evaluation
87
+ ML_trainer
88
+ ML_tutorial
85
89
  PSO_optimization
86
- trainer
90
+ RNN_forecast
87
91
  utilities
88
92
  VIF_factor
89
- vision_helpers
90
93
  ```
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 2.4.0
4
- Summary: A collection of tools for data science and machine learning projects
3
+ Version: 3.1.0
4
+ Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://github.com/DrAg0n-BoRn/ML_tools
@@ -125,9 +125,12 @@ GUI_tools
125
125
  handle_excel
126
126
  logger
127
127
  MICE_imputation
128
+ ML_callbacks
129
+ ML_evaluation
130
+ ML_trainer
131
+ ML_tutorial
128
132
  PSO_optimization
129
- trainer
133
+ RNN_forecast
130
134
  utilities
131
135
  VIF_factor
132
- vision_helpers
133
136
  ```
@@ -10,16 +10,19 @@ dragon_ml_toolbox.egg-info/top_level.txt
10
10
  ml_tools/ETL_engineering.py
11
11
  ml_tools/GUI_tools.py
12
12
  ml_tools/MICE_imputation.py
13
+ ml_tools/ML_callbacks.py
14
+ ml_tools/ML_evaluation.py
15
+ ml_tools/ML_trainer.py
16
+ ml_tools/ML_tutorial.py
13
17
  ml_tools/PSO_optimization.py
18
+ ml_tools/RNN_forecast.py
14
19
  ml_tools/VIF_factor.py
15
20
  ml_tools/__init__.py
16
21
  ml_tools/_particle_swarm_optimization.py
22
+ ml_tools/_pytorch_models.py
17
23
  ml_tools/data_exploration.py
18
24
  ml_tools/datasetmaster.py
19
25
  ml_tools/ensemble_learning.py
20
26
  ml_tools/handle_excel.py
21
27
  ml_tools/logger.py
22
- ml_tools/pytorch_models.py
23
- ml_tools/trainer.py
24
- ml_tools/utilities.py
25
- ml_tools/vision_helpers.py
28
+ ml_tools/utilities.py
@@ -3,17 +3,18 @@ import re
3
3
  from typing import Literal, Union, Optional, Any, Callable, List, Dict
4
4
  from .utilities import _script_info
5
5
  import pandas as pd
6
+ from .logger import _LOGGER
6
7
 
7
8
 
8
9
  __all__ = [
9
10
  "ColumnCleaner",
10
- "DataFrameCleaner"
11
+ "DataFrameCleaner",
11
12
  "TransformationRecipe",
12
13
  "DataProcessor",
13
14
  "KeywordDummifier",
14
15
  "NumberExtractor",
15
16
  "MultiNumberExtractor",
16
- "RatioCalculator"
17
+ "RatioCalculator",
17
18
  "CategoryMapper",
18
19
  "RegexMapper",
19
20
  "ValueBinner",
@@ -24,18 +25,26 @@ __all__ = [
24
25
 
25
26
  class ColumnCleaner:
26
27
  """
27
- Cleans and standardizes a single pandas Series based on a dictionary of regex-to-value replacement rules.
28
+ Cleans and standardizes a pandas Series by applying regex-to-replacement rules.
29
+ Supports sub-string replacements and case-insensitivity.
30
+
31
+ Notes:
32
+ - Write separate, specific rules for each case. Don't combine patterns with an "OR".
33
+ - Define rules from most specific to more general to create a fallback system.
34
+ - Beware of chain replacements (rules matching strings that have already been changed by a previous rule).
28
35
 
29
36
  Args:
30
37
  rules (Dict[str, str]):
31
- A dictionary where each key is a regular expression pattern and
32
- each value is the standardized string to replace matches with.
38
+ A dictionary of regex patterns to replacement strings. Can use
39
+ backreferences in the replacement statement (e.g., r'\\1 \\2 \\3 \\4 \\5') for captured groups.
40
+ case_insensitive (bool):
41
+ If True, regex matching ignores case.
33
42
  """
34
- def __init__(self, rules: Dict[str, str]):
43
+ def __init__(self, rules: Dict[str, str], case_insensitive: bool = True):
35
44
  if not isinstance(rules, dict):
36
45
  raise TypeError("The 'rules' argument must be a dictionary.")
37
46
 
38
- # Validate that all keys are valid regular expressions
47
+ # Validate regex patterns
39
48
  for pattern in rules.keys():
40
49
  try:
41
50
  re.compile(pattern)
@@ -43,32 +52,52 @@ class ColumnCleaner:
43
52
  raise ValueError(f"Invalid regex pattern '{pattern}': {e}") from e
44
53
 
45
54
  self.rules = rules
55
+ self.case_insensitive = case_insensitive
46
56
 
47
57
  def clean(self, series: pd.Series) -> pd.Series:
48
58
  """
49
- Applies the standardization rules to the provided Series (requires string data).
59
+ Applies the standardization rules sequentially to the provided Series.
50
60
 
51
- Non-matching values are kept as they are.
52
-
53
61
  Args:
54
62
  series (pd.Series): The pandas Series to clean.
55
63
 
56
64
  Returns:
57
- pd.Series: A new Series with the values cleaned and standardized.
65
+ pd.Series: A new Series with the regex replacements applied.
58
66
  """
59
- return series.astype(str).replace(self.rules, regex=True)
67
+ cleaned_series = series.astype(str)
68
+
69
+ # Set the regex flags based on the case_insensitive setting
70
+ flags = re.IGNORECASE if self.case_insensitive else 0
71
+
72
+ # Sequentially apply each regex rule
73
+ for pattern, replacement in self.rules.items():
74
+ cleaned_series = cleaned_series.str.replace(
75
+ pattern,
76
+ replacement,
77
+ regex=True,
78
+ flags=flags
79
+ )
80
+
81
+ return cleaned_series
60
82
 
61
83
 
62
84
  class DataFrameCleaner:
63
85
  """
64
86
  Orchestrates the cleaning of multiple columns in a pandas DataFrame using a nested dictionary of rules and `ColumnCleaner` objects.
87
+
88
+ Chosen case-sensitivity is applied to all columns.
89
+
90
+ Notes:
91
+ - Write separate, specific rules for each case. Don't combine patterns with an "OR".
92
+ - Define rules from most specific to more general to create a fallback system.
93
+ - Beware of chain replacements (rules matching strings that have already been changed by a previous rule).
65
94
 
66
95
  Args:
67
96
  rules (Dict[str, Dict[str, str]]):
68
97
  A nested dictionary where each top-level key is a column name,
69
98
  and its value is a dictionary of regex rules for that column, as expected by `ColumnCleaner`.
70
99
  """
71
- def __init__(self, rules: Dict[str, Dict[str, str]]):
100
+ def __init__(self, rules: Dict[str, Dict[str, str]], case_insensitive: bool = True):
72
101
  if not isinstance(rules, dict):
73
102
  raise TypeError("The 'rules' argument must be a nested dictionary.")
74
103
 
@@ -80,6 +109,7 @@ class DataFrameCleaner:
80
109
  )
81
110
 
82
111
  self.rules = rules
112
+ self.case_insensitive = case_insensitive
83
113
 
84
114
  def clean(self, df: pd.DataFrame) -> pd.DataFrame:
85
115
  """
@@ -108,7 +138,7 @@ class DataFrameCleaner:
108
138
 
109
139
  for column_name, column_rules in self.rules.items():
110
140
  # Create and apply the specific cleaner for the column
111
- cleaner = ColumnCleaner(rules=column_rules)
141
+ cleaner = ColumnCleaner(rules=column_rules, case_insensitive=self.case_insensitive)
112
142
  df_cleaned[column_name] = cleaner.clean(df_cleaned[column_name])
113
143
 
114
144
  return df_cleaned
@@ -251,7 +281,7 @@ class DataProcessor:
251
281
  raise TypeError(f"Invalid 'transform' action for '{input_col_name}': {transform_action}")
252
282
 
253
283
  if not processed_columns:
254
- print("Warning: The transformation resulted in an empty DataFrame.")
284
+ _LOGGER.warning("The transformation resulted in an empty DataFrame.")
255
285
  return pl.DataFrame()
256
286
 
257
287
  return pl.DataFrame(processed_columns)
@@ -403,7 +433,7 @@ class NumberExtractor:
403
433
  if not isinstance(round_digits, int):
404
434
  raise TypeError("round_digits must be an integer.")
405
435
  if dtype == "int":
406
- print(f"Warning: 'round_digits' is specified but dtype is 'int'. Rounding will be ignored.")
436
+ _LOGGER.warning(f"'round_digits' is specified but dtype is 'int'. Rounding will be ignored.")
407
437
 
408
438
  self.regex_pattern = regex_pattern
409
439
  self.dtype = dtype
@@ -561,9 +591,9 @@ class RatioCalculator:
561
591
  denominator = groups.struct.field("group_2").cast(pl.Float64, strict=False)
562
592
 
563
593
  # Safely perform division, returning null if denominator is 0
564
- return pl.when(denominator != 0).then(
565
- numerator / denominator
566
- ).otherwise(None)
594
+ final_expr = pl.when(denominator != 0).then(numerator / denominator).otherwise(None)
595
+
596
+ return pl.select(final_expr).to_series()
567
597
 
568
598
 
569
599
  class CategoryMapper:
@@ -7,6 +7,7 @@ from functools import wraps
7
7
  from typing import Any, Dict, Tuple, List
8
8
  from .utilities import _script_info
9
9
  import numpy as np
10
+ from .logger import _LOGGER
10
11
 
11
12
 
12
13
  __all__ = [
@@ -46,7 +47,7 @@ class PathManager:
46
47
  if self._is_bundled:
47
48
  # In a Briefcase bundle, resource_path gives an absolute path
48
49
  # to the resource directory.
49
- self.package_root = self._resource_path_func(self.package_name, "")
50
+ self.package_root = self._resource_path_func(self.package_name, "") # type: ignore
50
51
  else:
51
52
  # In development mode, the package root is the directory
52
53
  # containing the anchor file.
@@ -56,7 +57,7 @@ class PathManager:
56
57
  """Checks if the app is running in a bundled environment."""
57
58
  try:
58
59
  # This is the function Briefcase provides in a bundled app
59
- from briefcase.platforms.base import resource_path
60
+ from briefcase.platforms.base import resource_path # type: ignore
60
61
  return True, resource_path
61
62
  except ImportError:
62
63
  return False, None
@@ -147,7 +148,7 @@ class ConfigManager:
147
148
  """
148
149
  path = Path(file_path)
149
150
  if path.exists() and not force_overwrite:
150
- print(f"Configuration file already exists at {path}. Aborting.")
151
+ _LOGGER.warning(f"Configuration file already exists at {path}. Aborting.")
151
152
  return
152
153
 
153
154
  config = configparser.ConfigParser()
@@ -205,7 +206,7 @@ class ConfigManager:
205
206
 
206
207
  with open(path, 'w') as configfile:
207
208
  config.write(configfile)
208
- print(f"Successfully generated config template at: '{path}'")
209
+ _LOGGER.info(f"Successfully generated config template at: '{path}'")
209
210
 
210
211
 
211
212
  # --- GUI Factory ---
@@ -219,8 +220,8 @@ class GUIFactory:
219
220
  Initializes the factory with a configuration object.
220
221
  """
221
222
  self.config = config
222
- sg.theme(self.config.general.theme)
223
- sg.set_options(font=(self.config.general.font_family, 12))
223
+ sg.theme(self.config.general.theme) # type: ignore
224
+ sg.set_options(font=(self.config.general.font_family, 12)) # type: ignore
224
225
 
225
226
  # --- Atomic Element Generators ---
226
227
  def make_button(self, text: str, key: str, **kwargs) -> sg.Button:
@@ -234,13 +235,13 @@ class GUIFactory:
234
235
  (e.g., `tooltip='Click me'`, `disabled=True`).
235
236
  """
236
237
  cfg = self.config
237
- font = (cfg.fonts.font_family, cfg.fonts.button_size, cfg.fonts.button_style)
238
+ font = (cfg.fonts.font_family, cfg.fonts.button_size, cfg.fonts.button_style) # type: ignore
238
239
 
239
240
  style_args = {
240
- "size": cfg.layout.button_size,
241
+ "size": cfg.layout.button_size, # type: ignore
241
242
  "font": font,
242
- "button_color": (cfg.colors.button_text, cfg.colors.button_background),
243
- "mouseover_colors": (cfg.colors.button_text, cfg.colors.button_background_hover),
243
+ "button_color": (cfg.colors.button_text, cfg.colors.button_background), # type: ignore
244
+ "mouseover_colors": (cfg.colors.button_text, cfg.colors.button_background_hover), # type: ignore
244
245
  "border_width": 0,
245
246
  **kwargs
246
247
  }
@@ -257,7 +258,7 @@ class GUIFactory:
257
258
  (e.g., `title_color='red'`, `relief=sg.RELIEF_SUNKEN`).
258
259
  """
259
260
  cfg = self.config
260
- font = (cfg.fonts.font_family, cfg.fonts.frame_size)
261
+ font = (cfg.fonts.font_family, cfg.fonts.frame_size) # type: ignore
261
262
 
262
263
  style_args = {
263
264
  "font": font,
@@ -289,7 +290,7 @@ class GUIFactory:
289
290
  """
290
291
  cfg = self.config
291
292
  bg_color = sg.theme_background_color()
292
- label_font = (cfg.fonts.font_family, cfg.fonts.label_size, cfg.fonts.label_style)
293
+ label_font = (cfg.fonts.font_family, cfg.fonts.label_size, cfg.fonts.label_style) # type: ignore
293
294
 
294
295
  columns = []
295
296
  for name, (val_min, val_max) in data_dict.items():
@@ -298,21 +299,21 @@ class GUIFactory:
298
299
 
299
300
  label = sg.Text(name, font=label_font, background_color=bg_color, key=f"_text_{name}")
300
301
 
301
- input_style = {"size": cfg.layout.input_size_cont, "justification": "center"}
302
+ input_style = {"size": cfg.layout.input_size_cont, "justification": "center"} # type: ignore
302
303
  if is_target:
303
- input_style["text_color"] = cfg.colors.target_text
304
- input_style["disabled_readonly_background_color"] = cfg.colors.target_background
304
+ input_style["text_color"] = cfg.colors.target_text # type: ignore
305
+ input_style["disabled_readonly_background_color"] = cfg.colors.target_background # type: ignore
305
306
 
306
307
  element = sg.Input(default_text, key=key, disabled=is_target, **input_style)
307
308
 
308
309
  if is_target:
309
310
  layout = [[label], [element]]
310
311
  else:
311
- range_font = (cfg.fonts.font_family, cfg.fonts.range_size)
312
+ range_font = (cfg.fonts.font_family, cfg.fonts.range_size) # type: ignore
312
313
  range_text = sg.Text(f"Range: {int(val_min)}-{int(val_max)}", font=range_font, background_color=bg_color)
313
314
  layout = [[label], [element], [range_text]]
314
315
 
315
- layout.append([sg.Text(" ", font=(cfg.fonts.font_family, 2), background_color=bg_color)])
316
+ layout.append([sg.Text(" ", font=(cfg.fonts.font_family, 2), background_color=bg_color)]) # type: ignore
316
317
  columns.append(sg.Column(layout, background_color=bg_color))
317
318
 
318
319
  if layout_mode == 'row':
@@ -340,17 +341,17 @@ class GUIFactory:
340
341
  """
341
342
  cfg = self.config
342
343
  bg_color = sg.theme_background_color()
343
- label_font = (cfg.fonts.font_family, cfg.fonts.label_size, cfg.fonts.label_style)
344
+ label_font = (cfg.fonts.font_family, cfg.fonts.label_size, cfg.fonts.label_style) # type: ignore
344
345
 
345
346
  columns = []
346
347
  for name, values in data_dict.items():
347
348
  label = sg.Text(name, font=label_font, background_color=bg_color, key=f"_text_{name}")
348
349
  element = sg.Combo(
349
350
  values, default_value=values[0], key=name,
350
- size=cfg.layout.input_size_binary, readonly=True
351
+ size=cfg.layout.input_size_binary, readonly=True # type: ignore
351
352
  )
352
353
  layout = [[label], [element]]
353
- layout.append([sg.Text(" ", font=(cfg.fonts.font_family, 2), background_color=bg_color)])
354
+ layout.append([sg.Text(" ", font=(cfg.fonts.font_family, 2), background_color=bg_color)]) # type: ignore
354
355
  columns.append(sg.Column(layout, background_color=bg_color))
355
356
 
356
357
  if layout_mode == 'row':
@@ -370,8 +371,8 @@ class GUIFactory:
370
371
  **kwargs: Additional arguments to pass to the sg.Window constructor
371
372
  (e.g., `location=(100, 100)`, `keep_on_top=True`).
372
373
  """
373
- cfg = self.config.general
374
- version = getattr(self.config.meta, 'version', None)
374
+ cfg = self.config.general # type: ignore
375
+ version = getattr(self.config.meta, 'version', None) # type: ignore
375
376
  full_title = f"{title} v{version}" if version else title
376
377
 
377
378
  window_args = {
@@ -406,9 +407,7 @@ def catch_exceptions(show_popup: bool = True):
406
407
  sg.popup_error("An error occurred:", error_msg, title="Error")
407
408
  else:
408
409
  # Fallback for non-GUI contexts or if popup is disabled
409
- print("--- An exception occurred ---")
410
- print(error_msg)
411
- print("-----------------------------")
410
+ _LOGGER.error(error_msg)
412
411
  return wrapper
413
412
  return decorator
414
413
 
@@ -6,6 +6,7 @@ import numpy as np
6
6
  from .utilities import load_dataframe, list_csv_paths, sanitize_filename, _script_info, merge_dataframes, save_dataframe, threshold_binary_values, make_fullpath
7
7
  from plotnine import ggplot, labs, theme, element_blank # type: ignore
8
8
  from typing import Optional, Union
9
+ from .logger import _LOGGER
9
10
 
10
11
 
11
12
  __all__ = [
@@ -40,7 +41,9 @@ def apply_mice(df: pd.DataFrame, df_name: str, binary_columns: Optional[list[str
40
41
  if binary_columns is not None:
41
42
  invalid_binary_columns = set(binary_columns) - set(df.columns)
42
43
  if invalid_binary_columns:
43
- print(f"⚠️ These 'binary columns' are not in the dataset: {invalid_binary_columns}")
44
+ _LOGGER.warning(f"⚠️ These 'binary columns' are not in the dataset:")
45
+ for invalid_binary_col in invalid_binary_columns:
46
+ print(f" - {invalid_binary_col}")
44
47
  valid_binary_columns = [col for col in binary_columns if col not in invalid_binary_columns]
45
48
  for imputed_df in imputed_datasets:
46
49
  for binary_column_name in valid_binary_columns:
@@ -125,7 +128,7 @@ def get_convergence_diagnostic(kernel: mf.ImputationKernel, imputed_dataset_name
125
128
  plt.savefig(save_path, bbox_inches='tight', format="svg")
126
129
  plt.close()
127
130
 
128
- print(f"{dataset_file_dir} completed.")
131
+ _LOGGER.info(f"{dataset_file_dir} completed.")
129
132
 
130
133
 
131
134
  # Imputed distributions
@@ -210,7 +213,7 @@ def get_imputed_distributions(kernel: mf.ImputationKernel, df_name: str, root_di
210
213
  fig = kernel.plot_imputed_distributions(variables=[feature])
211
214
  _process_figure(fig, feature)
212
215
 
213
- print(f"{local_dir_name} completed.")
216
+ _LOGGER.info(f"{local_dir_name} completed.")
214
217
 
215
218
 
216
219
  def run_mice_pipeline(df_path_or_dir: Union[str,Path], target_columns: list[str],
@@ -240,7 +243,8 @@ def run_mice_pipeline(df_path_or_dir: Union[str,Path], target_columns: list[str]
240
243
  all_file_paths = list(list_csv_paths(input_path).values())
241
244
 
242
245
  for df_path in all_file_paths:
243
- df, df_name = load_dataframe(df_path=df_path)
246
+ df: pd.DataFrame
247
+ df, df_name = load_dataframe(df_path=df_path, kind="pandas") # type: ignore
244
248
 
245
249
  df, df_targets = _skip_targets(df, target_columns)
246
250