dragon-ml-toolbox 1.4.8__tar.gz → 2.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (25) hide show
  1. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/LICENSE-THIRD-PARTY.md +5 -4
  2. {dragon_ml_toolbox-1.4.8/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-2.1.0}/PKG-INFO +24 -14
  3. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/README.md +20 -11
  4. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0/dragon_ml_toolbox.egg-info}/PKG-INFO +24 -14
  5. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +2 -1
  6. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/dragon_ml_toolbox.egg-info/requires.txt +2 -1
  7. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/ml_tools/MICE_imputation.py +27 -28
  8. dragon_ml_toolbox-2.1.0/ml_tools/PSO_optimization.py +490 -0
  9. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/ml_tools/VIF_factor.py +20 -17
  10. dragon_ml_toolbox-1.4.8/ml_tools/particle_swarm_optimization.py → dragon_ml_toolbox-2.1.0/ml_tools/_particle_swarm_optimization.py +5 -0
  11. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/ml_tools/data_exploration.py +58 -32
  12. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/ml_tools/ensemble_learning.py +40 -42
  13. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/ml_tools/handle_excel.py +98 -78
  14. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/ml_tools/logger.py +13 -11
  15. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/ml_tools/utilities.py +134 -46
  16. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/pyproject.toml +5 -4
  17. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/LICENSE +0 -0
  18. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  19. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  20. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/ml_tools/__init__.py +0 -0
  21. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/ml_tools/datasetmaster.py +0 -0
  22. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/ml_tools/pytorch_models.py +0 -0
  23. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/ml_tools/trainer.py +0 -0
  24. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/ml_tools/vision_helpers.py +0 -0
  25. {dragon_ml_toolbox-1.4.8 → dragon_ml_toolbox-2.1.0}/setup.cfg +0 -0
@@ -5,10 +5,10 @@ This project depends on the following third-party packages. Each is governed by
5
5
  - [pandas](https://github.com/pandas-dev/pandas/blob/main/LICENSE)
6
6
  - [numpy](https://github.com/numpy/numpy/blob/main/LICENSE.txt)
7
7
  - [matplotlib](https://github.com/matplotlib/matplotlib/blob/main/LICENSE/LICENSE)
8
- - [seaborn](https://github.com/mwaskom/seaborn/blob/main/LICENSE)
8
+ - [seaborn](https://github.com/mwaskom/seaborn/blob/master/LICENSE.md)
9
9
  - [statsmodels](https://github.com/statsmodels/statsmodels/blob/main/LICENSE.txt)
10
- - [ipython](https://github.com/ipython/ipython/blob/main/COPYING.rst)
11
- - [ipykernel](https://github.com/ipython/ipykernel/blob/main/COPYING.rst)
10
+ - [ipython](https://github.com/ipython/ipython/blob/main/LICENSE)
11
+ - [ipykernel](https://github.com/ipython/ipykernel/blob/main/LICENSE)
12
12
  - [notebook](https://github.com/jupyter/notebook/blob/main/LICENSE)
13
13
  - [jupyterlab](https://github.com/jupyterlab/jupyterlab/blob/main/LICENSE)
14
14
  - [ipywidgets](https://github.com/jupyter-widgets/ipywidgets/blob/main/LICENSE)
@@ -24,5 +24,6 @@ This project depends on the following third-party packages. Each is governed by
24
24
  - [openpyxl](https://github.com/chronossc/openpyxl/blob/main/LICENSE)
25
25
  - [miceforest](https://github.com/AnotherSamWilson/miceforest/blob/main/LICENSE)
26
26
  - [polars](https://github.com/pola-rs/polars/blob/main/LICENSE)
27
- - [plotnine](https://github.com/has2k1/plotnine/blob/main/LICENSE.txt)
27
+ - [plotnine](https://github.com/has2k1/plotnine/blob/main/LICENSE)
28
28
  - [pyswarm](https://pythonhosted.org/pyswarm/#license)
29
+ - [tqdm](https://github.com/tqdm/tqdm/blob/master/LICENSE)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 1.4.8
3
+ Version: 2.1.0
4
4
  Summary: A collection of tools for data science and machine learning projects
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -8,7 +8,7 @@ Project-URL: Homepage, https://github.com/DrAg0n-BoRn/ML_tools
8
8
  Project-URL: Changelog, https://github.com/DrAg0n-BoRn/ML_tools/blob/master/CHANGELOG.md
9
9
  Classifier: Programming Language :: Python :: 3
10
10
  Classifier: Operating System :: OS Independent
11
- Requires-Python: >=3.9
11
+ Requires-Python: >=3.10
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
14
  License-File: LICENSE-THIRD-PARTY.md
@@ -32,9 +32,10 @@ Requires-Dist: joblib
32
32
  Requires-Dist: xgboost
33
33
  Requires-Dist: lightgbm<=4.5.0
34
34
  Requires-Dist: shap
35
+ Requires-Dist: tqdm>=4.0
36
+ Requires-Dist: Pillow
35
37
  Provides-Extra: pytorch
36
38
  Requires-Dist: torch; extra == "pytorch"
37
- Requires-Dist: Pillow; extra == "pytorch"
38
39
  Requires-Dist: torchvision; extra == "pytorch"
39
40
  Dynamic: license-file
40
41
 
@@ -49,7 +50,7 @@ A collection of Python utilities for data science and machine learning, structur
49
50
 
50
51
  ## Installation
51
52
 
52
- **Python 3.9+ recommended.**
53
+ **Python 3.10+ recommended.**
53
54
 
54
55
  ### Via PyPI
55
56
 
@@ -59,6 +60,16 @@ Install the latest stable release from PyPI:
59
60
  pip install dragon-ml-tools
60
61
  ```
61
62
 
63
+ ### Via GitHub (Editable)
64
+
65
+ Clone the repository and install in editable mode with optional dependencies:
66
+
67
+ ```bash
68
+ git clone https://github.com/DrAg0n-BoRn/ML_tools.git
69
+ cd ML_tools
70
+ pip install -e .
71
+ ```
72
+
62
73
  ### Via conda-forge
63
74
 
64
75
  Install from the conda-forge channel:
@@ -66,22 +77,21 @@ Install from the conda-forge channel:
66
77
  ```bash
67
78
  conda install -c conda-forge dragon-ml-toolbox
68
79
  ```
80
+ **Note:** The conda-forge version is outdated or broken due to dependency incompatibilities.
69
81
 
70
- #### Optional dependencies
82
+ ## Optional dependencies
83
+
84
+ **PyTorch** is an optional dependency; it provides different builds depending on the **platform** and **hardware acceleration** (e.g., CUDA for NVIDIA GPUs on Linux/Windows, or MPS for Apple Silicon on macOS).
85
+
86
+ Install the default CPU-only version with
71
87
 
72
88
  ```bash
73
89
  pip install dragon-ml-tools[pytorch]
74
90
  ```
75
91
 
76
- ### Via GitHub (Editable)
77
-
78
- Clone the repository and install in editable mode with optional dependencies:
92
+ To make use of GPU acceleration, use the official PyTorch installation instructions:
79
93
 
80
- ```bash
81
- git clone https://github.com/DrAg0n-BoRn/ML_tools.git
82
- cd ML_tools
83
- pip install -e .
84
- ```
94
+ [PyTorch Instructions](https://pytorch.org/get-started/locally/)
85
95
 
86
96
  ## Usage
87
97
 
@@ -101,7 +111,7 @@ ensemble_learning
101
111
  handle_excel
102
112
  logger
103
113
  MICE_imputation
104
- particle_swarm_optimization
114
+ PSO_optimization
105
115
  trainer
106
116
  utilities
107
117
  VIF_factor
@@ -9,7 +9,7 @@ A collection of Python utilities for data science and machine learning, structur
9
9
 
10
10
  ## Installation
11
11
 
12
- **Python 3.9+ recommended.**
12
+ **Python 3.10+ recommended.**
13
13
 
14
14
  ### Via PyPI
15
15
 
@@ -19,6 +19,16 @@ Install the latest stable release from PyPI:
19
19
  pip install dragon-ml-tools
20
20
  ```
21
21
 
22
+ ### Via GitHub (Editable)
23
+
24
+ Clone the repository and install in editable mode with optional dependencies:
25
+
26
+ ```bash
27
+ git clone https://github.com/DrAg0n-BoRn/ML_tools.git
28
+ cd ML_tools
29
+ pip install -e .
30
+ ```
31
+
22
32
  ### Via conda-forge
23
33
 
24
34
  Install from the conda-forge channel:
@@ -26,22 +36,21 @@ Install from the conda-forge channel:
26
36
  ```bash
27
37
  conda install -c conda-forge dragon-ml-toolbox
28
38
  ```
39
+ **Note:** The conda-forge version is outdated or broken due to dependency incompatibilities.
29
40
 
30
- #### Optional dependencies
41
+ ## Optional dependencies
42
+
43
+ **PyTorch** is an optional dependency; it provides different builds depending on the **platform** and **hardware acceleration** (e.g., CUDA for NVIDIA GPUs on Linux/Windows, or MPS for Apple Silicon on macOS).
44
+
45
+ Install the default CPU-only version with
31
46
 
32
47
  ```bash
33
48
  pip install dragon-ml-tools[pytorch]
34
49
  ```
35
50
 
36
- ### Via GitHub (Editable)
37
-
38
- Clone the repository and install in editable mode with optional dependencies:
51
+ To make use of GPU acceleration, use the official PyTorch installation instructions:
39
52
 
40
- ```bash
41
- git clone https://github.com/DrAg0n-BoRn/ML_tools.git
42
- cd ML_tools
43
- pip install -e .
44
- ```
53
+ [PyTorch Instructions](https://pytorch.org/get-started/locally/)
45
54
 
46
55
  ## Usage
47
56
 
@@ -61,7 +70,7 @@ ensemble_learning
61
70
  handle_excel
62
71
  logger
63
72
  MICE_imputation
64
- particle_swarm_optimization
73
+ PSO_optimization
65
74
  trainer
66
75
  utilities
67
76
  VIF_factor
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 1.4.8
3
+ Version: 2.1.0
4
4
  Summary: A collection of tools for data science and machine learning projects
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -8,7 +8,7 @@ Project-URL: Homepage, https://github.com/DrAg0n-BoRn/ML_tools
8
8
  Project-URL: Changelog, https://github.com/DrAg0n-BoRn/ML_tools/blob/master/CHANGELOG.md
9
9
  Classifier: Programming Language :: Python :: 3
10
10
  Classifier: Operating System :: OS Independent
11
- Requires-Python: >=3.9
11
+ Requires-Python: >=3.10
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
14
  License-File: LICENSE-THIRD-PARTY.md
@@ -32,9 +32,10 @@ Requires-Dist: joblib
32
32
  Requires-Dist: xgboost
33
33
  Requires-Dist: lightgbm<=4.5.0
34
34
  Requires-Dist: shap
35
+ Requires-Dist: tqdm>=4.0
36
+ Requires-Dist: Pillow
35
37
  Provides-Extra: pytorch
36
38
  Requires-Dist: torch; extra == "pytorch"
37
- Requires-Dist: Pillow; extra == "pytorch"
38
39
  Requires-Dist: torchvision; extra == "pytorch"
39
40
  Dynamic: license-file
40
41
 
@@ -49,7 +50,7 @@ A collection of Python utilities for data science and machine learning, structur
49
50
 
50
51
  ## Installation
51
52
 
52
- **Python 3.9+ recommended.**
53
+ **Python 3.10+ recommended.**
53
54
 
54
55
  ### Via PyPI
55
56
 
@@ -59,6 +60,16 @@ Install the latest stable release from PyPI:
59
60
  pip install dragon-ml-tools
60
61
  ```
61
62
 
63
+ ### Via GitHub (Editable)
64
+
65
+ Clone the repository and install in editable mode with optional dependencies:
66
+
67
+ ```bash
68
+ git clone https://github.com/DrAg0n-BoRn/ML_tools.git
69
+ cd ML_tools
70
+ pip install -e .
71
+ ```
72
+
62
73
  ### Via conda-forge
63
74
 
64
75
  Install from the conda-forge channel:
@@ -66,22 +77,21 @@ Install from the conda-forge channel:
66
77
  ```bash
67
78
  conda install -c conda-forge dragon-ml-toolbox
68
79
  ```
80
+ **Note:** The conda-forge version is outdated or broken due to dependency incompatibilities.
69
81
 
70
- #### Optional dependencies
82
+ ## Optional dependencies
83
+
84
+ **PyTorch** is an optional dependency; it provides different builds depending on the **platform** and **hardware acceleration** (e.g., CUDA for NVIDIA GPUs on Linux/Windows, or MPS for Apple Silicon on macOS).
85
+
86
+ Install the default CPU-only version with
71
87
 
72
88
  ```bash
73
89
  pip install dragon-ml-tools[pytorch]
74
90
  ```
75
91
 
76
- ### Via GitHub (Editable)
77
-
78
- Clone the repository and install in editable mode with optional dependencies:
92
+ To make use of GPU acceleration, use the official PyTorch installation instructions:
79
93
 
80
- ```bash
81
- git clone https://github.com/DrAg0n-BoRn/ML_tools.git
82
- cd ML_tools
83
- pip install -e .
84
- ```
94
+ [PyTorch Instructions](https://pytorch.org/get-started/locally/)
85
95
 
86
96
  ## Usage
87
97
 
@@ -101,7 +111,7 @@ ensemble_learning
101
111
  handle_excel
102
112
  logger
103
113
  MICE_imputation
104
- particle_swarm_optimization
114
+ PSO_optimization
105
115
  trainer
106
116
  utilities
107
117
  VIF_factor
@@ -8,14 +8,15 @@ dragon_ml_toolbox.egg-info/dependency_links.txt
8
8
  dragon_ml_toolbox.egg-info/requires.txt
9
9
  dragon_ml_toolbox.egg-info/top_level.txt
10
10
  ml_tools/MICE_imputation.py
11
+ ml_tools/PSO_optimization.py
11
12
  ml_tools/VIF_factor.py
12
13
  ml_tools/__init__.py
14
+ ml_tools/_particle_swarm_optimization.py
13
15
  ml_tools/data_exploration.py
14
16
  ml_tools/datasetmaster.py
15
17
  ml_tools/ensemble_learning.py
16
18
  ml_tools/handle_excel.py
17
19
  ml_tools/logger.py
18
- ml_tools/particle_swarm_optimization.py
19
20
  ml_tools/pytorch_models.py
20
21
  ml_tools/trainer.py
21
22
  ml_tools/utilities.py
@@ -18,8 +18,9 @@ joblib
18
18
  xgboost
19
19
  lightgbm<=4.5.0
20
20
  shap
21
+ tqdm>=4.0
22
+ Pillow
21
23
 
22
24
  [pytorch]
23
25
  torch
24
- Pillow
25
26
  torchvision
@@ -1,11 +1,11 @@
1
1
  import pandas as pd
2
2
  import miceforest as mf
3
- import os
3
+ from pathlib import Path
4
4
  import matplotlib.pyplot as plt
5
5
  import numpy as np
6
- from .utilities import load_dataframe, list_csv_paths, sanitize_filename, _script_info, merge_dataframes, save_dataframe, threshold_binary_values
6
+ from .utilities import load_dataframe, list_csv_paths, sanitize_filename, _script_info, merge_dataframes, save_dataframe, threshold_binary_values, make_fullpath
7
7
  from plotnine import ggplot, labs, theme, element_blank # type: ignore
8
- from typing import Optional
8
+ from typing import Optional, Union
9
9
 
10
10
 
11
11
  __all__ = [
@@ -60,7 +60,7 @@ def apply_mice(df: pd.DataFrame, df_name: str, binary_columns: Optional[list[str
60
60
  return kernel, imputed_datasets, imputed_dataset_names
61
61
 
62
62
 
63
- def save_imputed_datasets(save_dir: str, imputed_datasets: list, df_targets: pd.DataFrame, imputed_dataset_names: list[str]):
63
+ def save_imputed_datasets(save_dir: Union[str, Path], imputed_datasets: list, df_targets: pd.DataFrame, imputed_dataset_names: list[str]):
64
64
  for imputed_df, subname in zip(imputed_datasets, imputed_dataset_names):
65
65
  merged_df = merge_dataframes(imputed_df, df_targets, direction="horizontal", verbose=False)
66
66
  save_dataframe(df=merged_df, save_dir=save_dir, filename=subname)
@@ -72,7 +72,7 @@ def get_na_column_names(df: pd.DataFrame):
72
72
 
73
73
 
74
74
  #Convergence diagnostic
75
- def get_convergence_diagnostic(kernel: mf.ImputationKernel, imputed_dataset_names: list[str], column_names: list[str], root_dir: str, fontsize: int=16):
75
+ def get_convergence_diagnostic(kernel: mf.ImputationKernel, imputed_dataset_names: list[str], column_names: list[str], root_dir: Union[str,Path], fontsize: int=16):
76
76
  """
77
77
  Generate and save convergence diagnostic plots for imputed variables.
78
78
 
@@ -90,7 +90,7 @@ def get_convergence_diagnostic(kernel: mf.ImputationKernel, imputed_dataset_name
90
90
  raise ValueError(f"Expected {dataset_count} names in imputed_dataset_names, got {len(imputed_dataset_names)}")
91
91
 
92
92
  # Check path
93
- os.makedirs(root_dir, exist_ok=True)
93
+ root_path = make_fullpath(root_dir, make=True)
94
94
 
95
95
  # Styling parameters
96
96
  label_font = {'size': fontsize, 'weight': 'bold'}
@@ -99,8 +99,7 @@ def get_convergence_diagnostic(kernel: mf.ImputationKernel, imputed_dataset_name
99
99
  for dataset_id, imputed_dataset_name in zip(range(dataset_count), imputed_dataset_names):
100
100
  #Check directory for current dataset
101
101
  dataset_file_dir = f"Convergence_Metrics_{imputed_dataset_name}"
102
- local_save_dir = os.path.join(root_dir, dataset_file_dir)
103
- os.makedirs(local_save_dir, exist_ok=True)
102
+ local_save_dir = make_fullpath(input_path=root_path / dataset_file_dir, make=True)
104
103
 
105
104
  for feature_name in column_names:
106
105
  means_per_iteration = []
@@ -121,8 +120,8 @@ def get_convergence_diagnostic(kernel: mf.ImputationKernel, imputed_dataset_name
121
120
  plt.grid(True)
122
121
 
123
122
  feature_save_name = sanitize_filename(feature_name)
124
-
125
- save_path = os.path.join(local_save_dir, feature_save_name + ".svg")
123
+ feature_save_name = feature_save_name + ".svg"
124
+ save_path = local_save_dir / feature_save_name
126
125
  plt.savefig(save_path, bbox_inches='tight', format="svg")
127
126
  plt.close()
128
127
 
@@ -130,18 +129,17 @@ def get_convergence_diagnostic(kernel: mf.ImputationKernel, imputed_dataset_name
130
129
 
131
130
 
132
131
  # Imputed distributions
133
- def get_imputed_distributions(kernel: mf.ImputationKernel, df_name: str, root_dir: str, column_names: list[str], one_plot: bool=False, fontsize: int=14):
132
+ def get_imputed_distributions(kernel: mf.ImputationKernel, df_name: str, root_dir: Union[str, Path], column_names: list[str], one_plot: bool=False, fontsize: int=14):
134
133
  '''
135
134
  It works using miceforest's authors implementation of the method `.plot_imputed_distributions()`.
136
135
 
137
136
  Set `one_plot=True` to save a single image including all feature distribution plots instead.
138
137
  '''
139
138
  # Check path
140
- os.makedirs(root_dir, exist_ok=True)
139
+ root_path = make_fullpath(root_dir, make=True)
140
+
141
141
  local_dir_name = f"Distribution_Metrics_{df_name}_imputed"
142
- local_save_dir = os.path.join(root_dir, local_dir_name)
143
- if not os.path.isdir(local_save_dir):
144
- os.makedirs(local_save_dir)
142
+ local_save_dir = make_fullpath(root_path / local_dir_name, make=True)
145
143
 
146
144
  # Styling parameters
147
145
  legend_kwargs = {'frameon': True, 'facecolor': 'white', 'framealpha': 0.8}
@@ -191,9 +189,11 @@ def get_imputed_distributions(kernel: mf.ImputationKernel, df_name: str, root_di
191
189
 
192
190
  # sanitize savename
193
191
  feature_save_name = sanitize_filename(filename)
192
+ feature_save_name = feature_save_name + ".svg"
193
+ new_save_path = local_save_dir / feature_save_name
194
194
 
195
195
  fig.savefig(
196
- os.path.join(local_save_dir, feature_save_name + ".svg"),
196
+ new_save_path,
197
197
  format='svg',
198
198
  bbox_inches='tight',
199
199
  pad_inches=0.1
@@ -213,8 +213,8 @@ def get_imputed_distributions(kernel: mf.ImputationKernel, df_name: str, root_di
213
213
  print(f"{local_dir_name} completed.")
214
214
 
215
215
 
216
- def run_mice_pipeline(df_path_or_dir: str, target_columns: list[str],
217
- save_datasets_dir: str, save_metrics_dir: str,
216
+ def run_mice_pipeline(df_path_or_dir: Union[str,Path], target_columns: list[str],
217
+ save_datasets_dir: Union[str,Path], save_metrics_dir: Union[str,Path],
218
218
  binary_columns: Optional[list[str]]=None,
219
219
  resulting_datasets: int=1,
220
220
  iterations: int=20,
@@ -230,15 +230,14 @@ def run_mice_pipeline(df_path_or_dir: str, target_columns: list[str],
230
230
  Target columns must be skipped from the imputation. Binary columns will be thresholded after imputation.
231
231
  """
232
232
  # Check paths
233
- os.makedirs(save_datasets_dir, exist_ok=True)
234
- os.makedirs(save_metrics_dir, exist_ok=True)
233
+ save_datasets_path = make_fullpath(save_datasets_dir, make=True)
234
+ save_metrics_path = make_fullpath(save_metrics_dir, make=True)
235
235
 
236
- if os.path.isfile(df_path_or_dir):
237
- all_file_paths = [df_path_or_dir]
238
- elif os.path.isdir(df_path_or_dir):
239
- all_file_paths = list(list_csv_paths(df_path_or_dir).values())
236
+ input_path = make_fullpath(df_path_or_dir)
237
+ if input_path.is_file():
238
+ all_file_paths = [input_path]
240
239
  else:
241
- raise ValueError(f"Invalid path or directory: {df_path_or_dir}")
240
+ all_file_paths = list(list_csv_paths(input_path).values())
242
241
 
243
242
  for df_path in all_file_paths:
244
243
  df, df_name = load_dataframe(df_path=df_path)
@@ -247,13 +246,13 @@ def run_mice_pipeline(df_path_or_dir: str, target_columns: list[str],
247
246
 
248
247
  kernel, imputed_datasets, imputed_dataset_names = apply_mice(df=df, df_name=df_name, binary_columns=binary_columns, resulting_datasets=resulting_datasets, iterations=iterations, random_state=random_state)
249
248
 
250
- save_imputed_datasets(save_dir=save_datasets_dir, imputed_datasets=imputed_datasets, df_targets=df_targets, imputed_dataset_names=imputed_dataset_names)
249
+ save_imputed_datasets(save_dir=save_datasets_path, imputed_datasets=imputed_datasets, df_targets=df_targets, imputed_dataset_names=imputed_dataset_names)
251
250
 
252
251
  imputed_column_names = get_na_column_names(df=df)
253
252
 
254
- get_convergence_diagnostic(kernel=kernel, imputed_dataset_names=imputed_dataset_names, column_names=imputed_column_names, root_dir=save_metrics_dir)
253
+ get_convergence_diagnostic(kernel=kernel, imputed_dataset_names=imputed_dataset_names, column_names=imputed_column_names, root_dir=save_metrics_path)
255
254
 
256
- get_imputed_distributions(kernel=kernel, df_name=df_name, root_dir=save_metrics_dir, column_names=imputed_column_names)
255
+ get_imputed_distributions(kernel=kernel, df_name=df_name, root_dir=save_metrics_path, column_names=imputed_column_names)
257
256
 
258
257
 
259
258
  def _skip_targets(df: pd.DataFrame, target_cols: list[str]):