dragon-ml-toolbox 2.4.0__tar.gz → 3.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic. Click here for more details.
- {dragon_ml_toolbox-2.4.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-3.1.0}/PKG-INFO +7 -4
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/README.md +5 -2
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0/dragon_ml_toolbox.egg-info}/PKG-INFO +7 -4
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +7 -4
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/ETL_engineering.py +49 -19
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/GUI_tools.py +24 -25
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/MICE_imputation.py +8 -4
- dragon_ml_toolbox-3.1.0/ml_tools/ML_callbacks.py +341 -0
- dragon_ml_toolbox-3.1.0/ml_tools/ML_evaluation.py +255 -0
- dragon_ml_toolbox-3.1.0/ml_tools/ML_trainer.py +344 -0
- dragon_ml_toolbox-3.1.0/ml_tools/ML_tutorial.py +300 -0
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/PSO_optimization.py +27 -20
- dragon_ml_toolbox-3.1.0/ml_tools/RNN_forecast.py +49 -0
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/VIF_factor.py +6 -5
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/data_exploration.py +2 -2
- dragon_ml_toolbox-3.1.0/ml_tools/datasetmaster.py +681 -0
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/ensemble_learning.py +12 -9
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/handle_excel.py +9 -10
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/logger.py +45 -8
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/utilities.py +18 -1
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/pyproject.toml +2 -2
- dragon_ml_toolbox-2.4.0/ml_tools/datasetmaster.py +0 -607
- dragon_ml_toolbox-2.4.0/ml_tools/trainer.py +0 -346
- dragon_ml_toolbox-2.4.0/ml_tools/vision_helpers.py +0 -231
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/LICENSE +0 -0
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/__init__.py +0 -0
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/ml_tools/_particle_swarm_optimization.py +0 -0
- /dragon_ml_toolbox-2.4.0/ml_tools/pytorch_models.py → /dragon_ml_toolbox-3.1.0/ml_tools/_pytorch_models.py +0 -0
- {dragon_ml_toolbox-2.4.0 → dragon_ml_toolbox-3.1.0}/setup.cfg +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dragon-ml-toolbox
|
|
3
|
-
Version: 2.4.0
|
|
4
|
-
Summary: A collection of tools for data science and machine learning projects
|
|
3
|
+
Version: 3.1.0
|
|
4
|
+
Summary: A collection of tools for data science and machine learning projects.
|
|
5
5
|
Author-email: Karl Loza <luigiloza@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/DrAg0n-BoRn/ML_tools
|
|
@@ -125,9 +125,12 @@ GUI_tools
|
|
|
125
125
|
handle_excel
|
|
126
126
|
logger
|
|
127
127
|
MICE_imputation
|
|
128
|
+
ML_callbacks
|
|
129
|
+
ML_evaluation
|
|
130
|
+
ML_trainer
|
|
131
|
+
ML_tutorial
|
|
128
132
|
PSO_optimization
|
|
129
|
-
|
|
133
|
+
RNN_forecast
|
|
130
134
|
utilities
|
|
131
135
|
VIF_factor
|
|
132
|
-
vision_helpers
|
|
133
136
|
```
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dragon-ml-toolbox
|
|
3
|
-
Version: 2.4.0
|
|
4
|
-
Summary: A collection of tools for data science and machine learning projects
|
|
3
|
+
Version: 3.1.0
|
|
4
|
+
Summary: A collection of tools for data science and machine learning projects.
|
|
5
5
|
Author-email: Karl Loza <luigiloza@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/DrAg0n-BoRn/ML_tools
|
|
@@ -125,9 +125,12 @@ GUI_tools
|
|
|
125
125
|
handle_excel
|
|
126
126
|
logger
|
|
127
127
|
MICE_imputation
|
|
128
|
+
ML_callbacks
|
|
129
|
+
ML_evaluation
|
|
130
|
+
ML_trainer
|
|
131
|
+
ML_tutorial
|
|
128
132
|
PSO_optimization
|
|
129
|
-
|
|
133
|
+
RNN_forecast
|
|
130
134
|
utilities
|
|
131
135
|
VIF_factor
|
|
132
|
-
vision_helpers
|
|
133
136
|
```
|
|
@@ -10,16 +10,19 @@ dragon_ml_toolbox.egg-info/top_level.txt
|
|
|
10
10
|
ml_tools/ETL_engineering.py
|
|
11
11
|
ml_tools/GUI_tools.py
|
|
12
12
|
ml_tools/MICE_imputation.py
|
|
13
|
+
ml_tools/ML_callbacks.py
|
|
14
|
+
ml_tools/ML_evaluation.py
|
|
15
|
+
ml_tools/ML_trainer.py
|
|
16
|
+
ml_tools/ML_tutorial.py
|
|
13
17
|
ml_tools/PSO_optimization.py
|
|
18
|
+
ml_tools/RNN_forecast.py
|
|
14
19
|
ml_tools/VIF_factor.py
|
|
15
20
|
ml_tools/__init__.py
|
|
16
21
|
ml_tools/_particle_swarm_optimization.py
|
|
22
|
+
ml_tools/_pytorch_models.py
|
|
17
23
|
ml_tools/data_exploration.py
|
|
18
24
|
ml_tools/datasetmaster.py
|
|
19
25
|
ml_tools/ensemble_learning.py
|
|
20
26
|
ml_tools/handle_excel.py
|
|
21
27
|
ml_tools/logger.py
|
|
22
|
-
ml_tools/pytorch_models.py
|
|
23
|
-
ml_tools/trainer.py
|
|
24
|
-
ml_tools/utilities.py
|
|
25
|
-
ml_tools/vision_helpers.py
|
|
28
|
+
ml_tools/utilities.py
|
|
@@ -3,17 +3,18 @@ import re
|
|
|
3
3
|
from typing import Literal, Union, Optional, Any, Callable, List, Dict
|
|
4
4
|
from .utilities import _script_info
|
|
5
5
|
import pandas as pd
|
|
6
|
+
from .logger import _LOGGER
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
__all__ = [
|
|
9
10
|
"ColumnCleaner",
|
|
10
|
-
"DataFrameCleaner"
|
|
11
|
+
"DataFrameCleaner",
|
|
11
12
|
"TransformationRecipe",
|
|
12
13
|
"DataProcessor",
|
|
13
14
|
"KeywordDummifier",
|
|
14
15
|
"NumberExtractor",
|
|
15
16
|
"MultiNumberExtractor",
|
|
16
|
-
"RatioCalculator"
|
|
17
|
+
"RatioCalculator",
|
|
17
18
|
"CategoryMapper",
|
|
18
19
|
"RegexMapper",
|
|
19
20
|
"ValueBinner",
|
|
@@ -24,18 +25,26 @@ __all__ = [
|
|
|
24
25
|
|
|
25
26
|
class ColumnCleaner:
|
|
26
27
|
"""
|
|
27
|
-
Cleans and standardizes a
|
|
28
|
+
Cleans and standardizes a pandas Series by applying regex-to-replacement rules.
|
|
29
|
+
Supports sub-string replacements and case-insensitivity.
|
|
30
|
+
|
|
31
|
+
Notes:
|
|
32
|
+
- Write separate, specific rules for each case. Don't combine patterns with an "OR".
|
|
33
|
+
- Define rules from most specific to more general to create a fallback system.
|
|
34
|
+
- Beware of chain replacements (rules matching strings that have already been changed by a previous rule).
|
|
28
35
|
|
|
29
36
|
Args:
|
|
30
37
|
rules (Dict[str, str]):
|
|
31
|
-
A dictionary
|
|
32
|
-
|
|
38
|
+
A dictionary of regex patterns to replacement strings. Can use
|
|
39
|
+
backreferences in the replacement statement (e.g., r'\\1 \\2 \\3 \\4 \\5') for captured groups.
|
|
40
|
+
case_insensitive (bool):
|
|
41
|
+
If True, regex matching ignores case.
|
|
33
42
|
"""
|
|
34
|
-
def __init__(self, rules: Dict[str, str]):
|
|
43
|
+
def __init__(self, rules: Dict[str, str], case_insensitive: bool = True):
|
|
35
44
|
if not isinstance(rules, dict):
|
|
36
45
|
raise TypeError("The 'rules' argument must be a dictionary.")
|
|
37
46
|
|
|
38
|
-
# Validate
|
|
47
|
+
# Validate regex patterns
|
|
39
48
|
for pattern in rules.keys():
|
|
40
49
|
try:
|
|
41
50
|
re.compile(pattern)
|
|
@@ -43,32 +52,52 @@ class ColumnCleaner:
|
|
|
43
52
|
raise ValueError(f"Invalid regex pattern '{pattern}': {e}") from e
|
|
44
53
|
|
|
45
54
|
self.rules = rules
|
|
55
|
+
self.case_insensitive = case_insensitive
|
|
46
56
|
|
|
47
57
|
def clean(self, series: pd.Series) -> pd.Series:
|
|
48
58
|
"""
|
|
49
|
-
Applies the standardization rules to the provided Series
|
|
59
|
+
Applies the standardization rules sequentially to the provided Series.
|
|
50
60
|
|
|
51
|
-
Non-matching values are kept as they are.
|
|
52
|
-
|
|
53
61
|
Args:
|
|
54
62
|
series (pd.Series): The pandas Series to clean.
|
|
55
63
|
|
|
56
64
|
Returns:
|
|
57
|
-
pd.Series: A new Series with the
|
|
65
|
+
pd.Series: A new Series with the regex replacements applied.
|
|
58
66
|
"""
|
|
59
|
-
|
|
67
|
+
cleaned_series = series.astype(str)
|
|
68
|
+
|
|
69
|
+
# Set the regex flags based on the case_insensitive setting
|
|
70
|
+
flags = re.IGNORECASE if self.case_insensitive else 0
|
|
71
|
+
|
|
72
|
+
# Sequentially apply each regex rule
|
|
73
|
+
for pattern, replacement in self.rules.items():
|
|
74
|
+
cleaned_series = cleaned_series.str.replace(
|
|
75
|
+
pattern,
|
|
76
|
+
replacement,
|
|
77
|
+
regex=True,
|
|
78
|
+
flags=flags
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
return cleaned_series
|
|
60
82
|
|
|
61
83
|
|
|
62
84
|
class DataFrameCleaner:
|
|
63
85
|
"""
|
|
64
86
|
Orchestrates the cleaning of multiple columns in a pandas DataFrame using a nested dictionary of rules and `ColumnCleaner` objects.
|
|
87
|
+
|
|
88
|
+
Chosen case-sensitivity is applied to all columns.
|
|
89
|
+
|
|
90
|
+
Notes:
|
|
91
|
+
- Write separate, specific rules for each case. Don't combine patterns with an "OR".
|
|
92
|
+
- Define rules from most specific to more general to create a fallback system.
|
|
93
|
+
- Beware of chain replacements (rules matching strings that have already been changed by a previous rule).
|
|
65
94
|
|
|
66
95
|
Args:
|
|
67
96
|
rules (Dict[str, Dict[str, str]]):
|
|
68
97
|
A nested dictionary where each top-level key is a column name,
|
|
69
98
|
and its value is a dictionary of regex rules for that column, as expected by `ColumnCleaner`.
|
|
70
99
|
"""
|
|
71
|
-
def __init__(self, rules: Dict[str, Dict[str, str]]):
|
|
100
|
+
def __init__(self, rules: Dict[str, Dict[str, str]], case_insensitive: bool = True):
|
|
72
101
|
if not isinstance(rules, dict):
|
|
73
102
|
raise TypeError("The 'rules' argument must be a nested dictionary.")
|
|
74
103
|
|
|
@@ -80,6 +109,7 @@ class DataFrameCleaner:
|
|
|
80
109
|
)
|
|
81
110
|
|
|
82
111
|
self.rules = rules
|
|
112
|
+
self.case_insensitive = case_insensitive
|
|
83
113
|
|
|
84
114
|
def clean(self, df: pd.DataFrame) -> pd.DataFrame:
|
|
85
115
|
"""
|
|
@@ -108,7 +138,7 @@ class DataFrameCleaner:
|
|
|
108
138
|
|
|
109
139
|
for column_name, column_rules in self.rules.items():
|
|
110
140
|
# Create and apply the specific cleaner for the column
|
|
111
|
-
cleaner = ColumnCleaner(rules=column_rules)
|
|
141
|
+
cleaner = ColumnCleaner(rules=column_rules, case_insensitive=self.case_insensitive)
|
|
112
142
|
df_cleaned[column_name] = cleaner.clean(df_cleaned[column_name])
|
|
113
143
|
|
|
114
144
|
return df_cleaned
|
|
@@ -251,7 +281,7 @@ class DataProcessor:
|
|
|
251
281
|
raise TypeError(f"Invalid 'transform' action for '{input_col_name}': {transform_action}")
|
|
252
282
|
|
|
253
283
|
if not processed_columns:
|
|
254
|
-
|
|
284
|
+
_LOGGER.warning("The transformation resulted in an empty DataFrame.")
|
|
255
285
|
return pl.DataFrame()
|
|
256
286
|
|
|
257
287
|
return pl.DataFrame(processed_columns)
|
|
@@ -403,7 +433,7 @@ class NumberExtractor:
|
|
|
403
433
|
if not isinstance(round_digits, int):
|
|
404
434
|
raise TypeError("round_digits must be an integer.")
|
|
405
435
|
if dtype == "int":
|
|
406
|
-
|
|
436
|
+
_LOGGER.warning(f"'round_digits' is specified but dtype is 'int'. Rounding will be ignored.")
|
|
407
437
|
|
|
408
438
|
self.regex_pattern = regex_pattern
|
|
409
439
|
self.dtype = dtype
|
|
@@ -561,9 +591,9 @@ class RatioCalculator:
|
|
|
561
591
|
denominator = groups.struct.field("group_2").cast(pl.Float64, strict=False)
|
|
562
592
|
|
|
563
593
|
# Safely perform division, returning null if denominator is 0
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
).
|
|
594
|
+
final_expr = pl.when(denominator != 0).then(numerator / denominator).otherwise(None)
|
|
595
|
+
|
|
596
|
+
return pl.select(final_expr).to_series()
|
|
567
597
|
|
|
568
598
|
|
|
569
599
|
class CategoryMapper:
|
|
@@ -7,6 +7,7 @@ from functools import wraps
|
|
|
7
7
|
from typing import Any, Dict, Tuple, List
|
|
8
8
|
from .utilities import _script_info
|
|
9
9
|
import numpy as np
|
|
10
|
+
from .logger import _LOGGER
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
__all__ = [
|
|
@@ -46,7 +47,7 @@ class PathManager:
|
|
|
46
47
|
if self._is_bundled:
|
|
47
48
|
# In a Briefcase bundle, resource_path gives an absolute path
|
|
48
49
|
# to the resource directory.
|
|
49
|
-
self.package_root = self._resource_path_func(self.package_name, "")
|
|
50
|
+
self.package_root = self._resource_path_func(self.package_name, "") # type: ignore
|
|
50
51
|
else:
|
|
51
52
|
# In development mode, the package root is the directory
|
|
52
53
|
# containing the anchor file.
|
|
@@ -56,7 +57,7 @@ class PathManager:
|
|
|
56
57
|
"""Checks if the app is running in a bundled environment."""
|
|
57
58
|
try:
|
|
58
59
|
# This is the function Briefcase provides in a bundled app
|
|
59
|
-
from briefcase.platforms.base import resource_path
|
|
60
|
+
from briefcase.platforms.base import resource_path # type: ignore
|
|
60
61
|
return True, resource_path
|
|
61
62
|
except ImportError:
|
|
62
63
|
return False, None
|
|
@@ -147,7 +148,7 @@ class ConfigManager:
|
|
|
147
148
|
"""
|
|
148
149
|
path = Path(file_path)
|
|
149
150
|
if path.exists() and not force_overwrite:
|
|
150
|
-
|
|
151
|
+
_LOGGER.warning(f"Configuration file already exists at {path}. Aborting.")
|
|
151
152
|
return
|
|
152
153
|
|
|
153
154
|
config = configparser.ConfigParser()
|
|
@@ -205,7 +206,7 @@ class ConfigManager:
|
|
|
205
206
|
|
|
206
207
|
with open(path, 'w') as configfile:
|
|
207
208
|
config.write(configfile)
|
|
208
|
-
|
|
209
|
+
_LOGGER.info(f"Successfully generated config template at: '{path}'")
|
|
209
210
|
|
|
210
211
|
|
|
211
212
|
# --- GUI Factory ---
|
|
@@ -219,8 +220,8 @@ class GUIFactory:
|
|
|
219
220
|
Initializes the factory with a configuration object.
|
|
220
221
|
"""
|
|
221
222
|
self.config = config
|
|
222
|
-
sg.theme(self.config.general.theme)
|
|
223
|
-
sg.set_options(font=(self.config.general.font_family, 12))
|
|
223
|
+
sg.theme(self.config.general.theme) # type: ignore
|
|
224
|
+
sg.set_options(font=(self.config.general.font_family, 12)) # type: ignore
|
|
224
225
|
|
|
225
226
|
# --- Atomic Element Generators ---
|
|
226
227
|
def make_button(self, text: str, key: str, **kwargs) -> sg.Button:
|
|
@@ -234,13 +235,13 @@ class GUIFactory:
|
|
|
234
235
|
(e.g., `tooltip='Click me'`, `disabled=True`).
|
|
235
236
|
"""
|
|
236
237
|
cfg = self.config
|
|
237
|
-
font = (cfg.fonts.font_family, cfg.fonts.button_size, cfg.fonts.button_style)
|
|
238
|
+
font = (cfg.fonts.font_family, cfg.fonts.button_size, cfg.fonts.button_style) # type: ignore
|
|
238
239
|
|
|
239
240
|
style_args = {
|
|
240
|
-
"size": cfg.layout.button_size,
|
|
241
|
+
"size": cfg.layout.button_size, # type: ignore
|
|
241
242
|
"font": font,
|
|
242
|
-
"button_color": (cfg.colors.button_text, cfg.colors.button_background),
|
|
243
|
-
"mouseover_colors": (cfg.colors.button_text, cfg.colors.button_background_hover),
|
|
243
|
+
"button_color": (cfg.colors.button_text, cfg.colors.button_background), # type: ignore
|
|
244
|
+
"mouseover_colors": (cfg.colors.button_text, cfg.colors.button_background_hover), # type: ignore
|
|
244
245
|
"border_width": 0,
|
|
245
246
|
**kwargs
|
|
246
247
|
}
|
|
@@ -257,7 +258,7 @@ class GUIFactory:
|
|
|
257
258
|
(e.g., `title_color='red'`, `relief=sg.RELIEF_SUNKEN`).
|
|
258
259
|
"""
|
|
259
260
|
cfg = self.config
|
|
260
|
-
font = (cfg.fonts.font_family, cfg.fonts.frame_size)
|
|
261
|
+
font = (cfg.fonts.font_family, cfg.fonts.frame_size) # type: ignore
|
|
261
262
|
|
|
262
263
|
style_args = {
|
|
263
264
|
"font": font,
|
|
@@ -289,7 +290,7 @@ class GUIFactory:
|
|
|
289
290
|
"""
|
|
290
291
|
cfg = self.config
|
|
291
292
|
bg_color = sg.theme_background_color()
|
|
292
|
-
label_font = (cfg.fonts.font_family, cfg.fonts.label_size, cfg.fonts.label_style)
|
|
293
|
+
label_font = (cfg.fonts.font_family, cfg.fonts.label_size, cfg.fonts.label_style) # type: ignore
|
|
293
294
|
|
|
294
295
|
columns = []
|
|
295
296
|
for name, (val_min, val_max) in data_dict.items():
|
|
@@ -298,21 +299,21 @@ class GUIFactory:
|
|
|
298
299
|
|
|
299
300
|
label = sg.Text(name, font=label_font, background_color=bg_color, key=f"_text_{name}")
|
|
300
301
|
|
|
301
|
-
input_style = {"size": cfg.layout.input_size_cont, "justification": "center"}
|
|
302
|
+
input_style = {"size": cfg.layout.input_size_cont, "justification": "center"} # type: ignore
|
|
302
303
|
if is_target:
|
|
303
|
-
input_style["text_color"] = cfg.colors.target_text
|
|
304
|
-
input_style["disabled_readonly_background_color"] = cfg.colors.target_background
|
|
304
|
+
input_style["text_color"] = cfg.colors.target_text # type: ignore
|
|
305
|
+
input_style["disabled_readonly_background_color"] = cfg.colors.target_background # type: ignore
|
|
305
306
|
|
|
306
307
|
element = sg.Input(default_text, key=key, disabled=is_target, **input_style)
|
|
307
308
|
|
|
308
309
|
if is_target:
|
|
309
310
|
layout = [[label], [element]]
|
|
310
311
|
else:
|
|
311
|
-
range_font = (cfg.fonts.font_family, cfg.fonts.range_size)
|
|
312
|
+
range_font = (cfg.fonts.font_family, cfg.fonts.range_size) # type: ignore
|
|
312
313
|
range_text = sg.Text(f"Range: {int(val_min)}-{int(val_max)}", font=range_font, background_color=bg_color)
|
|
313
314
|
layout = [[label], [element], [range_text]]
|
|
314
315
|
|
|
315
|
-
layout.append([sg.Text(" ", font=(cfg.fonts.font_family, 2), background_color=bg_color)])
|
|
316
|
+
layout.append([sg.Text(" ", font=(cfg.fonts.font_family, 2), background_color=bg_color)]) # type: ignore
|
|
316
317
|
columns.append(sg.Column(layout, background_color=bg_color))
|
|
317
318
|
|
|
318
319
|
if layout_mode == 'row':
|
|
@@ -340,17 +341,17 @@ class GUIFactory:
|
|
|
340
341
|
"""
|
|
341
342
|
cfg = self.config
|
|
342
343
|
bg_color = sg.theme_background_color()
|
|
343
|
-
label_font = (cfg.fonts.font_family, cfg.fonts.label_size, cfg.fonts.label_style)
|
|
344
|
+
label_font = (cfg.fonts.font_family, cfg.fonts.label_size, cfg.fonts.label_style) # type: ignore
|
|
344
345
|
|
|
345
346
|
columns = []
|
|
346
347
|
for name, values in data_dict.items():
|
|
347
348
|
label = sg.Text(name, font=label_font, background_color=bg_color, key=f"_text_{name}")
|
|
348
349
|
element = sg.Combo(
|
|
349
350
|
values, default_value=values[0], key=name,
|
|
350
|
-
size=cfg.layout.input_size_binary, readonly=True
|
|
351
|
+
size=cfg.layout.input_size_binary, readonly=True # type: ignore
|
|
351
352
|
)
|
|
352
353
|
layout = [[label], [element]]
|
|
353
|
-
layout.append([sg.Text(" ", font=(cfg.fonts.font_family, 2), background_color=bg_color)])
|
|
354
|
+
layout.append([sg.Text(" ", font=(cfg.fonts.font_family, 2), background_color=bg_color)]) # type: ignore
|
|
354
355
|
columns.append(sg.Column(layout, background_color=bg_color))
|
|
355
356
|
|
|
356
357
|
if layout_mode == 'row':
|
|
@@ -370,8 +371,8 @@ class GUIFactory:
|
|
|
370
371
|
**kwargs: Additional arguments to pass to the sg.Window constructor
|
|
371
372
|
(e.g., `location=(100, 100)`, `keep_on_top=True`).
|
|
372
373
|
"""
|
|
373
|
-
cfg = self.config.general
|
|
374
|
-
version = getattr(self.config.meta, 'version', None)
|
|
374
|
+
cfg = self.config.general # type: ignore
|
|
375
|
+
version = getattr(self.config.meta, 'version', None) # type: ignore
|
|
375
376
|
full_title = f"{title} v{version}" if version else title
|
|
376
377
|
|
|
377
378
|
window_args = {
|
|
@@ -406,9 +407,7 @@ def catch_exceptions(show_popup: bool = True):
|
|
|
406
407
|
sg.popup_error("An error occurred:", error_msg, title="Error")
|
|
407
408
|
else:
|
|
408
409
|
# Fallback for non-GUI contexts or if popup is disabled
|
|
409
|
-
|
|
410
|
-
print(error_msg)
|
|
411
|
-
print("-----------------------------")
|
|
410
|
+
_LOGGER.error(error_msg)
|
|
412
411
|
return wrapper
|
|
413
412
|
return decorator
|
|
414
413
|
|
|
@@ -6,6 +6,7 @@ import numpy as np
|
|
|
6
6
|
from .utilities import load_dataframe, list_csv_paths, sanitize_filename, _script_info, merge_dataframes, save_dataframe, threshold_binary_values, make_fullpath
|
|
7
7
|
from plotnine import ggplot, labs, theme, element_blank # type: ignore
|
|
8
8
|
from typing import Optional, Union
|
|
9
|
+
from .logger import _LOGGER
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
__all__ = [
|
|
@@ -40,7 +41,9 @@ def apply_mice(df: pd.DataFrame, df_name: str, binary_columns: Optional[list[str
|
|
|
40
41
|
if binary_columns is not None:
|
|
41
42
|
invalid_binary_columns = set(binary_columns) - set(df.columns)
|
|
42
43
|
if invalid_binary_columns:
|
|
43
|
-
|
|
44
|
+
_LOGGER.warning(f"⚠️ These 'binary columns' are not in the dataset:")
|
|
45
|
+
for invalid_binary_col in invalid_binary_columns:
|
|
46
|
+
print(f" - {invalid_binary_col}")
|
|
44
47
|
valid_binary_columns = [col for col in binary_columns if col not in invalid_binary_columns]
|
|
45
48
|
for imputed_df in imputed_datasets:
|
|
46
49
|
for binary_column_name in valid_binary_columns:
|
|
@@ -125,7 +128,7 @@ def get_convergence_diagnostic(kernel: mf.ImputationKernel, imputed_dataset_name
|
|
|
125
128
|
plt.savefig(save_path, bbox_inches='tight', format="svg")
|
|
126
129
|
plt.close()
|
|
127
130
|
|
|
128
|
-
|
|
131
|
+
_LOGGER.info(f"{dataset_file_dir} completed.")
|
|
129
132
|
|
|
130
133
|
|
|
131
134
|
# Imputed distributions
|
|
@@ -210,7 +213,7 @@ def get_imputed_distributions(kernel: mf.ImputationKernel, df_name: str, root_di
|
|
|
210
213
|
fig = kernel.plot_imputed_distributions(variables=[feature])
|
|
211
214
|
_process_figure(fig, feature)
|
|
212
215
|
|
|
213
|
-
|
|
216
|
+
_LOGGER.info(f"{local_dir_name} completed.")
|
|
214
217
|
|
|
215
218
|
|
|
216
219
|
def run_mice_pipeline(df_path_or_dir: Union[str,Path], target_columns: list[str],
|
|
@@ -240,7 +243,8 @@ def run_mice_pipeline(df_path_or_dir: Union[str,Path], target_columns: list[str]
|
|
|
240
243
|
all_file_paths = list(list_csv_paths(input_path).values())
|
|
241
244
|
|
|
242
245
|
for df_path in all_file_paths:
|
|
243
|
-
df
|
|
246
|
+
df: pd.DataFrame
|
|
247
|
+
df, df_name = load_dataframe(df_path=df_path, kind="pandas") # type: ignore
|
|
244
248
|
|
|
245
249
|
df, df_targets = _skip_targets(df, target_columns)
|
|
246
250
|
|