dragon-ml-toolbox 3.12.6__py3-none-any.whl → 4.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic. Click here for more details.
- dragon_ml_toolbox-4.1.0.dist-info/METADATA +253 -0
- dragon_ml_toolbox-4.1.0.dist-info/RECORD +30 -0
- ml_tools/ETL_engineering.py +2 -2
- ml_tools/GUI_tools.py +2 -2
- ml_tools/MICE_imputation.py +4 -3
- ml_tools/ML_callbacks.py +8 -4
- ml_tools/ML_evaluation.py +11 -6
- ml_tools/ML_inference.py +131 -0
- ml_tools/ML_trainer.py +17 -8
- ml_tools/PSO_optimization.py +116 -62
- ml_tools/RNN_forecast.py +5 -0
- ml_tools/SQL.py +272 -0
- ml_tools/VIF_factor.py +4 -3
- ml_tools/_logger.py +36 -0
- ml_tools/_pytorch_models.py +1 -1
- ml_tools/_script_info.py +8 -0
- ml_tools/{logger.py → custom_logger.py} +4 -66
- ml_tools/data_exploration.py +2 -66
- ml_tools/datasetmaster.py +3 -2
- ml_tools/ensemble_inference.py +249 -0
- ml_tools/ensemble_learning.py +40 -294
- ml_tools/handle_excel.py +3 -2
- ml_tools/keys.py +13 -2
- ml_tools/path_manager.py +194 -31
- ml_tools/utilities.py +2 -180
- dragon_ml_toolbox-3.12.6.dist-info/METADATA +0 -137
- dragon_ml_toolbox-3.12.6.dist-info/RECORD +0 -26
- ml_tools/ML_tutorial.py +0 -300
- {dragon_ml_toolbox-3.12.6.dist-info → dragon_ml_toolbox-4.1.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-3.12.6.dist-info → dragon_ml_toolbox-4.1.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-3.12.6.dist-info → dragon_ml_toolbox-4.1.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-3.12.6.dist-info → dragon_ml_toolbox-4.1.0.dist-info}/top_level.txt +0 -0
ml_tools/path_manager.py
CHANGED
|
@@ -1,12 +1,18 @@
|
|
|
1
1
|
from pprint import pprint
|
|
2
|
-
from typing import Optional, List, Dict,
|
|
2
|
+
from typing import Optional, List, Dict, Union, Literal
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
|
|
5
|
-
from .
|
|
4
|
+
import re
|
|
5
|
+
from ._script_info import _script_info
|
|
6
|
+
from ._logger import _LOGGER
|
|
7
|
+
import sys
|
|
6
8
|
|
|
7
9
|
|
|
8
10
|
__all__ = [
|
|
9
|
-
"PathManager"
|
|
11
|
+
"PathManager",
|
|
12
|
+
"make_fullpath",
|
|
13
|
+
"sanitize_filename",
|
|
14
|
+
"list_csv_paths",
|
|
15
|
+
"list_files_by_extension",
|
|
10
16
|
]
|
|
11
17
|
|
|
12
18
|
|
|
@@ -14,7 +20,7 @@ class PathManager:
|
|
|
14
20
|
"""
|
|
15
21
|
Manages and stores a project's file paths, acting as a centralized
|
|
16
22
|
"path database". It supports both development mode and applications
|
|
17
|
-
bundled with
|
|
23
|
+
bundled with Pyinstaller.
|
|
18
24
|
|
|
19
25
|
Supports python dictionary syntax.
|
|
20
26
|
"""
|
|
@@ -23,23 +29,14 @@ class PathManager:
|
|
|
23
29
|
anchor_file: str,
|
|
24
30
|
base_directories: Optional[List[str]] = None
|
|
25
31
|
):
|
|
26
|
-
"""
|
|
27
|
-
The initializer determines the project's root directory and can pre-register
|
|
28
|
-
a list of base directories relative to that root.
|
|
29
|
-
|
|
30
|
-
Args:
|
|
31
|
-
anchor_file (str): The absolute path to a file whose parent directory will be considered the package root and name. Typically, `__file__`.
|
|
32
|
-
base_directories (Optional[List[str]]): A list of directory names located at the same level as the anchor file to be registered immediately.
|
|
33
|
-
"""
|
|
34
32
|
resolved_anchor_path = Path(anchor_file).resolve()
|
|
35
33
|
self._package_name = resolved_anchor_path.parent.name
|
|
36
|
-
self._is_bundled,
|
|
34
|
+
self._is_bundled, bundle_root = self._get_bundle_root()
|
|
37
35
|
self._paths: Dict[str, Path] = {}
|
|
38
36
|
|
|
39
37
|
if self._is_bundled:
|
|
40
|
-
# In a bundle,
|
|
41
|
-
|
|
42
|
-
package_root = self._resource_path_func(self._package_name) # type: ignore
|
|
38
|
+
# In a PyInstaller bundle, the package is inside the temp _MEIPASS dir
|
|
39
|
+
package_root = Path(bundle_root) / self._package_name # type: ignore
|
|
43
40
|
else:
|
|
44
41
|
# In dev mode, the package root is the directory containing the anchor file.
|
|
45
42
|
package_root = resolved_anchor_path.parent
|
|
@@ -50,21 +47,21 @@ class PathManager:
|
|
|
50
47
|
# Register all the base directories
|
|
51
48
|
if base_directories:
|
|
52
49
|
for dir_name in base_directories:
|
|
53
|
-
#
|
|
54
|
-
|
|
55
|
-
if self._is_bundled:
|
|
56
|
-
self._paths[dir_name] = self._resource_path_func(self._package_name, dir_name) # type: ignore
|
|
57
|
-
else:
|
|
58
|
-
self._paths[dir_name] = package_root / dir_name
|
|
50
|
+
# This logic works for both dev mode and bundled mode
|
|
51
|
+
self._paths[dir_name] = package_root / dir_name
|
|
59
52
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
53
|
+
def _get_bundle_root(self) -> tuple[bool, Optional[str]]:
|
|
54
|
+
"""
|
|
55
|
+
Checks if the app is running in a PyInstaller bundle and returns the root path.
|
|
56
|
+
|
|
57
|
+
Returns:
|
|
58
|
+
A tuple (is_bundled, bundle_root_path). `bundle_root_path` is the
|
|
59
|
+
path to the temporary directory `_MEIPASS` if bundled, otherwise None.
|
|
60
|
+
"""
|
|
61
|
+
if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
|
|
62
|
+
# This is the standard way to check for a PyInstaller bundle
|
|
63
|
+
return True, sys._MEIPASS # type: ignore
|
|
64
|
+
else:
|
|
68
65
|
return False, None
|
|
69
66
|
|
|
70
67
|
def get(self, key: str) -> Path:
|
|
@@ -208,5 +205,171 @@ class PathManager:
|
|
|
208
205
|
return self._paths.items()
|
|
209
206
|
|
|
210
207
|
|
|
208
|
+
def make_fullpath(
        input_path: Union[str, Path],
        make: bool = False,
        verbose: bool = False,
        enforce: Optional[Literal["directory", "file"]] = None
    ) -> Path:
    """
    Resolves a string or Path into an absolute Path, optionally creating it.

    - If the path exists, it is returned.
    - If the path does not exist and `make=True`, it will:
        - Create whatever `enforce` asks for, when `enforce` is given
        - Otherwise create the file if the path has a suffix
        - Otherwise create the directory if it has no suffix
    - If `make=False` and the path does not exist, an error is raised.
    - If `enforce`, raises an error if the resolved path is not what was enforced.
    - Optionally prints whether the resolved path is a file or directory.

    Parameters:
        input_path (str | Path):
            Path to resolve.
        make (bool):
            If True, attempt to create file or directory.
        verbose (bool):
            Print classification after resolution.
        enforce ("directory" | "file" | None):
            Raises an error if the resolved path is not what was enforced.
            When the path has to be created, this also decides what is created.

    Returns:
        Path: Resolved absolute path.

    Raises:
        ValueError: If the path doesn't exist and can't be created.
        TypeError: If the final path does not match the `enforce` parameter.

    ## 🗒️ Note:

    Without `enforce`, directories with dots will be treated as files.

    Without `enforce`, files without extension will be treated as directories.
    """
    path = Path(input_path).expanduser()

    # An explicit `enforce` is authoritative. Falling back to the suffix
    # heuristic in that case would create the wrong kind of object (e.g. a
    # file for the directory "run.v1"), leave it on disk, and then fail the
    # enforcement check below.
    if enforce == "file":
        is_file = True
    elif enforce == "directory":
        is_file = False
    else:
        is_file = path.suffix != ""

    try:
        resolved = path.resolve(strict=True)
    except FileNotFoundError:
        if not make:
            raise ValueError(f"❌ Path does not exist: '{path}'")

        try:
            if is_file:
                # Create parent directories first
                path.parent.mkdir(parents=True, exist_ok=True)
                path.touch(exist_ok=False)
            else:
                path.mkdir(parents=True, exist_ok=True)
            resolved = path.resolve(strict=True)
        except Exception as e:
            # Keep the original cause attached so the traceback shows why
            # creation failed (permissions, name clash, ...).
            raise ValueError(f"❌ Failed to create {'file' if is_file else 'directory'} '{path}': {e}") from e

    if enforce == "file" and not resolved.is_file():
        raise TypeError(f"❌ Path was enforced as a file, but it is not: '{resolved}'")

    if enforce == "directory" and not resolved.is_dir():
        raise TypeError(f"❌ Path was enforced as a directory, but it is not: '{resolved}'")

    if verbose:
        if resolved.is_file():
            print("📄 Path is a File")
        elif resolved.is_dir():
            print("📁 Path is a Directory")
        else:
            print("❓ Path exists but is neither file nor directory")

    return resolved
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def sanitize_filename(filename: str) -> str:
    """
    Sanitizes the name by:
    - Stripping leading/trailing whitespace.
    - Replacing all internal whitespace characters with underscores.
    - Removing or replacing characters invalid in filenames.

    Args:
        filename (str): Base filename.

    Returns:
        str: A sanitized string suitable to use as a filename.

    Raises:
        ValueError: If nothing usable is left after sanitization, i.e. the
            result is empty or consists only of dots.
    """
    # Strip leading/trailing whitespace
    sanitized = filename.strip()

    # Replace all whitespace sequences (space, tab, etc.) with underscores
    sanitized = re.sub(r'\s+', '_', sanitized)

    # Conservative filter to keep filenames safe across platforms
    sanitized = re.sub(r'[^\w\-.]', '', sanitized)

    # Check for empty string after sanitization
    if not sanitized:
        raise ValueError("The sanitized filename is empty. The original input may have contained only invalid characters.")

    # "." and ".." (or any dot-only name) survive the filter above but refer
    # to the current/parent directory, so they cannot be used as filenames.
    if not sanitized.strip("."):
        raise ValueError(f"The sanitized filename '{sanitized}' consists only of dots and cannot be used as a filename.")

    return sanitized
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def list_csv_paths(directory: Union[str,Path], verbose: bool=True) -> dict[str, Path]:
    """
    Lists all `.csv` files in the specified directory and returns a mapping: filenames (without extensions) to their absolute paths.

    Entries are ordered by path, so the result is the same on every run.

    Parameters:
        directory (str | Path): Path to the directory containing `.csv` files.
        verbose (bool): If True, report the names that were found.

    Returns:
        (dict[str, Path]): Dictionary mapping {filename: filepath}.

    Raises:
        ValueError: Propagated from `make_fullpath` when `directory` does not exist.
        IOError: If the directory contains no `.csv` files.
    """
    dir_path = make_fullpath(directory)

    # glob() yields entries in filesystem order and also matches directories
    # whose name ends in ".csv"; keep regular files only and sort them so the
    # returned mapping has a reproducible order.
    csv_paths = sorted(p for p in dir_path.glob("*.csv") if p.is_file())
    if not csv_paths:
        # Report the full path (as list_files_by_extension does); the bare
        # directory name alone is ambiguous.
        raise IOError(f"❌ No CSV files found in directory: {dir_path}")

    # make a dictionary of paths and names
    name_path_dict = {p.stem: p for p in csv_paths}

    if verbose:
        _LOGGER.info("🗂️ CSV files found:")
        for name in name_path_dict:
            print(f"\t{name}")

    return name_path_dict
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def list_files_by_extension(directory: Union[str,Path], extension: str, verbose: bool=True) -> dict[str, Path]:
    """
    Lists all files with the specified extension in the given directory and returns a mapping:
    filenames (without extensions) to their absolute paths.

    Entries are ordered by path, so the result is the same on every run.

    Parameters:
        directory (str | Path): Path to the directory to search in.
        extension (str): File extension to search for (e.g., 'json', 'txt').
            A leading dot is accepted and the value is lower-cased before matching.
        verbose (bool): If True, report the names that were found.

    Returns:
        (dict[str, Path]): Dictionary mapping {filename: filepath}.

    Raises:
        ValueError: Propagated from `make_fullpath` when `directory` does not exist.
        IOError: If no file with the requested extension is found.
    """
    dir_path = make_fullpath(directory)

    # Normalize the extension (remove leading dot if present)
    normalized_ext = extension.lstrip(".").lower()
    pattern = f"*.{normalized_ext}"

    # glob() yields entries in filesystem order and also matches directories
    # whose name fits the pattern; keep regular files only and sort them so
    # the returned mapping has a reproducible order.
    matched_paths = sorted(p for p in dir_path.glob(pattern) if p.is_file())
    if not matched_paths:
        raise IOError(f"❌ No '.{normalized_ext}' files found in directory: {dir_path}")

    name_path_dict = {p.stem: p for p in matched_paths}

    if verbose:
        _LOGGER.info(f"\n📂 '{normalized_ext.upper()}' files found:")
        for name in name_path_dict:
            print(f"\t{name}")

    return name_path_dict
|
|
372
|
+
|
|
373
|
+
|
|
211
374
|
def info():
    """
    Pass this module's `__all__` list to `_script_info`.

    `_script_info` is imported from `._script_info`; presumably it lists the
    exported names for the user (confirm against that module).
    """
    exported_names = __all__
    _script_info(exported_names)
|
ml_tools/utilities.py
CHANGED
|
@@ -3,23 +3,20 @@ import numpy as np
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
import polars as pl
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
import re
|
|
7
6
|
from typing import Literal, Union, Sequence, Optional, Any, Iterator, Tuple
|
|
8
7
|
import joblib
|
|
9
8
|
from joblib.externals.loky.process_executor import TerminatedWorkerError
|
|
9
|
+
from .path_manager import sanitize_filename, make_fullpath, list_csv_paths
|
|
10
|
+
from ._script_info import _script_info
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
# Keep track of available tools
|
|
13
14
|
__all__ = [
|
|
14
|
-
"make_fullpath",
|
|
15
|
-
"list_csv_paths",
|
|
16
|
-
"list_files_by_extension",
|
|
17
15
|
"load_dataframe",
|
|
18
16
|
"yield_dataframes_from_dir",
|
|
19
17
|
"merge_dataframes",
|
|
20
18
|
"save_dataframe",
|
|
21
19
|
"normalize_mixed_list",
|
|
22
|
-
"sanitize_filename",
|
|
23
20
|
"threshold_binary_values",
|
|
24
21
|
"threshold_binary_values_batch",
|
|
25
22
|
"serialize_object",
|
|
@@ -29,143 +26,6 @@ __all__ = [
|
|
|
29
26
|
]
|
|
30
27
|
|
|
31
28
|
|
|
32
|
-
def make_fullpath(
|
|
33
|
-
input_path: Union[str, Path],
|
|
34
|
-
make: bool = False,
|
|
35
|
-
verbose: bool = False,
|
|
36
|
-
enforce: Optional[Literal["directory", "file"]] = None
|
|
37
|
-
) -> Path:
|
|
38
|
-
"""
|
|
39
|
-
Resolves a string or Path into an absolute Path, optionally creating it.
|
|
40
|
-
|
|
41
|
-
- If the path exists, it is returned.
|
|
42
|
-
- If the path does not exist and `make=True`, it will:
|
|
43
|
-
- Create the file if the path has a suffix
|
|
44
|
-
- Create the directory if it has no suffix
|
|
45
|
-
- If `make=False` and the path does not exist, an error is raised.
|
|
46
|
-
- If `enforce`, raises an error if the resolved path is not what was enforced.
|
|
47
|
-
- Optionally prints whether the resolved path is a file or directory.
|
|
48
|
-
|
|
49
|
-
Parameters:
|
|
50
|
-
input_path (str | Path):
|
|
51
|
-
Path to resolve.
|
|
52
|
-
make (bool):
|
|
53
|
-
If True, attempt to create file or directory.
|
|
54
|
-
verbose (bool):
|
|
55
|
-
Print classification after resolution.
|
|
56
|
-
enforce ("directory" | "file" | None):
|
|
57
|
-
Raises an error if the resolved path is not what was enforced.
|
|
58
|
-
|
|
59
|
-
Returns:
|
|
60
|
-
Path: Resolved absolute path.
|
|
61
|
-
|
|
62
|
-
Raises:
|
|
63
|
-
ValueError: If the path doesn't exist and can't be created.
|
|
64
|
-
TypeError: If the final path does not match the `enforce` parameter.
|
|
65
|
-
|
|
66
|
-
## 🗒️ Note:
|
|
67
|
-
|
|
68
|
-
Directories with dots will be treated as files.
|
|
69
|
-
|
|
70
|
-
Files without extension will be treated as directories.
|
|
71
|
-
"""
|
|
72
|
-
path = Path(input_path).expanduser()
|
|
73
|
-
|
|
74
|
-
is_file = path.suffix != ""
|
|
75
|
-
|
|
76
|
-
try:
|
|
77
|
-
resolved = path.resolve(strict=True)
|
|
78
|
-
except FileNotFoundError:
|
|
79
|
-
if not make:
|
|
80
|
-
raise ValueError(f"❌ Path does not exist: '{path}'")
|
|
81
|
-
|
|
82
|
-
try:
|
|
83
|
-
if is_file:
|
|
84
|
-
# Create parent directories first
|
|
85
|
-
path.parent.mkdir(parents=True, exist_ok=True)
|
|
86
|
-
path.touch(exist_ok=False)
|
|
87
|
-
else:
|
|
88
|
-
path.mkdir(parents=True, exist_ok=True)
|
|
89
|
-
resolved = path.resolve(strict=True)
|
|
90
|
-
except Exception as e:
|
|
91
|
-
raise ValueError(f"❌ Failed to create {'file' if is_file else 'directory'} '{path}': {e}")
|
|
92
|
-
|
|
93
|
-
if enforce == "file" and not resolved.is_file():
|
|
94
|
-
raise TypeError(f"❌ Path was enforced as a file, but it is not: '{resolved}'")
|
|
95
|
-
|
|
96
|
-
if enforce == "directory" and not resolved.is_dir():
|
|
97
|
-
raise TypeError(f"❌ Path was enforced as a directory, but it is not: '{resolved}'")
|
|
98
|
-
|
|
99
|
-
if verbose:
|
|
100
|
-
if resolved.is_file():
|
|
101
|
-
print("📄 Path is a File")
|
|
102
|
-
elif resolved.is_dir():
|
|
103
|
-
print("📁 Path is a Directory")
|
|
104
|
-
else:
|
|
105
|
-
print("❓ Path exists but is neither file nor directory")
|
|
106
|
-
|
|
107
|
-
return resolved
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
def list_csv_paths(directory: Union[str,Path], verbose: bool=True) -> dict[str, Path]:
|
|
111
|
-
"""
|
|
112
|
-
Lists all `.csv` files in the specified directory and returns a mapping: filenames (without extensions) to their absolute paths.
|
|
113
|
-
|
|
114
|
-
Parameters:
|
|
115
|
-
directory (str | Path): Path to the directory containing `.csv` files.
|
|
116
|
-
|
|
117
|
-
Returns:
|
|
118
|
-
(dict[str, Path]): Dictionary mapping {filename: filepath}.
|
|
119
|
-
"""
|
|
120
|
-
dir_path = make_fullpath(directory)
|
|
121
|
-
|
|
122
|
-
csv_paths = list(dir_path.glob("*.csv"))
|
|
123
|
-
if not csv_paths:
|
|
124
|
-
raise IOError(f"❌ No CSV files found in directory: {dir_path.name}")
|
|
125
|
-
|
|
126
|
-
# make a dictionary of paths and names
|
|
127
|
-
name_path_dict = {p.stem: p for p in csv_paths}
|
|
128
|
-
|
|
129
|
-
if verbose:
|
|
130
|
-
print("\n🗂️ CSV files found:")
|
|
131
|
-
for name in name_path_dict.keys():
|
|
132
|
-
print(f"\t{name}")
|
|
133
|
-
|
|
134
|
-
return name_path_dict
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
def list_files_by_extension(directory: Union[str,Path], extension: str, verbose: bool=True) -> dict[str, Path]:
|
|
138
|
-
"""
|
|
139
|
-
Lists all files with the specified extension in the given directory and returns a mapping:
|
|
140
|
-
filenames (without extensions) to their absolute paths.
|
|
141
|
-
|
|
142
|
-
Parameters:
|
|
143
|
-
directory (str | Path): Path to the directory to search in.
|
|
144
|
-
extension (str): File extension to search for (e.g., 'json', 'txt').
|
|
145
|
-
|
|
146
|
-
Returns:
|
|
147
|
-
(dict[str, Path]): Dictionary mapping {filename: filepath}.
|
|
148
|
-
"""
|
|
149
|
-
dir_path = make_fullpath(directory)
|
|
150
|
-
|
|
151
|
-
# Normalize the extension (remove leading dot if present)
|
|
152
|
-
normalized_ext = extension.lstrip(".").lower()
|
|
153
|
-
pattern = f"*.{normalized_ext}"
|
|
154
|
-
|
|
155
|
-
matched_paths = list(dir_path.glob(pattern))
|
|
156
|
-
if not matched_paths:
|
|
157
|
-
raise IOError(f"❌ No '.{normalized_ext}' files found in directory: {dir_path}")
|
|
158
|
-
|
|
159
|
-
name_path_dict = {p.stem: p for p in matched_paths}
|
|
160
|
-
|
|
161
|
-
if verbose:
|
|
162
|
-
print(f"\n📂 '{normalized_ext.upper()}' files found:")
|
|
163
|
-
for name in name_path_dict:
|
|
164
|
-
print(f"\t{name}")
|
|
165
|
-
|
|
166
|
-
return name_path_dict
|
|
167
|
-
|
|
168
|
-
|
|
169
29
|
def load_dataframe(
|
|
170
30
|
df_path: Union[str, Path],
|
|
171
31
|
kind: Literal["pandas", "polars"] = "pandas",
|
|
@@ -412,35 +272,6 @@ def normalize_mixed_list(data: list, threshold: int = 2) -> list[float]:
|
|
|
412
272
|
return [x / total for x in adjusted]
|
|
413
273
|
|
|
414
274
|
|
|
415
|
-
def sanitize_filename(filename: str) -> str:
|
|
416
|
-
"""
|
|
417
|
-
Sanitizes the name by:
|
|
418
|
-
- Stripping leading/trailing whitespace.
|
|
419
|
-
- Replacing all internal whitespace characters with underscores.
|
|
420
|
-
- Removing or replacing characters invalid in filenames.
|
|
421
|
-
|
|
422
|
-
Args:
|
|
423
|
-
filename (str): Base filename.
|
|
424
|
-
|
|
425
|
-
Returns:
|
|
426
|
-
str: A sanitized string suitable to use as a filename.
|
|
427
|
-
"""
|
|
428
|
-
# Strip leading/trailing whitespace
|
|
429
|
-
sanitized = filename.strip()
|
|
430
|
-
|
|
431
|
-
# Replace all whitespace sequences (space, tab, etc.) with underscores
|
|
432
|
-
sanitized = re.sub(r'\s+', '_', sanitized)
|
|
433
|
-
|
|
434
|
-
# Conservative filter to keep filenames safe across platforms
|
|
435
|
-
sanitized = re.sub(r'[^\w\-.]', '', sanitized)
|
|
436
|
-
|
|
437
|
-
# Check for empty string after sanitization
|
|
438
|
-
if not sanitized:
|
|
439
|
-
raise ValueError("The sanitized filename is empty. The original input may have contained only invalid characters.")
|
|
440
|
-
|
|
441
|
-
return sanitized
|
|
442
|
-
|
|
443
|
-
|
|
444
275
|
def threshold_binary_values(
|
|
445
276
|
input_array: Union[Sequence[float], np.ndarray, pd.Series, pl.Series],
|
|
446
277
|
binary_values: Optional[int] = None
|
|
@@ -675,14 +506,5 @@ def train_dataset_orchestrator(list_of_dirs: list[Union[str,Path]],
|
|
|
675
506
|
print(f"\n✅ {total_saved} single-target datasets were created.")
|
|
676
507
|
|
|
677
508
|
|
|
678
|
-
def _script_info(all_data: list[str]):
|
|
679
|
-
"""
|
|
680
|
-
List available names.
|
|
681
|
-
"""
|
|
682
|
-
print("Available functions and objects:")
|
|
683
|
-
for i, name in enumerate(all_data, start=1):
|
|
684
|
-
print(f"{i} - {name}")
|
|
685
|
-
|
|
686
|
-
|
|
687
509
|
def info():
    """
    Pass this module's `__all__` list to `_script_info`.

    `_script_info` is imported from `._script_info`; presumably it lists the
    exported names for the user (confirm against that module).
    """
    exported_names = __all__
    _script_info(exported_names)
|
|
@@ -1,137 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: dragon-ml-toolbox
|
|
3
|
-
Version: 3.12.6
|
|
4
|
-
Summary: A collection of tools for data science and machine learning projects.
|
|
5
|
-
Author-email: Karl Loza <luigiloza@gmail.com>
|
|
6
|
-
License-Expression: MIT
|
|
7
|
-
Project-URL: Homepage, https://github.com/DrAg0n-BoRn/ML_tools
|
|
8
|
-
Project-URL: Changelog, https://github.com/DrAg0n-BoRn/ML_tools/blob/master/CHANGELOG.md
|
|
9
|
-
Classifier: Programming Language :: Python :: 3
|
|
10
|
-
Classifier: Operating System :: OS Independent
|
|
11
|
-
Requires-Python: >=3.10
|
|
12
|
-
Description-Content-Type: text/markdown
|
|
13
|
-
License-File: LICENSE
|
|
14
|
-
License-File: LICENSE-THIRD-PARTY.md
|
|
15
|
-
Requires-Dist: numpy<2.0
|
|
16
|
-
Requires-Dist: scikit-learn
|
|
17
|
-
Requires-Dist: openpyxl
|
|
18
|
-
Requires-Dist: miceforest>=6.0.0
|
|
19
|
-
Requires-Dist: plotnine>=0.12
|
|
20
|
-
Requires-Dist: matplotlib
|
|
21
|
-
Requires-Dist: seaborn
|
|
22
|
-
Requires-Dist: pandas
|
|
23
|
-
Requires-Dist: polars
|
|
24
|
-
Requires-Dist: imbalanced-learn
|
|
25
|
-
Requires-Dist: statsmodels
|
|
26
|
-
Requires-Dist: ipython
|
|
27
|
-
Requires-Dist: ipykernel
|
|
28
|
-
Requires-Dist: notebook
|
|
29
|
-
Requires-Dist: jupyterlab
|
|
30
|
-
Requires-Dist: ipywidgets
|
|
31
|
-
Requires-Dist: joblib
|
|
32
|
-
Requires-Dist: xgboost
|
|
33
|
-
Requires-Dist: lightgbm<=4.5.0
|
|
34
|
-
Requires-Dist: shap
|
|
35
|
-
Requires-Dist: tqdm>=4.0
|
|
36
|
-
Requires-Dist: Pillow
|
|
37
|
-
Provides-Extra: pytorch
|
|
38
|
-
Requires-Dist: torch; extra == "pytorch"
|
|
39
|
-
Requires-Dist: torchvision; extra == "pytorch"
|
|
40
|
-
Provides-Extra: gui
|
|
41
|
-
Requires-Dist: FreeSimpleGUI>=5.2; extra == "gui"
|
|
42
|
-
Dynamic: license-file
|
|
43
|
-
|
|
44
|
-
# dragon-ml-toolbox
|
|
45
|
-
|
|
46
|
-
A collection of Python utilities for data science and machine learning, structured as a modular package for easy reuse and installation.
|
|
47
|
-
|
|
48
|
-
## Features
|
|
49
|
-
|
|
50
|
-
- Modular scripts for data exploration, logging, machine learning, and more.
|
|
51
|
-
- Designed for seamless integration as a Git submodule or installable Python package.
|
|
52
|
-
|
|
53
|
-
## Installation
|
|
54
|
-
|
|
55
|
-
**Python 3.10+ recommended.**
|
|
56
|
-
|
|
57
|
-
### Via PyPI
|
|
58
|
-
|
|
59
|
-
Install the latest stable release from PyPI:
|
|
60
|
-
|
|
61
|
-
```bash
|
|
62
|
-
pip install dragon-ml-toolbox
|
|
63
|
-
```
|
|
64
|
-
|
|
65
|
-
### Via GitHub (Editable)
|
|
66
|
-
|
|
67
|
-
Clone the repository and install in editable mode with optional dependencies:
|
|
68
|
-
|
|
69
|
-
```bash
|
|
70
|
-
git clone https://github.com/DrAg0n-BoRn/ML_tools.git
|
|
71
|
-
cd ML_tools
|
|
72
|
-
pip install -e .
|
|
73
|
-
```
|
|
74
|
-
|
|
75
|
-
### Via conda-forge
|
|
76
|
-
|
|
77
|
-
Install from the conda-forge channel:
|
|
78
|
-
|
|
79
|
-
```bash
|
|
80
|
-
conda install -c conda-forge dragon-ml-toolbox
|
|
81
|
-
```
|
|
82
|
-
**Note:** This version is outdated or broken due to dependency incompatibilities. Use PyPi instead.
|
|
83
|
-
|
|
84
|
-
## Optional dependencies
|
|
85
|
-
|
|
86
|
-
### FreeSimpleGUI
|
|
87
|
-
|
|
88
|
-
Wrapper library used to build powerful GUIs. Requires the tkinter backend.
|
|
89
|
-
|
|
90
|
-
```bash
|
|
91
|
-
pip install dragon-ml-toolbox[gui]
|
|
92
|
-
```
|
|
93
|
-
|
|
94
|
-
### PyTorch
|
|
95
|
-
|
|
96
|
-
Different builds available depending on the **platform** and **hardware acceleration** (e.g., CUDA for NVIDIA GPUs on Linux/Windows, or MPS for Apple Silicon on macOS).
|
|
97
|
-
|
|
98
|
-
Install the default CPU-only version with
|
|
99
|
-
|
|
100
|
-
```bash
|
|
101
|
-
pip install dragon-ml-toolbox[pytorch]
|
|
102
|
-
```
|
|
103
|
-
|
|
104
|
-
To make use of GPU acceleration use the official PyTorch installation instructions:
|
|
105
|
-
|
|
106
|
-
[PyTorch Instructions](https://pytorch.org/get-started/locally/)
|
|
107
|
-
|
|
108
|
-
## Usage
|
|
109
|
-
|
|
110
|
-
After installation, import modules like this:
|
|
111
|
-
|
|
112
|
-
```python
|
|
113
|
-
from ml_tools.utilities import sanitize_filename
|
|
114
|
-
from ml_tools.logger import custom_logger
|
|
115
|
-
```
|
|
116
|
-
|
|
117
|
-
## Available modules
|
|
118
|
-
|
|
119
|
-
```bash
|
|
120
|
-
data_exploration
|
|
121
|
-
datasetmaster
|
|
122
|
-
ensemble_learning
|
|
123
|
-
ETL_engineering
|
|
124
|
-
GUI_tools
|
|
125
|
-
handle_excel
|
|
126
|
-
logger
|
|
127
|
-
MICE_imputation
|
|
128
|
-
ML_callbacks
|
|
129
|
-
ML_evaluation
|
|
130
|
-
ML_trainer
|
|
131
|
-
ML_tutorial
|
|
132
|
-
path_manager
|
|
133
|
-
PSO_optimization
|
|
134
|
-
RNN_forecast
|
|
135
|
-
utilities
|
|
136
|
-
VIF_factor
|
|
137
|
-
```
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
dragon_ml_toolbox-3.12.6.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
|
|
2
|
-
dragon_ml_toolbox-3.12.6.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
|
|
3
|
-
ml_tools/ETL_engineering.py,sha256=yeZsW_7zRvEcuMZbM4E2GV1dxwBoWIeJAcFFk2AK0fY,39502
|
|
4
|
-
ml_tools/GUI_tools.py,sha256=dkB2nxY1RxR_PDw6h2virWc9KR0E_C7elzVZ36uFJVM,45406
|
|
5
|
-
ml_tools/MICE_imputation.py,sha256=7CDsIQxx5Jb_DwPAmWmz3FXcn85sUyH7g9UcZ1_E07s,11412
|
|
6
|
-
ml_tools/ML_callbacks.py,sha256=g_9nSzoA22UJOQZCPKeDz-Ayh0ECFZLzRd6rZ8SokrE,13080
|
|
7
|
-
ml_tools/ML_evaluation.py,sha256=oiDV6HItQloUUKCUpltV-2pogubWLBieGpc-VUwosAQ,10106
|
|
8
|
-
ml_tools/ML_trainer.py,sha256=gGXAu65v_5yYCqKqmHpSLJ3yY0M_Scr_nJ6qHBHSK1k,14487
|
|
9
|
-
ml_tools/ML_tutorial.py,sha256=m5mZPULhO4mOpfp32fM_mUNVduv-S2hoKNbsZObNI4k,12233
|
|
10
|
-
ml_tools/PSO_optimization.py,sha256=1wRM-goZSwCji5LQVDP1VjF0LyGN5-QWBvofbwfjQRQ,24780
|
|
11
|
-
ml_tools/RNN_forecast.py,sha256=IZLcPs3by0Chei7ill_Grjxs7BBUnzau0Oavi3dWiyE,1886
|
|
12
|
-
ml_tools/VIF_factor.py,sha256=gD3sZ9HBdTHlf4gbvUvx6kKczO_JFxMZKTXw1h0KVCg,10365
|
|
13
|
-
ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
-
ml_tools/_pytorch_models.py,sha256=bpWZsrSwCvHJQkR6UfoPpElsMv9AvmiNErNHC8NYB_I,10132
|
|
15
|
-
ml_tools/data_exploration.py,sha256=ZpjK_lN5mDhjf9iQpvyYNA2SF7M5q4D5m09saln7YFI,25241
|
|
16
|
-
ml_tools/datasetmaster.py,sha256=S3PKHNQZ9cyAOck8xQltVLZhaD1gFLfgHFL-aRjz4JU,30077
|
|
17
|
-
ml_tools/ensemble_learning.py,sha256=D-9IbOKtCvyAB-LbPu3sdSRtdp0RZIcQEZcyMnarHmQ,45758
|
|
18
|
-
ml_tools/handle_excel.py,sha256=2Q_MBArss4emPQ8p-Uj9x_e7wGg3OoYM2AU_HG59UCY,12978
|
|
19
|
-
ml_tools/keys.py,sha256=3YVbcYARSjE3vKr_6PavJSf7vXvlos7szu3qva4T3Ts,781
|
|
20
|
-
ml_tools/logger.py,sha256=UkbiU9ihBhw9VKyn3rZzisdClWV94EBV6B09_D0iUU0,6026
|
|
21
|
-
ml_tools/path_manager.py,sha256=1LD9JFzqVyJQl2kTA7tK930_IV3qxfiV4cMIBzItytY,8309
|
|
22
|
-
ml_tools/utilities.py,sha256=Vh4ZdI03g8EpgQL7KDwnAw2vtBlHtx6KxCuAATxLvT4,24208
|
|
23
|
-
dragon_ml_toolbox-3.12.6.dist-info/METADATA,sha256=qhni3r9Wsp7cx0WORT1m-xd3P_iqM7wdq08WIdC0g2U,3274
|
|
24
|
-
dragon_ml_toolbox-3.12.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
25
|
-
dragon_ml_toolbox-3.12.6.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
|
|
26
|
-
dragon_ml_toolbox-3.12.6.dist-info/RECORD,,
|