dragon_ml_toolbox-19.10.0-py3-none-any.whl → dragon_ml_toolbox-19.12.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-19.10.0.dist-info → dragon_ml_toolbox-19.12.0.dist-info}/METADATA +1 -1
- {dragon_ml_toolbox-19.10.0.dist-info → dragon_ml_toolbox-19.12.0.dist-info}/RECORD +19 -19
- ml_tools/ML_callbacks.py +8 -4
- ml_tools/_core/_MICE_imputation.py +2 -2
- ml_tools/_core/_ML_callbacks.py +461 -171
- ml_tools/_core/_ML_trainer.py +50 -50
- ml_tools/_core/_ML_utilities.py +153 -50
- ml_tools/_core/_PSO_optimization.py +1 -1
- ml_tools/_core/_ensemble_inference.py +1 -1
- ml_tools/_core/_keys.py +32 -1
- ml_tools/_core/_optimization_tools.py +1 -1
- ml_tools/_core/_path_manager.py +149 -27
- ml_tools/_core/_utilities.py +6 -2
- ml_tools/keys.py +2 -0
- ml_tools/path_manager.py +5 -1
- {dragon_ml_toolbox-19.10.0.dist-info → dragon_ml_toolbox-19.12.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-19.10.0.dist-info → dragon_ml_toolbox-19.12.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-19.10.0.dist-info → dragon_ml_toolbox-19.12.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-19.10.0.dist-info → dragon_ml_toolbox-19.12.0.dist-info}/top_level.txt +0 -0
ml_tools/_core/_path_manager.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
from pprint import pprint
|
|
2
1
|
from typing import Optional, List, Dict, Union, Literal
|
|
3
2
|
from pathlib import Path
|
|
4
3
|
import re
|
|
5
4
|
import sys
|
|
5
|
+
import shutil
|
|
6
6
|
|
|
7
7
|
from ._script_info import _script_info
|
|
8
8
|
from ._logger import get_logger
|
|
@@ -17,7 +17,9 @@ __all__ = [
|
|
|
17
17
|
"sanitize_filename",
|
|
18
18
|
"list_csv_paths",
|
|
19
19
|
"list_files_by_extension",
|
|
20
|
-
"list_subdirectories"
|
|
20
|
+
"list_subdirectories",
|
|
21
|
+
"clean_directory",
|
|
22
|
+
"safe_move",
|
|
21
23
|
]
|
|
22
24
|
|
|
23
25
|
|
|
@@ -436,35 +438,28 @@ def sanitize_filename(filename: str) -> str:
|
|
|
436
438
|
return sanitized
|
|
437
439
|
|
|
438
440
|
|
|
439
|
-
def list_csv_paths(directory: Union[str,Path], verbose: bool=True) -> dict[str, Path]:
|
|
441
|
+
def list_csv_paths(directory: Union[str, Path], verbose: bool = True, raise_on_empty: bool = True) -> dict[str, Path]:
|
|
440
442
|
"""
|
|
441
443
|
Lists all `.csv` files in the specified directory and returns a mapping: filenames (without extensions) to their absolute paths.
|
|
442
444
|
|
|
443
445
|
Parameters:
|
|
444
446
|
directory (str | Path): Path to the directory containing `.csv` files.
|
|
447
|
+
verbose (bool): If True, prints found files.
|
|
448
|
+
raise_on_empty (bool): If True, raises IOError if no files are found.
|
|
445
449
|
|
|
446
450
|
Returns:
|
|
447
451
|
(dict[str, Path]): Dictionary mapping {filename: filepath}.
|
|
448
452
|
"""
|
|
449
|
-
|
|
453
|
+
# wraps the more general function
|
|
454
|
+
return list_files_by_extension(directory=directory, extension="csv", verbose=verbose, raise_on_empty=raise_on_empty)
|
|
450
455
|
|
|
451
|
-
csv_paths = list(dir_path.glob("*.csv"))
|
|
452
|
-
if not csv_paths:
|
|
453
|
-
_LOGGER.error(f"No CSV files found in directory: {dir_path.name}")
|
|
454
|
-
raise IOError()
|
|
455
|
-
|
|
456
|
-
# make a dictionary of paths and names
|
|
457
|
-
name_path_dict = {p.stem: p for p in csv_paths}
|
|
458
|
-
|
|
459
|
-
if verbose:
|
|
460
|
-
_LOGGER.info("🗂️ CSV files found:")
|
|
461
|
-
for name in name_path_dict.keys():
|
|
462
|
-
print(f"\t{name}")
|
|
463
|
-
|
|
464
|
-
return name_path_dict
|
|
465
456
|
|
|
466
|
-
|
|
467
|
-
|
|
457
|
+
def list_files_by_extension(
|
|
458
|
+
directory: Union[str, Path],
|
|
459
|
+
extension: str,
|
|
460
|
+
verbose: bool = True,
|
|
461
|
+
raise_on_empty: bool = True
|
|
462
|
+
) -> dict[str, Path]:
|
|
468
463
|
"""
|
|
469
464
|
Lists all files with the specified extension in the given directory and returns a mapping:
|
|
470
465
|
filenames (without extensions) to their absolute paths.
|
|
@@ -472,20 +467,29 @@ def list_files_by_extension(directory: Union[str,Path], extension: str, verbose:
|
|
|
472
467
|
Parameters:
|
|
473
468
|
directory (str | Path): Path to the directory to search in.
|
|
474
469
|
extension (str): File extension to search for (e.g., 'json', 'txt').
|
|
470
|
+
verbose (bool): If True, logs the files found.
|
|
471
|
+
raise_on_empty (bool): If True, raises IOError if no matching files are found.
|
|
475
472
|
|
|
476
473
|
Returns:
|
|
477
|
-
(dict[str, Path]): Dictionary mapping {filename: filepath}.
|
|
474
|
+
(dict[str, Path]): Dictionary mapping {filename: filepath}. Returns empty dict if none found and raise_on_empty is False.
|
|
478
475
|
"""
|
|
479
|
-
dir_path = make_fullpath(directory)
|
|
476
|
+
dir_path = make_fullpath(directory, enforce="directory")
|
|
480
477
|
|
|
481
478
|
# Normalize the extension (remove leading dot if present)
|
|
482
479
|
normalized_ext = extension.lstrip(".").lower()
|
|
483
480
|
pattern = f"*.{normalized_ext}"
|
|
484
481
|
|
|
485
482
|
matched_paths = list(dir_path.glob(pattern))
|
|
483
|
+
|
|
486
484
|
if not matched_paths:
|
|
487
|
-
|
|
488
|
-
|
|
485
|
+
msg = f"No '.{normalized_ext}' files found in directory: {dir_path}."
|
|
486
|
+
if raise_on_empty:
|
|
487
|
+
_LOGGER.error(msg)
|
|
488
|
+
raise IOError()
|
|
489
|
+
else:
|
|
490
|
+
if verbose:
|
|
491
|
+
_LOGGER.warning(msg)
|
|
492
|
+
return {}
|
|
489
493
|
|
|
490
494
|
name_path_dict = {p.stem: p for p in matched_paths}
|
|
491
495
|
|
|
@@ -497,13 +501,18 @@ def list_files_by_extension(directory: Union[str,Path], extension: str, verbose:
|
|
|
497
501
|
return name_path_dict
|
|
498
502
|
|
|
499
503
|
|
|
500
|
-
def list_subdirectories(
|
|
504
|
+
def list_subdirectories(
|
|
505
|
+
root_dir: Union[str, Path],
|
|
506
|
+
verbose: bool = True,
|
|
507
|
+
raise_on_empty: bool = True
|
|
508
|
+
) -> dict[str, Path]:
|
|
501
509
|
"""
|
|
502
510
|
Scans a directory and returns a dictionary of its immediate subdirectories.
|
|
503
511
|
|
|
504
512
|
Args:
|
|
505
513
|
root_dir (str | Path): The path to the directory to scan.
|
|
506
514
|
verbose (bool): If True, prints the number of directories found.
|
|
515
|
+
raise_on_empty (bool): If True, raises IOError if no subdirectories are found.
|
|
507
516
|
|
|
508
517
|
Returns:
|
|
509
518
|
dict[str, Path]: A dictionary mapping subdirectory names (str) to their full Path objects.
|
|
@@ -513,8 +522,14 @@ def list_subdirectories(root_dir: Union[str,Path], verbose: bool=True) -> dict[s
|
|
|
513
522
|
directories = [p.resolve() for p in root_path.iterdir() if p.is_dir()]
|
|
514
523
|
|
|
515
524
|
if len(directories) < 1:
|
|
516
|
-
|
|
517
|
-
|
|
525
|
+
msg = f"No subdirectories found inside '{root_path}'"
|
|
526
|
+
if raise_on_empty:
|
|
527
|
+
_LOGGER.error(msg)
|
|
528
|
+
raise IOError()
|
|
529
|
+
else:
|
|
530
|
+
if verbose:
|
|
531
|
+
_LOGGER.warning(msg)
|
|
532
|
+
return {}
|
|
518
533
|
|
|
519
534
|
if verbose:
|
|
520
535
|
count = len(directories)
|
|
@@ -529,5 +544,112 @@ def list_subdirectories(root_dir: Union[str,Path], verbose: bool=True) -> dict[s
|
|
|
529
544
|
return dir_map
|
|
530
545
|
|
|
531
546
|
|
|
547
|
+
def clean_directory(directory: Union[str, Path], verbose: bool = False) -> None:
|
|
548
|
+
"""
|
|
549
|
+
⚠️ DANGER: DESTRUCTIVE OPERATION ⚠️
|
|
550
|
+
|
|
551
|
+
Deletes all files and subdirectories inside the specified directory. It is designed to empty a folder, not delete the folder itself.
|
|
552
|
+
|
|
553
|
+
Safety: It skips hidden files and directories (those starting with a period '.'). This works for macOS/Linux hidden files and dot-config folders on Windows.
|
|
554
|
+
|
|
555
|
+
Args:
|
|
556
|
+
directory (str | Path): The directory path to clean.
|
|
557
|
+
verbose (bool): If True, prints the name of each top-level item deleted.
|
|
558
|
+
"""
|
|
559
|
+
target_dir = make_fullpath(directory, enforce="directory")
|
|
560
|
+
|
|
561
|
+
if verbose:
|
|
562
|
+
_LOGGER.warning(f"Starting cleanup of directory: {target_dir}")
|
|
563
|
+
|
|
564
|
+
for item in target_dir.iterdir():
|
|
565
|
+
# Safety Check: Skip hidden files/dirs
|
|
566
|
+
if item.name.startswith("."):
|
|
567
|
+
continue
|
|
568
|
+
|
|
569
|
+
try:
|
|
570
|
+
if item.is_file() or item.is_symlink():
|
|
571
|
+
item.unlink()
|
|
572
|
+
if verbose:
|
|
573
|
+
print(f" 🗑️ Deleted file: {item.name}")
|
|
574
|
+
elif item.is_dir():
|
|
575
|
+
shutil.rmtree(item)
|
|
576
|
+
if verbose:
|
|
577
|
+
print(f" 🗑️ Deleted directory: {item.name}")
|
|
578
|
+
except Exception as e:
|
|
579
|
+
_LOGGER.warning(f"Failed to delete item '{item.name}': {e}")
|
|
580
|
+
continue
|
|
581
|
+
|
|
582
|
+
|
|
583
|
+
def safe_move(
|
|
584
|
+
source: Union[str, Path],
|
|
585
|
+
final_destination: Union[str, Path],
|
|
586
|
+
rename: Optional[str] = None,
|
|
587
|
+
overwrite: bool = False
|
|
588
|
+
) -> Path:
|
|
589
|
+
"""
|
|
590
|
+
Moves a file or directory to a destination directory with safety checks.
|
|
591
|
+
|
|
592
|
+
Features:
|
|
593
|
+
- Supports optional renaming (sanitized automatically).
|
|
594
|
+
- PRESERVES file extensions during renaming (cannot be modified).
|
|
595
|
+
- Prevents accidental overwrites unless explicit.
|
|
596
|
+
|
|
597
|
+
Args:
|
|
598
|
+
source (str | Path): The file or directory to move.
|
|
599
|
+
final_destination (str | Path): The destination DIRECTORY where the item will be moved. It will be created if it does not exist.
|
|
600
|
+
rename (Optional[str]): If provided, the moved item will be renamed to this. Note: For files, the extension is strictly preserved.
|
|
601
|
+
overwrite (bool): If True, overwrites the destination path if it exists.
|
|
602
|
+
|
|
603
|
+
Returns:
|
|
604
|
+
Path: The new absolute path of the moved item.
|
|
605
|
+
"""
|
|
606
|
+
# 1. Validation and Setup
|
|
607
|
+
src_path = make_fullpath(source, make=False)
|
|
608
|
+
|
|
609
|
+
# Ensure destination directory exists
|
|
610
|
+
dest_dir_path = make_fullpath(final_destination, make=True, enforce="directory")
|
|
611
|
+
|
|
612
|
+
# 2. Determine Target Name
|
|
613
|
+
if rename:
|
|
614
|
+
sanitized_name = sanitize_filename(rename)
|
|
615
|
+
if src_path.is_file():
|
|
616
|
+
# Strict Extension Preservation
|
|
617
|
+
final_name = f"{sanitized_name}{src_path.suffix}"
|
|
618
|
+
else:
|
|
619
|
+
final_name = sanitized_name
|
|
620
|
+
else:
|
|
621
|
+
final_name = src_path.name
|
|
622
|
+
|
|
623
|
+
final_path = dest_dir_path / final_name
|
|
624
|
+
|
|
625
|
+
# 3. Safety Checks (Collision Detection)
|
|
626
|
+
if final_path.exists():
|
|
627
|
+
if not overwrite:
|
|
628
|
+
_LOGGER.error(f"Destination already exists: '{final_path}'. Use overwrite=True to force.")
|
|
629
|
+
raise FileExistsError()
|
|
630
|
+
|
|
631
|
+
# Smart Overwrite Handling
|
|
632
|
+
if final_path.is_dir():
|
|
633
|
+
if src_path.is_file():
|
|
634
|
+
_LOGGER.error(f"Cannot overwrite directory '{final_path}' with file '{src_path}'")
|
|
635
|
+
raise IsADirectoryError()
|
|
636
|
+
# If overwriting a directory, we must remove the old one first to avoid nesting/errors
|
|
637
|
+
shutil.rmtree(final_path)
|
|
638
|
+
else:
|
|
639
|
+
# Destination is a file
|
|
640
|
+
if src_path.is_dir():
|
|
641
|
+
_LOGGER.error(f"Cannot overwrite file '{final_path}' with directory '{src_path}'")
|
|
642
|
+
raise FileExistsError()
|
|
643
|
+
final_path.unlink()
|
|
644
|
+
|
|
645
|
+
# 4. Perform Move
|
|
646
|
+
try:
|
|
647
|
+
shutil.move(str(src_path), str(final_path))
|
|
648
|
+
return final_path
|
|
649
|
+
except Exception as e:
|
|
650
|
+
_LOGGER.exception(f"Failed to move '{src_path}' to '{final_path}'")
|
|
651
|
+
raise e
|
|
652
|
+
|
|
653
|
+
|
|
532
654
|
def info():
|
|
533
655
|
_script_info(__all__)
|
ml_tools/_core/_utilities.py
CHANGED
|
@@ -166,8 +166,12 @@ def load_dataframe_greedy(directory: Union[str, Path],
|
|
|
166
166
|
dir_path = make_fullpath(directory, enforce="directory")
|
|
167
167
|
|
|
168
168
|
# list all csv files and grab one (should be the only one)
|
|
169
|
-
csv_dict = list_csv_paths(directory=dir_path, verbose=False)
|
|
169
|
+
csv_dict = list_csv_paths(directory=dir_path, verbose=False, raise_on_empty=True)
|
|
170
170
|
|
|
171
|
+
# explicitly check that there is only one csv file
|
|
172
|
+
if len(csv_dict) > 1:
|
|
173
|
+
_LOGGER.warning(f"Multiple CSV files found in '{dir_path}'. Only one will be loaded.")
|
|
174
|
+
|
|
171
175
|
for df_path in csv_dict.values():
|
|
172
176
|
df , _df_name = load_dataframe(df_path=df_path,
|
|
173
177
|
use_columns=use_columns,
|
|
@@ -260,7 +264,7 @@ def yield_dataframes_from_dir(datasets_dir: Union[str,Path], verbose: bool=True)
|
|
|
260
264
|
- Output is streamed via a generator to support lazy loading of multiple datasets.
|
|
261
265
|
"""
|
|
262
266
|
datasets_path = make_fullpath(datasets_dir)
|
|
263
|
-
files_dict = list_csv_paths(datasets_path, verbose=verbose)
|
|
267
|
+
files_dict = list_csv_paths(datasets_path, verbose=verbose, raise_on_empty=True)
|
|
264
268
|
for df_name, df_path in files_dict.items():
|
|
265
269
|
df: pd.DataFrame
|
|
266
270
|
df, _ = load_dataframe(df_path, kind="pandas", verbose=verbose) # type: ignore
|
ml_tools/keys.py
CHANGED
|
@@ -2,10 +2,12 @@ from ._core._keys import (
|
|
|
2
2
|
PyTorchInferenceKeys as InferenceKeys,
|
|
3
3
|
_CheckpointCallbackKeys as CheckpointCallbackKeys,
|
|
4
4
|
_FinalizedFileKeys as FinalizedFileKeys,
|
|
5
|
+
_PublicTaskKeys as TaskKeys,
|
|
5
6
|
)
|
|
6
7
|
|
|
7
8
|
__all__ = [
|
|
8
9
|
"InferenceKeys",
|
|
9
10
|
"CheckpointCallbackKeys",
|
|
10
11
|
"FinalizedFileKeys",
|
|
12
|
+
"TaskKeys",
|
|
11
13
|
]
|
ml_tools/path_manager.py
CHANGED
|
@@ -5,6 +5,8 @@ from ._core._path_manager import (
|
|
|
5
5
|
list_csv_paths,
|
|
6
6
|
list_files_by_extension,
|
|
7
7
|
list_subdirectories,
|
|
8
|
+
clean_directory,
|
|
9
|
+
safe_move,
|
|
8
10
|
info
|
|
9
11
|
)
|
|
10
12
|
|
|
@@ -14,5 +16,7 @@ __all__ = [
|
|
|
14
16
|
"sanitize_filename",
|
|
15
17
|
"list_csv_paths",
|
|
16
18
|
"list_files_by_extension",
|
|
17
|
-
"list_subdirectories"
|
|
19
|
+
"list_subdirectories",
|
|
20
|
+
"clean_directory",
|
|
21
|
+
"safe_move",
|
|
18
22
|
]
|
|
File without changes
|
{dragon_ml_toolbox-19.10.0.dist-info → dragon_ml_toolbox-19.12.0.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|