dragon-ml-toolbox 19.10.0__py3-none-any.whl → 19.12.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
@@ -1,8 +1,8 @@
- from pprint import pprint
  from typing import Optional, List, Dict, Union, Literal
  from pathlib import Path
  import re
  import sys
+ import shutil

  from ._script_info import _script_info
  from ._logger import get_logger
@@ -17,7 +17,9 @@ __all__ = [
      "sanitize_filename",
      "list_csv_paths",
      "list_files_by_extension",
-     "list_subdirectories"
+     "list_subdirectories",
+     "clean_directory",
+     "safe_move",
  ]


@@ -436,35 +438,28 @@ def sanitize_filename(filename: str) -> str:
      return sanitized


- def list_csv_paths(directory: Union[str,Path], verbose: bool=True) -> dict[str, Path]:
+ def list_csv_paths(directory: Union[str, Path], verbose: bool = True, raise_on_empty: bool = True) -> dict[str, Path]:
      """
      Lists all `.csv` files in the specified directory and returns a mapping: filenames (without extensions) to their absolute paths.

      Parameters:
          directory (str | Path): Path to the directory containing `.csv` files.
+         verbose (bool): If True, prints found files.
+         raise_on_empty (bool): If True, raises IOError if no files are found.

      Returns:
          (dict[str, Path]): Dictionary mapping {filename: filepath}.
      """
-     dir_path = make_fullpath(directory)
+     # wraps the more general function
+     return list_files_by_extension(directory=directory, extension="csv", verbose=verbose, raise_on_empty=raise_on_empty)

-     csv_paths = list(dir_path.glob("*.csv"))
-     if not csv_paths:
-         _LOGGER.error(f"No CSV files found in directory: {dir_path.name}")
-         raise IOError()
-
-     # make a dictionary of paths and names
-     name_path_dict = {p.stem: p for p in csv_paths}
-
-     if verbose:
-         _LOGGER.info("🗂️ CSV files found:")
-         for name in name_path_dict.keys():
-             print(f"\t{name}")
-
-     return name_path_dict

-
- def list_files_by_extension(directory: Union[str,Path], extension: str, verbose: bool=True) -> dict[str, Path]:
+ def list_files_by_extension(
+     directory: Union[str, Path],
+     extension: str,
+     verbose: bool = True,
+     raise_on_empty: bool = True
+ ) -> dict[str, Path]:
      """
      Lists all files with the specified extension in the given directory and returns a mapping:
      filenames (without extensions) to their absolute paths.
@@ -472,20 +467,29 @@ def list_files_by_extension(directory: Union[str,Path], extension: str, verbose:
      Parameters:
          directory (str | Path): Path to the directory to search in.
          extension (str): File extension to search for (e.g., 'json', 'txt').
+         verbose (bool): If True, logs the files found.
+         raise_on_empty (bool): If True, raises IOError if no matching files are found.

      Returns:
-         (dict[str, Path]): Dictionary mapping {filename: filepath}.
+         (dict[str, Path]): Dictionary mapping {filename: filepath}. Returns empty dict if none found and raise_on_empty is False.
      """
-     dir_path = make_fullpath(directory)
+     dir_path = make_fullpath(directory, enforce="directory")

      # Normalize the extension (remove leading dot if present)
      normalized_ext = extension.lstrip(".").lower()
      pattern = f"*.{normalized_ext}"

      matched_paths = list(dir_path.glob(pattern))
+
      if not matched_paths:
-         _LOGGER.error(f"No '.{normalized_ext}' files found in directory: {dir_path}.")
-         raise IOError()
+         msg = f"No '.{normalized_ext}' files found in directory: {dir_path}."
+         if raise_on_empty:
+             _LOGGER.error(msg)
+             raise IOError()
+         else:
+             if verbose:
+                 _LOGGER.warning(msg)
+             return {}

      name_path_dict = {p.stem: p for p in matched_paths}

@@ -497,13 +501,18 @@ def list_files_by_extension(directory: Union[str,Path], extension: str, verbose:
      return name_path_dict


- def list_subdirectories(root_dir: Union[str,Path], verbose: bool=True) -> dict[str, Path]:
+ def list_subdirectories(
+     root_dir: Union[str, Path],
+     verbose: bool = True,
+     raise_on_empty: bool = True
+ ) -> dict[str, Path]:
      """
      Scans a directory and returns a dictionary of its immediate subdirectories.

      Args:
          root_dir (str | Path): The path to the directory to scan.
          verbose (bool): If True, prints the number of directories found.
+         raise_on_empty (bool): If True, raises IOError if no subdirectories are found.

      Returns:
          dict[str, Path]: A dictionary mapping subdirectory names (str) to their full Path objects.
@@ -513,8 +522,14 @@ def list_subdirectories(root_dir: Union[str,Path], verbose: bool=True) -> dict[s
      directories = [p.resolve() for p in root_path.iterdir() if p.is_dir()]

      if len(directories) < 1:
-         _LOGGER.error(f"No subdirectories found inside '{root_path}'")
-         raise IOError()
+         msg = f"No subdirectories found inside '{root_path}'"
+         if raise_on_empty:
+             _LOGGER.error(msg)
+             raise IOError()
+         else:
+             if verbose:
+                 _LOGGER.warning(msg)
+             return {}

      if verbose:
          count = len(directories)
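All three listing helpers (list_csv_paths, list_files_by_extension, list_subdirectories) now share the same failure contract: by default an empty result is logged and raised as IOError, while raise_on_empty=False downgrades it to a warning and an empty dict. A minimal usage sketch against the public re-exports in ml_tools.path_manager; the "./experiments" path is illustrative:

    from ml_tools.path_manager import list_files_by_extension, list_subdirectories

    # Default behavior: no matches -> an error is logged and IOError is raised.
    configs = list_files_by_extension("./experiments", extension="json")

    # Tolerant mode: no matches -> {} is returned (with a warning when verbose), so callers can branch.
    runs = list_subdirectories("./experiments", verbose=True, raise_on_empty=False)
    if not runs:
        print("No experiment folders yet.")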
@@ -529,5 +544,112 @@ def list_subdirectories(root_dir: Union[str,Path], verbose: bool=True) -> dict[s
      return dir_map


+ def clean_directory(directory: Union[str, Path], verbose: bool = False) -> None:
+     """
+     ⚠️ DANGER: DESTRUCTIVE OPERATION ⚠️
+
+     Deletes all files and subdirectories inside the specified directory. It is designed to empty a folder, not delete the folder itself.
+
+     Safety: It skips hidden files and directories (those starting with a period '.'). This works for macOS/Linux hidden files and dot-config folders on Windows.
+
+     Args:
+         directory (str | Path): The directory path to clean.
+         verbose (bool): If True, prints the name of each top-level item deleted.
+     """
+     target_dir = make_fullpath(directory, enforce="directory")
+
+     if verbose:
+         _LOGGER.warning(f"Starting cleanup of directory: {target_dir}")
+
+     for item in target_dir.iterdir():
+         # Safety Check: Skip hidden files/dirs
+         if item.name.startswith("."):
+             continue
+
+         try:
+             if item.is_file() or item.is_symlink():
+                 item.unlink()
+                 if verbose:
+                     print(f" 🗑️ Deleted file: {item.name}")
+             elif item.is_dir():
+                 shutil.rmtree(item)
+                 if verbose:
+                     print(f" 🗑️ Deleted directory: {item.name}")
+         except Exception as e:
+             _LOGGER.warning(f"Failed to delete item '{item.name}': {e}")
+             continue
+
+
+ def safe_move(
+     source: Union[str, Path],
+     final_destination: Union[str, Path],
+     rename: Optional[str] = None,
+     overwrite: bool = False
+ ) -> Path:
+     """
+     Moves a file or directory to a destination directory with safety checks.
+
+     Features:
+     - Supports optional renaming (sanitized automatically).
+     - PRESERVES file extensions during renaming (cannot be modified).
+     - Prevents accidental overwrites unless explicit.
+
+     Args:
+         source (str | Path): The file or directory to move.
+         final_destination (str | Path): The destination DIRECTORY where the item will be moved. It will be created if it does not exist.
+         rename (Optional[str]): If provided, the moved item will be renamed to this. Note: For files, the extension is strictly preserved.
+         overwrite (bool): If True, overwrites the destination path if it exists.
+
+     Returns:
+         Path: The new absolute path of the moved item.
+     """
+     # 1. Validation and Setup
+     src_path = make_fullpath(source, make=False)
+
+     # Ensure destination directory exists
+     dest_dir_path = make_fullpath(final_destination, make=True, enforce="directory")
+
+     # 2. Determine Target Name
+     if rename:
+         sanitized_name = sanitize_filename(rename)
+         if src_path.is_file():
+             # Strict Extension Preservation
+             final_name = f"{sanitized_name}{src_path.suffix}"
+         else:
+             final_name = sanitized_name
+     else:
+         final_name = src_path.name
+
+     final_path = dest_dir_path / final_name
+
+     # 3. Safety Checks (Collision Detection)
+     if final_path.exists():
+         if not overwrite:
+             _LOGGER.error(f"Destination already exists: '{final_path}'. Use overwrite=True to force.")
+             raise FileExistsError()
+
+         # Smart Overwrite Handling
+         if final_path.is_dir():
+             if src_path.is_file():
+                 _LOGGER.error(f"Cannot overwrite directory '{final_path}' with file '{src_path}'")
+                 raise IsADirectoryError()
+             # If overwriting a directory, we must remove the old one first to avoid nesting/errors
+             shutil.rmtree(final_path)
+         else:
+             # Destination is a file
+             if src_path.is_dir():
+                 _LOGGER.error(f"Cannot overwrite file '{final_path}' with directory '{src_path}'")
+                 raise FileExistsError()
+             final_path.unlink()
+
+     # 4. Perform Move
+     try:
+         shutil.move(str(src_path), str(final_path))
+         return final_path
+     except Exception as e:
+         _LOGGER.exception(f"Failed to move '{src_path}' to '{final_path}'")
+         raise e
+
+
  def info():
      _script_info(__all__)
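Together, safe_move and clean_directory cover a "stage the result, then reset the workspace" flow. A sketch assuming the public re-exports in ml_tools.path_manager; the paths and the rename value are illustrative, and the exact renamed filename depends on sanitize_filename:

    from ml_tools.path_manager import safe_move, clean_directory

    # Move a finished report into an archive directory (created if missing), renaming it.
    # The original ".csv" extension is preserved regardless of the rename value.
    archived = safe_move("results/run_01.csv", "archive/2024", rename="baseline run")
    print(archived)  # e.g. archive/2024/baseline_run.csv, subject to sanitize_filename

    # Empty the scratch area afterwards; hidden entries such as ".gitkeep" are skipped.
    clean_directory("results/scratch", verbose=True)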
@@ -166,8 +166,12 @@ def load_dataframe_greedy(directory: Union[str, Path],
      dir_path = make_fullpath(directory, enforce="directory")

      # list all csv files and grab one (should be the only one)
-     csv_dict = list_csv_paths(directory=dir_path, verbose=False)
+     csv_dict = list_csv_paths(directory=dir_path, verbose=False, raise_on_empty=True)

+     # explicitly check that there is only one csv file
+     if len(csv_dict) > 1:
+         _LOGGER.warning(f"Multiple CSV files found in '{dir_path}'. Only one will be loaded.")
+
      for df_path in csv_dict.values():
          df , _df_name = load_dataframe(df_path=df_path,
                                         use_columns=use_columns,
@@ -260,7 +264,7 @@ def yield_dataframes_from_dir(datasets_dir: Union[str,Path], verbose: bool=True)
      - Output is streamed via a generator to support lazy loading of multiple datasets.
      """
      datasets_path = make_fullpath(datasets_dir)
-     files_dict = list_csv_paths(datasets_path, verbose=verbose)
+     files_dict = list_csv_paths(datasets_path, verbose=verbose, raise_on_empty=True)
      for df_name, df_path in files_dict.items():
          df: pd.DataFrame
          df, _ = load_dataframe(df_path, kind="pandas", verbose=verbose) # type: ignore
ml_tools/keys.py CHANGED
@@ -2,10 +2,12 @@ from ._core._keys import (
      PyTorchInferenceKeys as InferenceKeys,
      _CheckpointCallbackKeys as CheckpointCallbackKeys,
      _FinalizedFileKeys as FinalizedFileKeys,
+     _PublicTaskKeys as TaskKeys,
  )

  __all__ = [
      "InferenceKeys",
      "CheckpointCallbackKeys",
      "FinalizedFileKeys",
+     "TaskKeys",
  ]
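The TaskKeys alias promotes _PublicTaskKeys into the stable import surface of ml_tools.keys. A quick way to inspect it after upgrading (the attributes defined on TaskKeys live in _core/_keys.py and are not shown in this diff):

    from ml_tools.keys import TaskKeys

    # List the public task-key names without importing the private _core module.
    print([name for name in vars(TaskKeys) if not name.startswith("_")])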
ml_tools/path_manager.py CHANGED
@@ -5,6 +5,8 @@ from ._core._path_manager import (
      list_csv_paths,
      list_files_by_extension,
      list_subdirectories,
+     clean_directory,
+     safe_move,
      info
  )

@@ -14,5 +16,7 @@ __all__ = [
      "sanitize_filename",
      "list_csv_paths",
      "list_files_by_extension",
-     "list_subdirectories"
+     "list_subdirectories",
+     "clean_directory",
+     "safe_move",
  ]
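path_manager.py remains a thin re-export shim, so downstream code keeps importing from the stable public path while the implementation lives in _core._path_manager. A small post-upgrade sanity check; the output format of info() comes from _script_info and is not shown in this diff:

    from ml_tools import path_manager

    # The new helpers are part of the public surface.
    assert "clean_directory" in path_manager.__all__
    assert "safe_move" in path_manager.__all__

    # Prints the module's public names via _script_info.
    path_manager.info()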