dragon-ml-toolbox 19.10.0__tar.gz → 19.11.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. {dragon_ml_toolbox-19.10.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-19.11.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_MICE_imputation.py +2 -2
  4. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_utilities.py +153 -50
  5. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_PSO_optimization.py +1 -1
  6. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ensemble_inference.py +1 -1
  7. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_optimization_tools.py +1 -1
  8. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_path_manager.py +38 -25
  9. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_utilities.py +6 -2
  10. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/pyproject.toml +1 -1
  11. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/LICENSE +0 -0
  12. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/LICENSE-THIRD-PARTY.md +0 -0
  13. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/README.md +0 -0
  14. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  15. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  16. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  17. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  18. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ETL_cleaning.py +0 -0
  19. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ETL_engineering.py +0 -0
  20. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/GUI_tools.py +0 -0
  21. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/IO_tools.py +0 -0
  22. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/MICE_imputation.py +0 -0
  23. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_callbacks.py +0 -0
  24. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_chaining_inference.py +0 -0
  25. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_chaining_utilities.py +0 -0
  26. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_configuration.py +0 -0
  27. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_configuration_pytab.py +0 -0
  28. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_datasetmaster.py +0 -0
  29. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_evaluation.py +0 -0
  30. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_evaluation_captum.py +0 -0
  31. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_evaluation_multi.py +0 -0
  32. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_finalize_handler.py +0 -0
  33. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_inference.py +0 -0
  34. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_models.py +0 -0
  35. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_models_advanced.py +0 -0
  36. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_models_pytab.py +0 -0
  37. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_optimization.py +0 -0
  38. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_optimization_pareto.py +0 -0
  39. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_scaler.py +0 -0
  40. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_sequence_datasetmaster.py +0 -0
  41. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_sequence_evaluation.py +0 -0
  42. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_sequence_inference.py +0 -0
  43. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_sequence_models.py +0 -0
  44. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_trainer.py +0 -0
  45. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_utilities.py +0 -0
  46. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_vision_datasetmaster.py +0 -0
  47. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_vision_evaluation.py +0 -0
  48. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_vision_inference.py +0 -0
  49. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_vision_models.py +0 -0
  50. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ML_vision_transformers.py +0 -0
  51. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/PSO_optimization.py +0 -0
  52. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/SQL.py +0 -0
  53. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/VIF_factor.py +0 -0
  54. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/__init__.py +0 -0
  55. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ETL_cleaning.py +0 -0
  56. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ETL_engineering.py +0 -0
  57. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_GUI_tools.py +0 -0
  58. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_IO_tools.py +0 -0
  59. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_callbacks.py +0 -0
  60. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_chaining_inference.py +0 -0
  61. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_chaining_utilities.py +0 -0
  62. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_configuration.py +0 -0
  63. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_configuration_pytab.py +0 -0
  64. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_datasetmaster.py +0 -0
  65. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_evaluation.py +0 -0
  66. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_evaluation_captum.py +0 -0
  67. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_evaluation_multi.py +0 -0
  68. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_finalize_handler.py +0 -0
  69. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_inference.py +0 -0
  70. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_models.py +0 -0
  71. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_models_advanced.py +0 -0
  72. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_models_pytab.py +0 -0
  73. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_optimization.py +0 -0
  74. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_optimization_pareto.py +0 -0
  75. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_scaler.py +0 -0
  76. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_sequence_datasetmaster.py +0 -0
  77. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_sequence_evaluation.py +0 -0
  78. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_sequence_inference.py +0 -0
  79. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_sequence_models.py +0 -0
  80. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_trainer.py +0 -0
  81. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_vision_datasetmaster.py +0 -0
  82. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_vision_evaluation.py +0 -0
  83. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_vision_inference.py +0 -0
  84. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_vision_models.py +0 -0
  85. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ML_vision_transformers.py +0 -0
  86. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_SQL.py +0 -0
  87. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_VIF_factor.py +0 -0
  88. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/__init__.py +0 -0
  89. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_data_exploration.py +0 -0
  90. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ensemble_evaluation.py +0 -0
  91. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_ensemble_learning.py +0 -0
  92. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_excel_handler.py +0 -0
  93. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_keys.py +0 -0
  94. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_logger.py +0 -0
  95. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_math_utilities.py +0 -0
  96. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_models_advanced_base.py +0 -0
  97. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_models_advanced_helpers.py +0 -0
  98. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_plot_fonts.py +0 -0
  99. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_schema.py +0 -0
  100. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_script_info.py +0 -0
  101. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/_core/_serde.py +0 -0
  102. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/constants.py +0 -0
  103. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/data_exploration.py +0 -0
  104. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ensemble_evaluation.py +0 -0
  105. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ensemble_inference.py +0 -0
  106. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/ensemble_learning.py +0 -0
  107. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/excel_handler.py +0 -0
  108. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/keys.py +0 -0
  109. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/math_utilities.py +0 -0
  110. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/optimization_tools.py +0 -0
  111. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/path_manager.py +0 -0
  112. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/plot_fonts.py +0 -0
  113. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/schema.py +0 -0
  114. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/serde.py +0 -0
  115. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/ml_tools/utilities.py +0 -0
  116. {dragon_ml_toolbox-19.10.0 → dragon_ml_toolbox-19.11.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 19.10.0
3
+ Version: 19.11.0
4
4
  Summary: Complete pipelines and helper tools for data science and machine learning projects.
5
5
  Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 19.10.0
3
+ Version: 19.11.0
4
4
  Summary: Complete pipelines and helper tools for data science and machine learning projects.
5
5
  Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -256,7 +256,7 @@ def run_mice_pipeline(df_path_or_dir: Union[str,Path], target_columns: list[str]
256
256
  if input_path.is_file():
257
257
  all_file_paths = [input_path]
258
258
  else:
259
- all_file_paths = list(list_csv_paths(input_path).values())
259
+ all_file_paths = list(list_csv_paths(input_path, raise_on_empty=True).values())
260
260
 
261
261
  for df_path in all_file_paths:
262
262
  df: pd.DataFrame
@@ -461,7 +461,7 @@ class DragonMICE:
461
461
  if input_path.is_file():
462
462
  all_file_paths = [input_path]
463
463
  else:
464
- all_file_paths = list(list_csv_paths(input_path).values())
464
+ all_file_paths = list(list_csv_paths(input_path, raise_on_empty=True).values())
465
465
 
466
466
  for df_path in all_file_paths:
467
467
 
@@ -46,17 +46,24 @@ class ArtifactFinder:
46
46
  └── FeatureSchema.json (Required if `load_schema` is True)
47
47
  ```
48
48
  """
49
- def __init__(self, directory: Union[str, Path], load_scaler: bool, load_schema: bool) -> None:
49
+ def __init__(self,
50
+ directory: Union[str, Path],
51
+ load_scaler: bool,
52
+ load_schema: bool,
53
+ strict: bool=False,
54
+ verbose: bool=True) -> None:
50
55
  """
51
56
  Args:
52
57
  directory (str | Path): The path to the directory that contains training artifacts.
53
58
  load_scaler (bool): If True, requires and searches for a scaler file `scaler_*.pth`.
54
59
  load_schema (bool): If True, requires and searches for a FeatureSchema file `FeatureSchema.json`.
60
+ strict (bool): If True, raises an error if any artifact is missing. If False, returns None for missing artifacts silently.
61
+ verbose (bool): If True, logs a warning listing the missing artifacts, or a success message if none are missing.
55
62
  """
56
63
  # validate directory
57
64
  dir_path = make_fullpath(directory, enforce="directory")
58
65
 
59
- parsing_dict = _find_model_artifacts(target_directory=dir_path, load_scaler=load_scaler, verbose=False)
66
+ parsing_dict = _find_model_artifacts(target_directory=dir_path, load_scaler=load_scaler, verbose=False, strict=strict)
60
67
 
61
68
  self._weights_path = parsing_dict[PytorchArtifactPathKeys.WEIGHTS_PATH]
62
69
  self._feature_names_path = parsing_dict[PytorchArtifactPathKeys.FEATURES_PATH]
@@ -64,71 +71,121 @@ class ArtifactFinder:
64
71
  self._model_architecture_path = parsing_dict[PytorchArtifactPathKeys.ARCHITECTURE_PATH]
65
72
  self._scaler_path = None
66
73
  self._schema = None
74
+ self._strict = strict
67
75
 
68
76
  if load_scaler:
69
77
  self._scaler_path = parsing_dict[PytorchArtifactPathKeys.SCALER_PATH]
70
78
 
71
79
  if load_schema:
72
- self._schema = FeatureSchema.from_json(directory=dir_path)
80
+ try:
81
+ self._schema = FeatureSchema.from_json(directory=dir_path)
82
+ except Exception:
83
+ if strict:
84
+ # FeatureSchema logs its own error details
85
+ # _LOGGER.error(f"Failed to load FeatureSchema from '{dir_path.name}': {e}")
86
+ raise FileNotFoundError()
87
+ else:
88
+ # _LOGGER.warning(f"Could not load FeatureSchema from '{dir_path.name}': {e}")
89
+ self._schema = None
90
+
91
+ # Process feature names
92
+ if self._feature_names_path is not None:
93
+ self._feature_names = self._process_text(self._feature_names_path)
94
+ else:
95
+ self._feature_names = None
96
+ # Process target names
97
+ if self._target_names_path is not None:
98
+ self._target_names = self._process_text(self._target_names_path)
99
+ else:
100
+ self._target_names = None
101
+
102
+ if verbose:
103
+ # log missing artifacts
104
+ missing_artifacts = []
105
+ if self._feature_names is None:
106
+ missing_artifacts.append("Feature Names")
107
+ if self._target_names is None:
108
+ missing_artifacts.append("Target Names")
109
+ if self._weights_path is None:
110
+ missing_artifacts.append("Weights File")
111
+ if self._model_architecture_path is None:
112
+ missing_artifacts.append("Model Architecture File")
113
+ if load_scaler and self._scaler_path is None:
114
+ missing_artifacts.append("Scaler File")
115
+ if load_schema and self._schema is None:
116
+ missing_artifacts.append("FeatureSchema File")
117
+
118
+ if missing_artifacts:
119
+ _LOGGER.warning(f"Missing artifacts in '{dir_path.name}': {', '.join(missing_artifacts)}.")
120
+ else:
121
+ _LOGGER.info(f"All artifacts successfully loaded from '{dir_path.name}'.")
73
122
 
74
- # Process text files
75
- self._feature_names = self._process_text(self._feature_names_path)
76
- self._target_names = self._process_text(self._target_names_path)
77
-
78
123
  def _process_text(self, text_file_path: Path):
79
124
  list_strings = load_list_strings(text_file=text_file_path, verbose=False)
80
125
  return list_strings
81
126
 
82
127
  @property
83
- def feature_names(self) -> list[str]:
128
+ def feature_names(self) -> Union[list[str], None]:
84
129
  """Returns the feature names as a list of strings."""
130
+ if self._strict and not self._feature_names:
131
+ _LOGGER.error("No feature names loaded for Strict mode.")
132
+ raise ValueError()
85
133
  return self._feature_names
86
134
 
87
135
  @property
88
- def target_names(self) -> list[str]:
136
+ def target_names(self) -> Union[list[str], None]:
89
137
  """Returns the target names as a list of strings."""
138
+ if self._strict and not self._target_names:
139
+ _LOGGER.error("No target names loaded for Strict mode.")
140
+ raise ValueError()
90
141
  return self._target_names
91
142
 
92
143
  @property
93
- def weights_path(self) -> Path:
144
+ def weights_path(self) -> Union[Path, None]:
94
145
  """Returns the path to the state dictionary pth file."""
146
+ if self._strict and self._weights_path is None:
147
+ _LOGGER.error("No weights file loaded for Strict mode.")
148
+ raise ValueError()
95
149
  return self._weights_path
96
150
 
97
151
  @property
98
- def model_architecture_path(self) -> Path:
152
+ def model_architecture_path(self) -> Union[Path, None]:
99
153
  """Returns the path to the model architecture json file."""
154
+ if self._strict and self._model_architecture_path is None:
155
+ _LOGGER.error("No model architecture file loaded for Strict mode.")
156
+ raise ValueError()
100
157
  return self._model_architecture_path
101
158
 
102
159
  @property
103
- def scaler_path(self) -> Path:
160
+ def scaler_path(self) -> Union[Path, None]:
104
161
  """Returns the path to the scaler file."""
105
- if self._scaler_path is None:
106
- _LOGGER.error("No scaler file loaded. Set 'load_scaler=True'.")
162
+ if self._strict and self._scaler_path is None:
163
+ _LOGGER.error("No scaler file loaded for Strict mode.")
107
164
  raise ValueError()
108
165
  else:
109
166
  return self._scaler_path
110
167
 
111
168
  @property
112
- def feature_schema(self) -> FeatureSchema:
169
+ def feature_schema(self) -> Union[FeatureSchema, None]:
113
170
  """Returns the FeatureSchema object."""
114
- if self._schema is None:
115
- _LOGGER.error("No FeatureSchema loaded. Set 'load_schema=True'.")
171
+ if self._strict and self._schema is None:
172
+ _LOGGER.error("No FeatureSchema loaded for Strict mode.")
116
173
  raise ValueError()
117
174
  else:
118
175
  return self._schema
119
176
 
120
177
  def __repr__(self) -> str:
121
- dir_name = self._weights_path.parent.name
122
- n_features = len(self._feature_names)
123
- n_targets = len(self._target_names)
178
+ dir_name = self._weights_path.parent.name if self._weights_path else "Unknown"
179
+ n_features = len(self._feature_names) if self._feature_names else "None"
180
+ n_targets = len(self._target_names) if self._target_names else "None"
124
181
  scaler_status = self._scaler_path.name if self._scaler_path else "None"
125
182
  schema_status = "Loaded" if self._schema else "None"
126
183
 
127
184
  return (
128
185
  f"{self.__class__.__name__}\n"
129
186
  f" directory='{dir_name}'\n"
130
- f" weights='{self._weights_path.name}'\n"
131
- f" architecture='{self._model_architecture_path.name}'\n"
187
+ f" weights='{self._weights_path.name if self._weights_path else 'None'}'\n"
188
+ f" architecture='{self._model_architecture_path.name if self._model_architecture_path else 'None'}'\n"
132
189
  f" scaler='{scaler_status}'\n"
133
190
  f" schema='{schema_status}'\n"
134
191
  f" features={n_features}\n"
@@ -136,7 +193,7 @@ class ArtifactFinder:
136
193
  )
137
194
 
138
195
 
139
- def _find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool, verbose: bool=False) -> dict[str, Path]:
196
+ def _find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool, verbose: bool=True, strict:bool=True) -> dict[str, Union[Path, None]]:
140
197
  """
141
198
  Scans a directory to find paths to model weights, target names, feature names, and model architecture. Optionally also finds a scaler path if `load_scaler` is True.
142
199
 
@@ -155,41 +212,70 @@ def _find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool,
155
212
  target_directory (str | Path): The path to the directory that contains training artifacts.
156
213
  load_scaler (bool): If True, the function requires and searches for a scaler file `scaler_*.pth`.
157
214
  verbose (bool): If True, enables detailed logging during the search process.
215
+ strict (bool): If True, raises errors on missing files. If False, returns None for missing files.
158
216
  """
159
217
  # validate directory
160
218
  dir_path = make_fullpath(target_directory, enforce="directory")
161
219
  dir_name = dir_path.name
162
220
 
163
221
  # find files
164
- model_pth_dict = list_files_by_extension(directory=dir_path, extension="pth", verbose=verbose)
222
+ model_pth_dict = list_files_by_extension(directory=dir_path, extension="pth", verbose=False, raise_on_empty=False)
165
223
 
166
- # restriction
167
- if load_scaler:
168
- if len(model_pth_dict) != 2:
169
- _LOGGER.error(f"Directory '{dir_name}' should contain exactly 2 '.pth' files: scaler and weights.")
170
- raise IOError()
171
- else:
172
- if len(model_pth_dict) != 1:
173
- _LOGGER.error(f"Directory '{dir_name}' should contain exactly 1 '.pth' file for weights.")
224
+ if not model_pth_dict:
225
+ pth_msg=f"No '.pth' files found in directory: {dir_name}."
226
+ if strict:
227
+ _LOGGER.error(pth_msg)
174
228
  raise IOError()
229
+ else:
230
+ if verbose:
231
+ _LOGGER.warning(pth_msg)
232
+ model_pth_dict = None
233
+
234
+ # restriction
235
+ if model_pth_dict is not None:
236
+ valid_count = False
237
+ msg = ""
238
+
239
+ if load_scaler:
240
+ if len(model_pth_dict) == 2:
241
+ valid_count = True
242
+ else:
243
+ msg = f"Directory '{dir_name}' should contain exactly 2 '.pth' files: scaler and weights. Found {len(model_pth_dict)}."
244
+ else:
245
+ if len(model_pth_dict) == 1:
246
+ valid_count = True
247
+ else:
248
+ msg = f"Directory '{dir_name}' should contain exactly 1 '.pth' file for weights. Found {len(model_pth_dict)}."
249
+
250
+ # Respect strict mode for count mismatch
251
+ if not valid_count:
252
+ if strict:
253
+ _LOGGER.error(msg)
254
+ raise IOError()
255
+ else:
256
+ if verbose:
257
+ _LOGGER.warning(msg)
258
+ # Invalidate dictionary
259
+ model_pth_dict = None
175
260
 
176
261
  ##### Scaler and Weights #####
177
262
  scaler_path = None
178
263
  weights_path = None
179
264
 
180
265
  # load weights and scaler if present
181
- for pth_filename, pth_path in model_pth_dict.items():
182
- if load_scaler and pth_filename.lower().startswith(DatasetKeys.SCALER_PREFIX):
183
- scaler_path = pth_path
184
- else:
185
- weights_path = pth_path
266
+ if model_pth_dict is not None:
267
+ for pth_filename, pth_path in model_pth_dict.items():
268
+ if load_scaler and pth_filename.lower().startswith(DatasetKeys.SCALER_PREFIX):
269
+ scaler_path = pth_path
270
+ else:
271
+ weights_path = pth_path
186
272
 
187
273
  # validation
188
- if not weights_path:
274
+ if not weights_path and strict:
189
275
  _LOGGER.error(f"Error parsing the model weights path from '{dir_name}'")
190
276
  raise IOError()
191
277
 
192
- if load_scaler and not scaler_path:
278
+ if strict and load_scaler and not scaler_path:
193
279
  _LOGGER.error(f"Error parsing the scaler path from '{dir_name}'")
194
280
  raise IOError()
195
281
 
@@ -198,32 +284,44 @@ def _find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool,
198
284
  feature_names_path = None
199
285
 
200
286
  # load feature and target names
201
- model_txt_dict = list_files_by_extension(directory=dir_path, extension="txt", verbose=verbose)
287
+ model_txt_dict = list_files_by_extension(directory=dir_path, extension="txt", verbose=False, raise_on_empty=False)
202
288
 
289
+ # if the directory has no txt files, the loop is skipped
203
290
  for txt_filename, txt_path in model_txt_dict.items():
204
291
  if txt_filename == DatasetKeys.FEATURE_NAMES:
205
292
  feature_names_path = txt_path
206
293
  elif txt_filename == DatasetKeys.TARGET_NAMES:
207
294
  target_names_path = txt_path
208
295
 
209
- # validation
210
- if not target_names_path or not feature_names_path:
211
- _LOGGER.error(f"Error parsing features path or targets path from '{dir_name}'")
296
+ # validation per case
297
+ if strict and not target_names_path:
298
+ _LOGGER.error(f"Error parsing the target names path from '{dir_name}'")
212
299
  raise IOError()
300
+ elif verbose and not target_names_path:
301
+ _LOGGER.warning(f"Target names file not found in '{dir_name}'.")
213
302
 
303
+ if strict and not feature_names_path:
304
+ _LOGGER.error(f"Error parsing the feature names path from '{dir_name}'")
305
+ raise IOError()
306
+ elif verbose and not feature_names_path:
307
+ _LOGGER.warning(f"Feature names file not found in '{dir_name}'.")
308
+
214
309
  ##### load model architecture path #####
215
310
  architecture_path = None
216
311
 
217
- model_json_dict = list_files_by_extension(directory=dir_path, extension="json", verbose=verbose)
312
+ model_json_dict = list_files_by_extension(directory=dir_path, extension="json", verbose=False, raise_on_empty=False)
218
313
 
314
+ # if the directory has no json files, the loop is skipped
219
315
  for json_filename, json_path in model_json_dict.items():
220
316
  if json_filename == PytorchModelArchitectureKeys.SAVENAME:
221
317
  architecture_path = json_path
222
318
 
223
319
  # validation
224
- if not architecture_path:
320
+ if strict and not architecture_path:
225
321
  _LOGGER.error(f"Error parsing the model architecture path from '{dir_name}'")
226
322
  raise IOError()
323
+ elif verbose and not architecture_path:
324
+ _LOGGER.warning(f"Model architecture file not found in '{dir_name}'.")
227
325
 
228
326
  ##### Paths dictionary #####
229
327
  parsing_dict = {
@@ -233,7 +331,7 @@ def _find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool,
233
331
  PytorchArtifactPathKeys.TARGETS_PATH: target_names_path,
234
332
  }
235
333
 
236
- if scaler_path is not None:
334
+ if load_scaler:
237
335
  parsing_dict[PytorchArtifactPathKeys.SCALER_PATH] = scaler_path
238
336
 
239
337
  return parsing_dict
@@ -246,6 +344,9 @@ def find_model_artifacts_multi(target_directory: Union[str,Path], load_scaler: b
246
344
  This function operates on a specific directory structure. It expects the
247
345
  `target_directory` to contain one or more subdirectories, where each
248
346
  subdirectory represents a single trained model result.
347
+
348
+ This function operates in strict mode, meaning that it will raise errors if
349
+ any required artifacts are missing in a model's subdirectory.
249
350
 
250
351
  The expected directory structure for each model is as follows:
251
352
  ```
@@ -278,14 +379,16 @@ def find_model_artifacts_multi(target_directory: Union[str,Path], load_scaler: b
278
379
  all_artifacts: list[dict[str, Path]] = list()
279
380
 
280
381
  # find model directories
281
- result_dirs_dict = list_subdirectories(root_dir=root_path, verbose=verbose)
382
+ result_dirs_dict = list_subdirectories(root_dir=root_path, verbose=verbose, raise_on_empty=True)
282
383
  for _dir_name, dir_path in result_dirs_dict.items():
283
384
 
284
385
  parsing_dict = _find_model_artifacts(target_directory=dir_path,
285
386
  load_scaler=load_scaler,
286
- verbose=verbose)
387
+ verbose=verbose,
388
+ strict=True)
287
389
 
288
- all_artifacts.append(parsing_dict)
390
+ # parsing_dict is guaranteed to have all required paths due to strict=True
391
+ all_artifacts.append(parsing_dict) # type: ignore
289
392
 
290
393
  return all_artifacts
291
394
 
@@ -721,7 +824,7 @@ def select_features_by_shap(
721
824
  root_path = make_fullpath(root_directory, enforce="directory")
722
825
 
723
826
  # --- Step 2: Directory and File Discovery ---
724
- subdirectories = list_subdirectories(root_dir=root_path, verbose=False)
827
+ subdirectories = list_subdirectories(root_dir=root_path, verbose=False, raise_on_empty=True)
725
828
 
726
829
  shap_filename = SHAPKeys.SAVENAME + ".csv"
727
830
 
@@ -169,7 +169,7 @@ def multiple_objective_functions_from_dir(directory: Union[str,Path], add_noise:
169
169
  """
170
170
  objective_functions = list()
171
171
  objective_function_names = list()
172
- for file_name, file_path in list_files_by_extension(directory=directory, extension='joblib').items():
172
+ for file_name, file_path in list_files_by_extension(directory=directory, extension='joblib', raise_on_empty=True).items():
173
173
  current_objective = ObjectiveFunction(trained_model_path=file_path,
174
174
  add_noise=add_noise,
175
175
  task=task,
@@ -42,7 +42,7 @@ class DragonEnsembleInferenceHandler:
42
42
  self.verbose = verbose
43
43
  self._feature_names: Optional[List[str]] = None
44
44
 
45
- model_files = list_files_by_extension(directory=models_dir, extension="joblib")
45
+ model_files = list_files_by_extension(directory=models_dir, extension="joblib", raise_on_empty=True)
46
46
 
47
47
  for fname, fpath in model_files.items():
48
48
  try:
@@ -269,7 +269,7 @@ def plot_optimal_feature_distributions(results_dir: Union[str, Path],
269
269
  output_path = make_fullpath(results_path / "DistributionPlots", make=True)
270
270
 
271
271
  # Check that the directory contains csv files
272
- list_csv_paths(results_path, verbose=False)
272
+ list_csv_paths(results_path, verbose=False, raise_on_empty=True)
273
273
 
274
274
  # --- Data Loading and Preparation ---
275
275
  _LOGGER.debug(f"📁 Starting analysis from results in: '{results_dir}'")
@@ -436,35 +436,28 @@ def sanitize_filename(filename: str) -> str:
436
436
  return sanitized
437
437
 
438
438
 
439
- def list_csv_paths(directory: Union[str,Path], verbose: bool=True) -> dict[str, Path]:
439
+ def list_csv_paths(directory: Union[str, Path], verbose: bool = True, raise_on_empty: bool = True) -> dict[str, Path]:
440
440
  """
441
441
  Lists all `.csv` files in the specified directory and returns a mapping: filenames (without extensions) to their absolute paths.
442
442
 
443
443
  Parameters:
444
444
  directory (str | Path): Path to the directory containing `.csv` files.
445
+ verbose (bool): If True, prints found files.
446
+ raise_on_empty (bool): If True, raises IOError if no files are found.
445
447
 
446
448
  Returns:
447
449
  (dict[str, Path]): Dictionary mapping {filename: filepath}.
448
450
  """
449
- dir_path = make_fullpath(directory)
451
+ # wraps the more general function
452
+ return list_files_by_extension(directory=directory, extension="csv", verbose=verbose, raise_on_empty=raise_on_empty)
450
453
 
451
- csv_paths = list(dir_path.glob("*.csv"))
452
- if not csv_paths:
453
- _LOGGER.error(f"No CSV files found in directory: {dir_path.name}")
454
- raise IOError()
455
-
456
- # make a dictionary of paths and names
457
- name_path_dict = {p.stem: p for p in csv_paths}
458
-
459
- if verbose:
460
- _LOGGER.info("🗂️ CSV files found:")
461
- for name in name_path_dict.keys():
462
- print(f"\t{name}")
463
454
 
464
- return name_path_dict
465
-
466
-
467
- def list_files_by_extension(directory: Union[str,Path], extension: str, verbose: bool=True) -> dict[str, Path]:
455
+ def list_files_by_extension(
456
+ directory: Union[str, Path],
457
+ extension: str,
458
+ verbose: bool = True,
459
+ raise_on_empty: bool = True
460
+ ) -> dict[str, Path]:
468
461
  """
469
462
  Lists all files with the specified extension in the given directory and returns a mapping:
470
463
  filenames (without extensions) to their absolute paths.
@@ -472,20 +465,29 @@ def list_files_by_extension(directory: Union[str,Path], extension: str, verbose:
472
465
  Parameters:
473
466
  directory (str | Path): Path to the directory to search in.
474
467
  extension (str): File extension to search for (e.g., 'json', 'txt').
468
+ verbose (bool): If True, logs the files found.
469
+ raise_on_empty (bool): If True, raises IOError if no matching files are found.
475
470
 
476
471
  Returns:
477
- (dict[str, Path]): Dictionary mapping {filename: filepath}.
472
+ (dict[str, Path]): Dictionary mapping {filename: filepath}. Returns empty dict if none found and raise_on_empty is False.
478
473
  """
479
- dir_path = make_fullpath(directory)
474
+ dir_path = make_fullpath(directory, enforce="directory")
480
475
 
481
476
  # Normalize the extension (remove leading dot if present)
482
477
  normalized_ext = extension.lstrip(".").lower()
483
478
  pattern = f"*.{normalized_ext}"
484
479
 
485
480
  matched_paths = list(dir_path.glob(pattern))
481
+
486
482
  if not matched_paths:
487
- _LOGGER.error(f"No '.{normalized_ext}' files found in directory: {dir_path}.")
488
- raise IOError()
483
+ msg = f"No '.{normalized_ext}' files found in directory: {dir_path}."
484
+ if raise_on_empty:
485
+ _LOGGER.error(msg)
486
+ raise IOError()
487
+ else:
488
+ if verbose:
489
+ _LOGGER.warning(msg)
490
+ return {}
489
491
 
490
492
  name_path_dict = {p.stem: p for p in matched_paths}
491
493
 
@@ -497,13 +499,18 @@ def list_files_by_extension(directory: Union[str,Path], extension: str, verbose:
497
499
  return name_path_dict
498
500
 
499
501
 
500
- def list_subdirectories(root_dir: Union[str,Path], verbose: bool=True) -> dict[str, Path]:
502
+ def list_subdirectories(
503
+ root_dir: Union[str, Path],
504
+ verbose: bool = True,
505
+ raise_on_empty: bool = True
506
+ ) -> dict[str, Path]:
501
507
  """
502
508
  Scans a directory and returns a dictionary of its immediate subdirectories.
503
509
 
504
510
  Args:
505
511
  root_dir (str | Path): The path to the directory to scan.
506
512
  verbose (bool): If True, prints the number of directories found.
513
+ raise_on_empty (bool): If True, raises IOError if no subdirectories are found.
507
514
 
508
515
  Returns:
509
516
  dict[str, Path]: A dictionary mapping subdirectory names (str) to their full Path objects.
@@ -513,8 +520,14 @@ def list_subdirectories(root_dir: Union[str,Path], verbose: bool=True) -> dict[s
513
520
  directories = [p.resolve() for p in root_path.iterdir() if p.is_dir()]
514
521
 
515
522
  if len(directories) < 1:
516
- _LOGGER.error(f"No subdirectories found inside '{root_path}'")
517
- raise IOError()
523
+ msg = f"No subdirectories found inside '{root_path}'"
524
+ if raise_on_empty:
525
+ _LOGGER.error(msg)
526
+ raise IOError()
527
+ else:
528
+ if verbose:
529
+ _LOGGER.warning(msg)
530
+ return {}
518
531
 
519
532
  if verbose:
520
533
  count = len(directories)
@@ -166,8 +166,12 @@ def load_dataframe_greedy(directory: Union[str, Path],
166
166
  dir_path = make_fullpath(directory, enforce="directory")
167
167
 
168
168
  # list all csv files and grab one (should be the only one)
169
- csv_dict = list_csv_paths(directory=dir_path, verbose=False)
169
+ csv_dict = list_csv_paths(directory=dir_path, verbose=False, raise_on_empty=True)
170
170
 
171
+ # explicitly check that there is only one csv file
172
+ if len(csv_dict) > 1:
173
+ _LOGGER.warning(f"Multiple CSV files found in '{dir_path}'. Only one will be loaded.")
174
+
171
175
  for df_path in csv_dict.values():
172
176
  df , _df_name = load_dataframe(df_path=df_path,
173
177
  use_columns=use_columns,
@@ -260,7 +264,7 @@ def yield_dataframes_from_dir(datasets_dir: Union[str,Path], verbose: bool=True)
260
264
  - Output is streamed via a generator to support lazy loading of multiple datasets.
261
265
  """
262
266
  datasets_path = make_fullpath(datasets_dir)
263
- files_dict = list_csv_paths(datasets_path, verbose=verbose)
267
+ files_dict = list_csv_paths(datasets_path, verbose=verbose, raise_on_empty=True)
264
268
  for df_name, df_path in files_dict.items():
265
269
  df: pd.DataFrame
266
270
  df, _ = load_dataframe(df_path, kind="pandas", verbose=verbose) # type: ignore
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dragon-ml-toolbox"
3
- version = "19.10.0"
3
+ version = "19.11.0"
4
4
  description = "Complete pipelines and helper tools for data science and machine learning projects."
5
5
  authors = [
6
6
  { name = "Karl Luigi Loza Vidaurre", email = "luigiloza@gmail.com" }