dragon-ml-toolbox 19.10.0__py3-none-any.whl → 19.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -46,17 +46,24 @@ class ArtifactFinder:
     └── FeatureSchema.json (Required if `load_schema` is True)
     ```
     """
-    def __init__(self, directory: Union[str, Path], load_scaler: bool, load_schema: bool) -> None:
+    def __init__(self,
+                 directory: Union[str, Path],
+                 load_scaler: bool,
+                 load_schema: bool,
+                 strict: bool = False,
+                 verbose: bool = True) -> None:
         """
         Args:
             directory (str | Path): The path to the directory that contains training artifacts.
             load_scaler (bool): If True, requires and searches for a scaler file `scaler_*.pth`.
             load_schema (bool): If True, requires and searches for a FeatureSchema file `FeatureSchema.json`.
+            strict (bool): If True, raises an error if any artifact is missing. If False, silently returns None for missing artifacts.
+            verbose (bool): If True, logs the missing artifacts in the directory, or a success message when all are found.
         """
         # validate directory
         dir_path = make_fullpath(directory, enforce="directory")
 
-        parsing_dict = _find_model_artifacts(target_directory=dir_path, load_scaler=load_scaler, verbose=False)
+        parsing_dict = _find_model_artifacts(target_directory=dir_path, load_scaler=load_scaler, verbose=False, strict=strict)
 
         self._weights_path = parsing_dict[PytorchArtifactPathKeys.WEIGHTS_PATH]
         self._feature_names_path = parsing_dict[PytorchArtifactPathKeys.FEATURES_PATH]
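
For orientation, a minimal usage sketch of the widened constructor follows; the import path and directory name are assumptions for illustration, not part of this diff:

```python
# Hypothetical usage of the 19.12.0 constructor; the module path is assumed.
# from dragon_ml_toolbox... import ArtifactFinder

# Non-strict: missing artifacts are logged (verbose=True) and surface as None.
finder = ArtifactFinder("results/model_01",
                        load_scaler=True,
                        load_schema=True,
                        strict=False,
                        verbose=True)

# Strict: a missing artifact raises during construction or on property access.
strict_finder = ArtifactFinder("results/model_01",
                               load_scaler=True,
                               load_schema=True,
                               strict=True)
```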
@@ -64,71 +71,121 @@ class ArtifactFinder:
         self._model_architecture_path = parsing_dict[PytorchArtifactPathKeys.ARCHITECTURE_PATH]
         self._scaler_path = None
         self._schema = None
+        self._strict = strict
 
         if load_scaler:
             self._scaler_path = parsing_dict[PytorchArtifactPathKeys.SCALER_PATH]
 
         if load_schema:
-            self._schema = FeatureSchema.from_json(directory=dir_path)
+            try:
+                self._schema = FeatureSchema.from_json(directory=dir_path)
+            except Exception:
+                if strict:
+                    # FeatureSchema logs its own error details
+                    # _LOGGER.error(f"Failed to load FeatureSchema from '{dir_path.name}': {e}")
+                    raise FileNotFoundError()
+                else:
+                    # _LOGGER.warning(f"Could not load FeatureSchema from '{dir_path.name}': {e}")
+                    self._schema = None
+
+        # Process feature names
+        if self._feature_names_path is not None:
+            self._feature_names = self._process_text(self._feature_names_path)
+        else:
+            self._feature_names = None
+        # Process target names
+        if self._target_names_path is not None:
+            self._target_names = self._process_text(self._target_names_path)
+        else:
+            self._target_names = None
+
+        if verbose:
+            # log missing artifacts
+            missing_artifacts = []
+            if self._feature_names is None:
+                missing_artifacts.append("Feature Names")
+            if self._target_names is None:
+                missing_artifacts.append("Target Names")
+            if self._weights_path is None:
+                missing_artifacts.append("Weights File")
+            if self._model_architecture_path is None:
+                missing_artifacts.append("Model Architecture File")
+            if load_scaler and self._scaler_path is None:
+                missing_artifacts.append("Scaler File")
+            if load_schema and self._schema is None:
+                missing_artifacts.append("FeatureSchema File")
+
+            if missing_artifacts:
+                _LOGGER.warning(f"Missing artifacts in '{dir_path.name}': {', '.join(missing_artifacts)}.")
+            else:
+                _LOGGER.info(f"All artifacts successfully loaded from '{dir_path.name}'.")
 
-        # Process text files
-        self._feature_names = self._process_text(self._feature_names_path)
-        self._target_names = self._process_text(self._target_names_path)
-
     def _process_text(self, text_file_path: Path):
         list_strings = load_list_strings(text_file=text_file_path, verbose=False)
         return list_strings
 
     @property
-    def feature_names(self) -> list[str]:
+    def feature_names(self) -> Union[list[str], None]:
         """Returns the feature names as a list of strings."""
+        if self._strict and not self._feature_names:
+            _LOGGER.error("No feature names loaded for Strict mode.")
+            raise ValueError()
         return self._feature_names
 
     @property
-    def target_names(self) -> list[str]:
+    def target_names(self) -> Union[list[str], None]:
         """Returns the target names as a list of strings."""
+        if self._strict and not self._target_names:
+            _LOGGER.error("No target names loaded for Strict mode.")
+            raise ValueError()
         return self._target_names
 
     @property
-    def weights_path(self) -> Path:
+    def weights_path(self) -> Union[Path, None]:
         """Returns the path to the state dictionary pth file."""
+        if self._strict and self._weights_path is None:
+            _LOGGER.error("No weights file loaded for Strict mode.")
+            raise ValueError()
         return self._weights_path
 
     @property
-    def model_architecture_path(self) -> Path:
+    def model_architecture_path(self) -> Union[Path, None]:
         """Returns the path to the model architecture json file."""
+        if self._strict and self._model_architecture_path is None:
+            _LOGGER.error("No model architecture file loaded for Strict mode.")
+            raise ValueError()
         return self._model_architecture_path
 
     @property
-    def scaler_path(self) -> Path:
+    def scaler_path(self) -> Union[Path, None]:
         """Returns the path to the scaler file."""
-        if self._scaler_path is None:
-            _LOGGER.error("No scaler file loaded. Set 'load_scaler=True'.")
+        if self._strict and self._scaler_path is None:
+            _LOGGER.error("No scaler file loaded for Strict mode.")
             raise ValueError()
         else:
             return self._scaler_path
 
     @property
-    def feature_schema(self) -> FeatureSchema:
+    def feature_schema(self) -> Union[FeatureSchema, None]:
         """Returns the FeatureSchema object."""
-        if self._schema is None:
-            _LOGGER.error("No FeatureSchema loaded. Set 'load_schema=True'.")
+        if self._strict and self._schema is None:
+            _LOGGER.error("No FeatureSchema loaded for Strict mode.")
             raise ValueError()
         else:
             return self._schema
 
     def __repr__(self) -> str:
-        dir_name = self._weights_path.parent.name
-        n_features = len(self._feature_names)
-        n_targets = len(self._target_names)
+        dir_name = self._weights_path.parent.name if self._weights_path else "Unknown"
+        n_features = len(self._feature_names) if self._feature_names else "None"
+        n_targets = len(self._target_names) if self._target_names else "None"
         scaler_status = self._scaler_path.name if self._scaler_path else "None"
         schema_status = "Loaded" if self._schema else "None"
 
         return (
             f"{self.__class__.__name__}\n"
             f" directory='{dir_name}'\n"
-            f" weights='{self._weights_path.name}'\n"
-            f" architecture='{self._model_architecture_path.name}'\n"
+            f" weights='{self._weights_path.name if self._weights_path else 'None'}'\n"
+            f" architecture='{self._model_architecture_path.name if self._model_architecture_path else 'None'}'\n"
             f" scaler='{scaler_status}'\n"
             f" schema='{schema_status}'\n"
             f" features={n_features}\n"
@@ -136,7 +193,7 @@ class ArtifactFinder:
         )
 
 
-def _find_model_artifacts(target_directory: Union[str, Path], load_scaler: bool, verbose: bool = False) -> dict[str, Path]:
+def _find_model_artifacts(target_directory: Union[str, Path], load_scaler: bool, verbose: bool = True, strict: bool = True) -> dict[str, Union[Path, None]]:
     """
     Scans a directory to find paths to model weights, target names, feature names, and model architecture. Optionally a scaler path if `load_scaler` is True.
 
@@ -155,41 +212,70 @@ def _find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool,
         target_directory (str | Path): The path to the directory that contains training artifacts.
         load_scaler (bool): If True, the function requires and searches for a scaler file `scaler_*.pth`.
         verbose (bool): If True, enables detailed logging during the search process.
+        strict (bool): If True, raises errors on missing files. If False, returns None for missing files.
     """
     # validate directory
     dir_path = make_fullpath(target_directory, enforce="directory")
     dir_name = dir_path.name
 
     # find files
-    model_pth_dict = list_files_by_extension(directory=dir_path, extension="pth", verbose=verbose)
+    model_pth_dict = list_files_by_extension(directory=dir_path, extension="pth", verbose=False, raise_on_empty=False)
 
-    # restriction
-    if load_scaler:
-        if len(model_pth_dict) != 2:
-            _LOGGER.error(f"Directory '{dir_name}' should contain exactly 2 '.pth' files: scaler and weights.")
-            raise IOError()
-    else:
-        if len(model_pth_dict) != 1:
-            _LOGGER.error(f"Directory '{dir_name}' should contain exactly 1 '.pth' file for weights.")
+    if not model_pth_dict:
+        pth_msg = f"No '.pth' files found in directory: {dir_name}."
+        if strict:
+            _LOGGER.error(pth_msg)
             raise IOError()
+        else:
+            if verbose:
+                _LOGGER.warning(pth_msg)
+            model_pth_dict = None
+
+    # restriction
+    if model_pth_dict is not None:
+        valid_count = False
+        msg = ""
+
+        if load_scaler:
+            if len(model_pth_dict) == 2:
+                valid_count = True
+            else:
+                msg = f"Directory '{dir_name}' should contain exactly 2 '.pth' files: scaler and weights. Found {len(model_pth_dict)}."
+        else:
+            if len(model_pth_dict) == 1:
+                valid_count = True
+            else:
+                msg = f"Directory '{dir_name}' should contain exactly 1 '.pth' file for weights. Found {len(model_pth_dict)}."
+
+        # Respect strict mode for count mismatch
+        if not valid_count:
+            if strict:
+                _LOGGER.error(msg)
+                raise IOError()
+            else:
+                if verbose:
+                    _LOGGER.warning(msg)
+                # Invalidate dictionary
+                model_pth_dict = None
 
     ##### Scaler and Weights #####
     scaler_path = None
     weights_path = None
 
     # load weights and scaler if present
-    for pth_filename, pth_path in model_pth_dict.items():
-        if load_scaler and pth_filename.lower().startswith(DatasetKeys.SCALER_PREFIX):
-            scaler_path = pth_path
-        else:
-            weights_path = pth_path
+    if model_pth_dict is not None:
+        for pth_filename, pth_path in model_pth_dict.items():
+            if load_scaler and pth_filename.lower().startswith(DatasetKeys.SCALER_PREFIX):
+                scaler_path = pth_path
+            else:
+                weights_path = pth_path
 
     # validation
-    if not weights_path:
+    if not weights_path and strict:
         _LOGGER.error(f"Error parsing the model weights path from '{dir_name}'")
         raise IOError()
 
-    if load_scaler and not scaler_path:
+    if strict and load_scaler and not scaler_path:
         _LOGGER.error(f"Error parsing the scaler path from '{dir_name}'")
         raise IOError()
 
@@ -198,32 +284,44 @@ def _find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool,
     feature_names_path = None
 
     # load feature and target names
-    model_txt_dict = list_files_by_extension(directory=dir_path, extension="txt", verbose=verbose)
+    model_txt_dict = list_files_by_extension(directory=dir_path, extension="txt", verbose=False, raise_on_empty=False)
 
+    # if the directory has no txt files, the loop is skipped
     for txt_filename, txt_path in model_txt_dict.items():
         if txt_filename == DatasetKeys.FEATURE_NAMES:
             feature_names_path = txt_path
         elif txt_filename == DatasetKeys.TARGET_NAMES:
             target_names_path = txt_path
 
-    # validation
-    if not target_names_path or not feature_names_path:
-        _LOGGER.error(f"Error parsing features path or targets path from '{dir_name}'")
+    # validation per case
+    if strict and not target_names_path:
+        _LOGGER.error(f"Error parsing the target names path from '{dir_name}'")
         raise IOError()
+    elif verbose and not target_names_path:
+        _LOGGER.warning(f"Target names file not found in '{dir_name}'.")
 
+    if strict and not feature_names_path:
+        _LOGGER.error(f"Error parsing the feature names path from '{dir_name}'")
+        raise IOError()
+    elif verbose and not feature_names_path:
+        _LOGGER.warning(f"Feature names file not found in '{dir_name}'.")
+
     ##### load model architecture path #####
     architecture_path = None
 
-    model_json_dict = list_files_by_extension(directory=dir_path, extension="json", verbose=verbose)
+    model_json_dict = list_files_by_extension(directory=dir_path, extension="json", verbose=False, raise_on_empty=False)
 
+    # if the directory has no json files, the loop is skipped
     for json_filename, json_path in model_json_dict.items():
         if json_filename == PytorchModelArchitectureKeys.SAVENAME:
             architecture_path = json_path
 
     # validation
-    if not architecture_path:
+    if strict and not architecture_path:
         _LOGGER.error(f"Error parsing the model architecture path from '{dir_name}'")
         raise IOError()
+    elif verbose and not architecture_path:
+        _LOGGER.warning(f"Model architecture file not found in '{dir_name}'.")
 
     ##### Paths dictionary #####
     parsing_dict = {
@@ -233,7 +331,7 @@ def _find_model_artifacts(target_directory: Union[str,Path], load_scaler: bool,
         PytorchArtifactPathKeys.TARGETS_PATH: target_names_path,
     }
 
-    if scaler_path is not None:
+    if load_scaler:
         parsing_dict[PytorchArtifactPathKeys.SCALER_PATH] = scaler_path
 
     return parsing_dict
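
Because the returned dictionary can now map keys to None when strict=False, callers are expected to check each entry. A sketch of that pattern, assuming only the names visible in this diff:

```python
# Non-strict call: entries may be None instead of raising.
paths = _find_model_artifacts(target_directory="results/model_01",
                              load_scaler=True,
                              verbose=True,
                              strict=False)

weights_path = paths[PytorchArtifactPathKeys.WEIGHTS_PATH]
if weights_path is None:
    ...  # skip this directory or report the gap upstream
```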
@@ -246,6 +344,9 @@ def find_model_artifacts_multi(target_directory: Union[str,Path], load_scaler: b
     This function operates on a specific directory structure. It expects the
     `target_directory` to contain one or more subdirectories, where each
     subdirectory represents a single trained model result.
+
+    This function operates in strict mode, meaning that it raises an error if
+    any required artifact is missing from a model's subdirectory.
 
     The expected directory structure for each model is as follows:
     ```
@@ -278,14 +379,16 @@ def find_model_artifacts_multi(target_directory: Union[str,Path], load_scaler: b
     all_artifacts: list[dict[str, Path]] = list()
 
     # find model directories
-    result_dirs_dict = list_subdirectories(root_dir=root_path, verbose=verbose)
+    result_dirs_dict = list_subdirectories(root_dir=root_path, verbose=verbose, raise_on_empty=True)
     for _dir_name, dir_path in result_dirs_dict.items():
 
         parsing_dict = _find_model_artifacts(target_directory=dir_path,
                                              load_scaler=load_scaler,
-                                             verbose=verbose)
+                                             verbose=verbose,
+                                             strict=True)
 
-        all_artifacts.append(parsing_dict)
+        # parsing_dict is guaranteed to have all required paths due to strict=True
+        all_artifacts.append(parsing_dict)  # type: ignore
 
     return all_artifacts
 
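A brief sketch of how the multi-directory helper behaves after this change; the directory layout is assumed and the signature is inferred from the hunks above:

```python
# Every subdirectory of "batch_results" must hold a complete artifact set;
# strict=True inside the loop turns any gap into an IOError.
artifact_dicts = find_model_artifacts_multi(target_directory="batch_results",
                                            load_scaler=True,
                                            verbose=False)
for artifacts in artifact_dicts:
    weights = artifacts[PytorchArtifactPathKeys.WEIGHTS_PATH]  # never None here
```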
@@ -721,7 +824,7 @@ def select_features_by_shap(
     root_path = make_fullpath(root_directory, enforce="directory")
 
     # --- Step 2: Directory and File Discovery ---
-    subdirectories = list_subdirectories(root_dir=root_path, verbose=False)
+    subdirectories = list_subdirectories(root_dir=root_path, verbose=False, raise_on_empty=True)
 
     shap_filename = SHAPKeys.SAVENAME + ".csv"
 
@@ -169,7 +169,7 @@ def multiple_objective_functions_from_dir(directory: Union[str,Path], add_noise:
     """
     objective_functions = list()
     objective_function_names = list()
-    for file_name, file_path in list_files_by_extension(directory=directory, extension='joblib').items():
+    for file_name, file_path in list_files_by_extension(directory=directory, extension='joblib', raise_on_empty=True).items():
         current_objective = ObjectiveFunction(trained_model_path=file_path,
                                               add_noise=add_noise,
                                               task=task,
@@ -42,7 +42,7 @@ class DragonEnsembleInferenceHandler:
         self.verbose = verbose
         self._feature_names: Optional[List[str]] = None
 
-        model_files = list_files_by_extension(directory=models_dir, extension="joblib")
+        model_files = list_files_by_extension(directory=models_dir, extension="joblib", raise_on_empty=True)
 
         for fname, fpath in model_files.items():
             try:
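
Several call sites in this release opt into the raise_on_empty flag, which turns an empty directory listing into an immediate error instead of a silent empty loop. A sketch of the two modes, using only the keyword visible in these hunks:

```python
# Fail fast: a directory with no joblib files raises immediately.
model_files = list_files_by_extension(directory="ensemble_models",
                                      extension="joblib",
                                      raise_on_empty=True)

# Tolerant: an empty (falsy) mapping comes back and the caller decides,
# mirroring how _find_model_artifacts handles its pth/txt/json lookups.
maybe_files = list_files_by_extension(directory="ensemble_models",
                                      extension="joblib",
                                      raise_on_empty=False)
```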
ml_tools/_core/_keys.py CHANGED
@@ -1,6 +1,6 @@
 class MagicWords:
     """General purpose keys"""
-    LATEST = "latest"
+    BEST = "best"
     CURRENT = "current"
     RENAME = "rename"
     UNKNOWN = "unknown"
@@ -200,6 +200,37 @@ class MLTaskKeys:
     ALL_BINARY_TASKS = [BINARY_CLASSIFICATION, MULTILABEL_BINARY_CLASSIFICATION, BINARY_IMAGE_CLASSIFICATION, BINARY_SEGMENTATION]
 
 
+class _PublicTaskKeys:
+    """
+    Task keys used in the Dragon ML pipeline:
+
+    1. REGRESSION
+    2. MULTITARGET_REGRESSION
+    3. BINARY_CLASSIFICATION
+    4. MULTICLASS_CLASSIFICATION
+    5. MULTILABEL_BINARY_CLASSIFICATION
+    6. BINARY_IMAGE_CLASSIFICATION
+    7. MULTICLASS_IMAGE_CLASSIFICATION
+    8. BINARY_SEGMENTATION
+    9. MULTICLASS_SEGMENTATION
+    10. OBJECT_DETECTION
+    11. SEQUENCE_SEQUENCE
+    12. SEQUENCE_VALUE
+    """
+    REGRESSION = MLTaskKeys.REGRESSION
+    MULTITARGET_REGRESSION = MLTaskKeys.MULTITARGET_REGRESSION
+    BINARY_CLASSIFICATION = MLTaskKeys.BINARY_CLASSIFICATION
+    MULTICLASS_CLASSIFICATION = MLTaskKeys.MULTICLASS_CLASSIFICATION
+    MULTILABEL_BINARY_CLASSIFICATION = MLTaskKeys.MULTILABEL_BINARY_CLASSIFICATION
+    BINARY_IMAGE_CLASSIFICATION = MLTaskKeys.BINARY_IMAGE_CLASSIFICATION
+    MULTICLASS_IMAGE_CLASSIFICATION = MLTaskKeys.MULTICLASS_IMAGE_CLASSIFICATION
+    BINARY_SEGMENTATION = MLTaskKeys.BINARY_SEGMENTATION
+    MULTICLASS_SEGMENTATION = MLTaskKeys.MULTICLASS_SEGMENTATION
+    OBJECT_DETECTION = MLTaskKeys.OBJECT_DETECTION
+    SEQUENCE_SEQUENCE = MLTaskKeys.SEQUENCE_SEQUENCE
+    SEQUENCE_VALUE = MLTaskKeys.SEQUENCE_VALUE
+
+
 class DragonTrainerKeys:
     VALIDATION_METRICS_DIR = "Validation_Metrics"
     TEST_METRICS_DIR = "Test_Metrics"
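
The new class only mirrors constants that already exist on MLTaskKeys, so the aliases compare equal to the originals; a one-line illustration (hypothetical usage, not in the diff):

```python
assert _PublicTaskKeys.REGRESSION == MLTaskKeys.REGRESSION  # same key string
```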
@@ -269,7 +269,7 @@ def plot_optimal_feature_distributions(results_dir: Union[str, Path],
     output_path = make_fullpath(results_path / "DistributionPlots", make=True)
 
     # Check that the directory contains csv files
-    list_csv_paths(results_path, verbose=False)
+    list_csv_paths(results_path, verbose=False, raise_on_empty=True)
 
     # --- Data Loading and Preparation ---
     _LOGGER.debug(f"📁 Starting analysis from results in: '{results_dir}'")