dragon-ml-toolbox 19.12.0__py3-none-any.whl → 19.12.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 19.12.0
3
+ Version: 19.12.2
4
4
  Summary: Complete pipelines and helper tools for data science and machine learning projects.
5
5
  Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,5 +1,5 @@
1
- dragon_ml_toolbox-19.12.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
2
- dragon_ml_toolbox-19.12.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
1
+ dragon_ml_toolbox-19.12.2.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
2
+ dragon_ml_toolbox-19.12.2.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
3
3
  ml_tools/ETL_cleaning.py,sha256=cKXyRFaaFs_beAGDnQM54xnML671kq-yJEGjHafW-20,351
4
4
  ml_tools/ETL_engineering.py,sha256=cwh1FhtNdUHllUDvho-x3SIVj4KwG_rFQR6VYzWUg0U,898
5
5
  ml_tools/GUI_tools.py,sha256=O89rG8WQv6GY1DiphQjIsPzXFCQID6te7q_Sgt1iTkQ,294
@@ -38,7 +38,7 @@ ml_tools/SQL.py,sha256=ZYlY5L-k2mkDckOhNPtJEof2L7ePe_KBpgx55WG5NKs,84
38
38
  ml_tools/VIF_factor.py,sha256=xGUbnfhh1eqUiHX-tIpJBn_3Y_h3SOuNfVKkpsQXc7w,184
39
39
  ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
40
  ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
41
- ml_tools/data_exploration.py,sha256=qgo4hfuLnj-GzEpnOi4AzaX5xFjwM5Ox-uRjMaR4dug,1468
41
+ ml_tools/data_exploration.py,sha256=vwCgOHhpPku2uuRVscrco6sXwkjc3ruOlfO002OQ74M,1468
42
42
  ml_tools/ensemble_evaluation.py,sha256=P26vyS2fMV3Pm_4w2MN1z1eS7aVJzYagsyLmqC-Io6Q,468
43
43
  ml_tools/ensemble_inference.py,sha256=sl_Dq9KaN0SrtZmyiVrrhWd6lSjdQangSIUUUIFvfj4,178
44
44
  ml_tools/ensemble_learning.py,sha256=BLPnpfJWCly-D75mkRP1FE5TExoWAAlAHR89KAzW9iU,336
@@ -54,18 +54,18 @@ ml_tools/utilities.py,sha256=dHNjGPH3Ck9V41IRFbRojE_RW6lACdxrNZz0FxI5SQY,691
54
54
  ml_tools/_core/_ETL_cleaning.py,sha256=_pTNKuapNHgWErmxvsXW-2YzCm4BaTshKV627A38RuA,28748
55
55
  ml_tools/_core/_ETL_engineering.py,sha256=JgIWrQGyNjmLrbyv5Kh0EHKBLmYlyrGKSnKRxGzxSco,57930
56
56
  ml_tools/_core/_GUI_tools.py,sha256=kpvk18Eb4vdLzo-I5mBV1yuwPXs-NJJ01rn-iCXHvIY,49079
57
- ml_tools/_core/_IO_tools.py,sha256=oWaYa_OVO-8ANVt_a9F1QPMvyOcI2yLbtq7LoVHlqek,16625
57
+ ml_tools/_core/_IO_tools.py,sha256=sEbtzDHkc9GNkXvsFS9ic038LzAW-rxXPuLwtoHXzGw,17107
58
58
  ml_tools/_core/_MICE_imputation.py,sha256=64l20duGWt93Q2MbqcWqrA1s99JPRf5AJACb1CZi2xI,21149
59
59
  ml_tools/_core/_ML_callbacks.py,sha256=T0PjptlpC75_Tp3bWIMPTYhxsMX-8z4YtDT4FJ3p8jg,27988
60
60
  ml_tools/_core/_ML_chaining_inference.py,sha256=vXUPZzuQ2yKU71kkvUsE0xPo0hN-Yu6gfnL0JbXoRjI,7783
61
61
  ml_tools/_core/_ML_chaining_utilities.py,sha256=nsYowgRbkIYuzRiHlqsM3tnC3c-8O73CY8DHUF14XL0,19248
62
- ml_tools/_core/_ML_configuration.py,sha256=hwnDCo9URsFqRCgLuFJhGTtoOqbE1XJreNY8B_3spTg,52693
62
+ ml_tools/_core/_ML_configuration.py,sha256=olRcam2s-Y5oUr8BAcmhwIBQDmaQZm2RHTv5sK2HeOU,53151
63
63
  ml_tools/_core/_ML_configuration_pytab.py,sha256=C3e4iScqdRePVDoqnic6xXMOW7DNYqpgTCeaFDyMdL4,3286
64
64
  ml_tools/_core/_ML_datasetmaster.py,sha256=yU1BMtzz6XumMWCetVACrRLk7WJQwmYhaQ-VAWu9Ots,32043
65
65
  ml_tools/_core/_ML_evaluation.py,sha256=bu8qlYzhWSC1B7wNfCC5TSF-oed-uP8EF7TV45VTiBM,37325
66
66
  ml_tools/_core/_ML_evaluation_captum.py,sha256=a69jnghIzE9qppuw2vzTBMdTErnZkDkTA3MPUUYjsS4,19212
67
67
  ml_tools/_core/_ML_evaluation_multi.py,sha256=n_AJbKF58DMUrYqJutwPFV5z6sNssDPA1Gl05IfPG5s,23647
68
- ml_tools/_core/_ML_finalize_handler.py,sha256=0eZ_0N2L5aUUIJUgvhAQ-rbd8XbE9UmNqTKSJq09uTI,6987
68
+ ml_tools/_core/_ML_finalize_handler.py,sha256=1__wG3Jcr9h1a99F-CmHezhEw1_Ojxh3aDHNyJN2S5w,7127
69
69
  ml_tools/_core/_ML_inference.py,sha256=5swm2lnsrDLalBnCm7gZPlDucX4yNCq5vn7ck3SW_4Q,29791
70
70
  ml_tools/_core/_ML_models.py,sha256=8FUx4-TVghlBF9srh1_5UxovrWPU7YEZ6XXLqwJei88,27974
71
71
  ml_tools/_core/_ML_models_advanced.py,sha256=oU6M5FEBMQ9yPp32cziWh3bz8SXRho07vFMC8ZDVcuU,45002
@@ -77,7 +77,7 @@ ml_tools/_core/_ML_sequence_datasetmaster.py,sha256=0YVOPf-y4ZNdgUxropXUWrmInNyG
77
77
  ml_tools/_core/_ML_sequence_evaluation.py,sha256=AiPHtZ9DRpE6zL9n3Tp5eGGD9vrYRkLbZ0Nc274mL7I,8069
78
78
  ml_tools/_core/_ML_sequence_inference.py,sha256=zd3hBwOtLmjAV4JtdB2qFY9GxhysajFufATdy8fjGTE,16316
79
79
  ml_tools/_core/_ML_sequence_models.py,sha256=5qcEYLU6wDePBITnikBrj_H9mCvyJmElKa3HiWGXhZs,5639
80
- ml_tools/_core/_ML_trainer.py,sha256=EeNqZ0pCWrBxGaYgOVmDxofMBQhV56Bvsj-VuBwBgHQ,117580
80
+ ml_tools/_core/_ML_trainer.py,sha256=ZYDH-P8GJhFe0vpeMtgLS0O3Fz0d4qr8zcTm-C30T1I,117595
81
81
  ml_tools/_core/_ML_utilities.py,sha256=elLGD0QYh148_9iNLlqGe1vz-wCFspJa6CWtWTfA3jY,35594
82
82
  ml_tools/_core/_ML_vision_datasetmaster.py,sha256=8EsE7luzphVlwBXdOsOwsFfz1D4UIUSEQtqHlM0Vf-o,67084
83
83
  ml_tools/_core/_ML_vision_evaluation.py,sha256=BSLf9xrGpaR02Dhkf-fAbgxSpwRjf7DruNIcQadl7qg,11631
@@ -88,7 +88,7 @@ ml_tools/_core/_PSO_optimization.py,sha256=W3g5xw2v2eOUQadv8KHFkt5HNm9AiY3ZUk-Te
88
88
  ml_tools/_core/_SQL.py,sha256=zX_8EgYfmLmvvrnL851KMkI4w9kdkjHJ997BTvS5aig,11556
89
89
  ml_tools/_core/_VIF_factor.py,sha256=BM0mTowBqt45PXFy9oJLhT9C-CTWWo0TQhgCyWYLHtQ,10457
90
90
  ml_tools/_core/__init__.py,sha256=d4IG0OxUXj2HffepzQcYixHlZeuuuDMAFa09H_6LtmU,12
91
- ml_tools/_core/_data_exploration.py,sha256=-g_e4Lox4LN8c2AfhpcPmnI9TNIZGl84O8hWEVH5asA,77438
91
+ ml_tools/_core/_data_exploration.py,sha256=uynIjMppbr5nFJ-7wag0R0HDQCp2rTXqz_IpgNKKNOM,77508
92
92
  ml_tools/_core/_ensemble_evaluation.py,sha256=17lWl4bWLT1BAMv_fhGf2D3wy-F4jx0HgnJ79lYkRuE,28419
93
93
  ml_tools/_core/_ensemble_inference.py,sha256=9UpARSETzmqPdQmxqizD768tjkqldxHw1ER_hM9Kx9M,8631
94
94
  ml_tools/_core/_ensemble_learning.py,sha256=X8ghbjDOLMENCWdISXLhDlHQtR3C6SW1tkTBAcfRRPY,22016
@@ -104,8 +104,8 @@ ml_tools/_core/_plot_fonts.py,sha256=CjYXW2gZ9AUaGkyX8_WOXXNYs6d1PTK-nEJBrv_Zb2o
104
104
  ml_tools/_core/_schema.py,sha256=TM5WVVMoKOvr_Bc2z34sU_gzKlM465PRKTgdZaEOkGY,14076
105
105
  ml_tools/_core/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
106
106
  ml_tools/_core/_serde.py,sha256=tsI4EO2Y7jrBMmbQ1pinDsPOrOg-SaPuB-Dt40q0taE,5609
107
- ml_tools/_core/_utilities.py,sha256=D7FGyEszcMHxGkMW4aqN7JUwabTICCcQz9qsGtOj97o,22787
108
- dragon_ml_toolbox-19.12.0.dist-info/METADATA,sha256=MoUoxvRMHfPhedd87f68yWWlPJFEGVFCOa1OpgKwpHE,8193
109
- dragon_ml_toolbox-19.12.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
110
- dragon_ml_toolbox-19.12.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
111
- dragon_ml_toolbox-19.12.0.dist-info/RECORD,,
107
+ ml_tools/_core/_utilities.py,sha256=oU-0hBipE96bXox66NG-hFuEMMNkKa9MkAy1yJGCSIA,22779
108
+ dragon_ml_toolbox-19.12.2.dist-info/METADATA,sha256=Nuk7YVRdDotD_TURCpIFKqcDuTlkBs9fpTv-8jCm5aU,8193
109
+ dragon_ml_toolbox-19.12.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
110
+ dragon_ml_toolbox-19.12.2.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
111
+ dragon_ml_toolbox-19.12.2.dist-info/RECORD,,
@@ -434,8 +434,8 @@ def train_logger(train_config: Union[dict, Any],
434
434
  Logs training data to JSON, adding a timestamp to the filename.
435
435
 
436
436
  Args:
437
- train_config (dict | Any): Training configuration parameters.
438
- model_parameters (dict | Any): Model parameters.
437
+ train_config (dict | Any): Training configuration parameters. If object, must have a `.to_log()` method returning a dict.
438
+ model_parameters (dict | Any): Model parameters. If object, must have a `.to_log()` method returning a dict.
439
439
  train_history (dict | None): Training history log.
440
440
  save_directory (str | Path): Directory to save the log file.
441
441
  """
@@ -443,6 +443,9 @@ def train_logger(train_config: Union[dict, Any],
443
443
  if not isinstance(train_config, dict):
444
444
  if hasattr(train_config, "to_log") and callable(getattr(train_config, "to_log")):
445
445
  train_config_dict: dict = train_config.to_log()
446
+ if not isinstance(train_config_dict, dict):
447
+ _LOGGER.error("'train_config.to_log()' did not return a dictionary.")
448
+ raise ValueError()
446
449
  else:
447
450
  _LOGGER.error("'train_config' must be a dict or an object with a 'to_log()' method.")
448
451
  raise ValueError()
@@ -458,6 +461,9 @@ def train_logger(train_config: Union[dict, Any],
458
461
  if not isinstance(model_parameters, dict):
459
462
  if hasattr(model_parameters, "to_log") and callable(getattr(model_parameters, "to_log")):
460
463
  model_parameters_dict: dict = model_parameters.to_log()
464
+ if not isinstance(model_parameters_dict, dict):
465
+ _LOGGER.error("'model_parameters.to_log()' did not return a dictionary.")
466
+ raise ValueError()
461
467
  else:
462
468
  _LOGGER.error("'model_parameters' must be a dict or an object with a 'to_log()' method.")
463
469
  raise ValueError()
@@ -660,18 +660,27 @@ class DragonTrainingConfig(_BaseModelParams):
660
660
  initial_learning_rate: float,
661
661
  batch_size: int,
662
662
  random_state: int = 101,
663
- early_stop_patience: Optional[int] = None,
664
- scheduler_patience: Optional[int] = None,
665
- scheduler_lr_factor: Optional[float] = None,
663
+ # early_stop_patience: Optional[int] = None,
664
+ # scheduler_patience: Optional[int] = None,
665
+ # scheduler_lr_factor: Optional[float] = None,
666
666
  **kwargs: Any) -> None:
667
+ """
668
+ Args:
669
+ validation_size (float): Proportion of data for validation set.
670
+ test_size (float): Proportion of data for test set.
671
+ initial_learning_rate (float): Starting learning rate.
672
+ batch_size (int): Number of samples per training batch.
673
+ random_state (int): Seed for reproducibility.
674
+ **kwargs: Additional training parameters as key-value pairs.
675
+ """
667
676
  self.validation_size = validation_size
668
677
  self.test_size = test_size
669
678
  self.initial_learning_rate = initial_learning_rate
670
679
  self.batch_size = batch_size
671
680
  self.random_state = random_state
672
- self.early_stop_patience = early_stop_patience
673
- self.scheduler_patience = scheduler_patience
674
- self.scheduler_lr_factor = scheduler_lr_factor
681
+ # self.early_stop_patience = early_stop_patience
682
+ # self.scheduler_patience = scheduler_patience
683
+ # self.scheduler_lr_factor = scheduler_lr_factor
675
684
 
676
685
  # Process kwargs with validation
677
686
  for key, value in kwargs.items():
@@ -51,7 +51,7 @@ class FinalizedFileHandler:
51
51
  self._initial_sequence: Optional[np.ndarray] = None
52
52
  self._target_name: Optional[str] = None
53
53
  self._target_names: Optional[list[str]] = None
54
- self._model_state_dict: Optional[Any] = None
54
+ self._model_state_dict: Optional[dict[str, Any]] = None
55
55
 
56
56
  # Set warning outputs
57
57
  self._verbose: bool=True
@@ -90,7 +90,7 @@ class FinalizedFileHandler:
90
90
 
91
91
  else:
92
92
  # It is a dict, but missing the keys, assume it is the raw state dict
93
- _LOGGER.info(f"File '{pth_path.name}' does not have the required keys for a finalized-file. Treating it as raw PyTorch state dictionary.")
93
+ _LOGGER.warning(f"File '{pth_path.name}' does not have the required keys for a Dragon-ML finalized-file. Keys found:\n {list(pth_file_content.keys())}")
94
94
  self._model_state_dict = pth_file_content
95
95
 
96
96
 
@@ -113,9 +113,10 @@ class FinalizedFileHandler:
113
113
  return self._task
114
114
 
115
115
  @property
116
- def model_state_dict(self):
116
+ def model_state_dict(self) -> dict[str, Any]:
117
117
  """Returns the model state dictionary."""
118
- return self._model_state_dict
118
+ # No need to check for None, as it is guaranteed to be set in __init__
119
+ return self._model_state_dict # type: ignore
119
120
 
120
121
  @property
121
122
  def epoch(self) -> Optional[int]:
@@ -663,7 +663,7 @@ class DragonTrainer(_BaseDragonTrainer):
663
663
  Evaluates the model, routing to the correct evaluation function based on task `kind`.
664
664
 
665
665
  Args:
666
- model_checkpoint ('auto' | Path | None):
666
+ model_checkpoint (Path | "best" | "current"):
667
667
  - Path to a valid checkpoint for the model. The state of the trained model will be overwritten in place.
668
668
  - If 'best', the best checkpoint will be loaded if a DragonModelCheckpoint was provided. The state of the trained model will be overwritten in place.
669
669
  - If 'current', use the current state of the trained model up to the latest trained epoch.
@@ -1608,7 +1608,7 @@ class DragonDetectionTrainer(_BaseDragonTrainer):
1608
1608
 
1609
1609
  Args:
1610
1610
  save_dir (str | Path): Directory to save all reports and plots.
1611
- model_checkpoint ('auto' | Path | None):
1611
+ model_checkpoint (Path | "best" | "current"):
1612
1612
  - Path to a valid checkpoint for the model. The state of the trained model will be overwritten in place.
1613
1613
  - If 'best', the best checkpoint will be loaded if a DragonModelCheckpoint was provided. The state of the trained model will be overwritten in place.
1614
1614
  - If 'current', use the current state of the trained model up to the latest trained epoch.
@@ -2046,7 +2046,7 @@ class DragonSequenceTrainer(_BaseDragonTrainer):
2046
2046
  Evaluates the model, routing to the correct evaluation function.
2047
2047
 
2048
2048
  Args:
2049
- model_checkpoint ('auto' | Path | None):
2049
+ model_checkpoint (Path | "best" | "current"):
2050
2050
  - Path to a valid checkpoint for the model.
2051
2051
  - If 'best', the best checkpoint will be loaded.
2052
2052
  - If 'current', use the current state of the trained model.
@@ -17,7 +17,6 @@ from ._schema import FeatureSchema
17
17
  _LOGGER = get_logger("Data Exploration")
18
18
 
19
19
 
20
- # Keep track of all available tools, show using `info()`
21
20
  __all__ = [
22
21
  "summarize_dataframe",
23
22
  "drop_constant_columns",
@@ -754,6 +753,8 @@ def plot_categorical_vs_target(
754
753
  This function is a core EDA step for regression tasks to understand the
755
754
  relationship between a categorical independent variable and a continuous
756
755
  dependent variable.
756
+
757
+ Plots are saved as individual .svg files in a structured way, with a subdirectory created for each target.
757
758
 
758
759
  Args:
759
760
  df (pd.DataFrame): The input DataFrame.
@@ -1167,7 +1168,7 @@ def clip_outliers_single(
1167
1168
 
1168
1169
  def clip_outliers_multi(
1169
1170
  df: pd.DataFrame,
1170
- clip_dict: Dict[str, Tuple[Union[int, float], Union[int, float]]],
1171
+ clip_dict: Union[Dict[str, Tuple[int, int]], Dict[str, Tuple[float, float]]],
1171
1172
  verbose: bool=False
1172
1173
  ) -> pd.DataFrame:
1173
1174
  """
@@ -396,7 +396,7 @@ def save_dataframe(df: Union[pd.DataFrame, pl.DataFrame], full_path: Path):
396
396
  if not isinstance(full_path, Path) or not full_path.suffix.endswith(".csv"):
397
397
  _LOGGER.error('A path object pointing to a .csv file must be provided.')
398
398
  raise ValueError()
399
-
399
+
400
400
  save_dataframe_filename(df=df,
401
401
  save_dir=full_path.parent,
402
402
  filename=full_path.name)
@@ -37,18 +37,18 @@ __all__ = [
37
37
  "plot_value_distributions",
38
38
  "plot_continuous_vs_target",
39
39
  "plot_categorical_vs_target",
40
- "encode_categorical_features",
41
40
  "split_features_targets",
42
- "split_continuous_binary",
41
+ "encode_categorical_features",
43
42
  "clip_outliers_single",
44
43
  "clip_outliers_multi",
45
44
  "drop_outlier_samples",
46
45
  "plot_correlation_heatmap",
46
+ "finalize_feature_schema",
47
47
  "match_and_filter_columns_by_regex",
48
48
  "standardize_percentages",
49
49
  "reconstruct_one_hot",
50
50
  "reconstruct_binary",
51
51
  "reconstruct_multibinary",
52
- "finalize_feature_schema",
52
+ "split_continuous_binary",
53
53
  "apply_feature_schema",
54
54
  ]