dragon-ml-toolbox 13.3.0__tar.gz → 13.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (46)
  1. {dragon_ml_toolbox-13.3.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-13.3.2}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/ML_models.py +20 -6
  4. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/math_utilities.py +1 -1
  5. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/serde.py +2 -2
  6. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/pyproject.toml +1 -1
  7. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/LICENSE +0 -0
  8. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/LICENSE-THIRD-PARTY.md +0 -0
  9. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/README.md +0 -0
  10. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  11. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  12. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  13. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  14. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/ETL_cleaning.py +0 -0
  15. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/ETL_engineering.py +0 -0
  16. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/GUI_tools.py +0 -0
  17. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/MICE_imputation.py +0 -0
  18. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/ML_callbacks.py +0 -0
  19. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/ML_datasetmaster.py +0 -0
  20. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/ML_evaluation.py +0 -0
  21. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/ML_evaluation_multi.py +0 -0
  22. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/ML_inference.py +0 -0
  23. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/ML_optimization.py +0 -0
  24. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/ML_scaler.py +0 -0
  25. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/ML_trainer.py +0 -0
  26. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/ML_utilities.py +0 -0
  27. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/PSO_optimization.py +0 -0
  28. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/RNN_forecast.py +0 -0
  29. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/SQL.py +0 -0
  30. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/VIF_factor.py +0 -0
  31. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/__init__.py +0 -0
  32. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/_logger.py +0 -0
  33. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/_schema.py +0 -0
  34. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/_script_info.py +0 -0
  35. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/constants.py +0 -0
  36. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/custom_logger.py +0 -0
  37. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/data_exploration.py +0 -0
  38. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/ensemble_evaluation.py +0 -0
  39. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/ensemble_inference.py +0 -0
  40. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/ensemble_learning.py +0 -0
  41. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/handle_excel.py +0 -0
  42. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/keys.py +0 -0
  43. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/optimization_tools.py +0 -0
  44. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/path_manager.py +0 -0
  45. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/utilities.py +0 -0
  46. {dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 13.3.0
3
+ Version: 13.3.2
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 13.3.0
3
+ Version: 13.3.2
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -306,10 +306,10 @@ class TabularTransformer(nn.Module, _ArchitectureHandlerMixin):
306
306
  def __init__(self, *,
307
307
  schema: FeatureSchema,
308
308
  out_targets: int,
309
- embedding_dim: int = 32,
309
+ embedding_dim: int = 256,
310
310
  num_heads: int = 8,
311
311
  num_layers: int = 6,
312
- dropout: float = 0.1):
312
+ dropout: float = 0.2):
313
313
  """
314
314
  Args:
315
315
  schema (FeatureSchema):
@@ -317,14 +317,28 @@ class TabularTransformer(nn.Module, _ArchitectureHandlerMixin):
317
317
  out_targets (int):
318
318
  Number of output targets (1 for regression).
319
319
  embedding_dim (int):
320
- The dimension for all feature embeddings. Must be divisible
321
- by num_heads.
320
+ The dimension for all feature embeddings. Must be divisible by num_heads. Common values: (64, 128, 192, 256, etc.)
322
321
  num_heads (int):
323
- The number of heads in the multi-head attention mechanism.
322
+ The number of heads in the multi-head attention mechanism. Common values: (4, 8, 16)
324
323
  num_layers (int):
325
- The number of sub-encoder-layers in the transformer encoder.
324
+ The number of sub-encoder-layers in the transformer encoder. Common values: (4, 8, 12)
326
325
  dropout (float):
327
326
  The dropout value.
327
+
328
+ ## Note:
329
+
330
+ **Embedding Dimension:** "Width" of the model. It's the N-dimension vector that will be used to represent each one of the features.
331
+ - Each continuous feature gets its own learnable N-dimension vector.
332
+ - Each categorical feature gets an embedding table that maps every category (e.g., "color=red", "color=blue") to a unique N-dimension vector.
333
+
334
+ **Attention Heads:** Controls the "Multi-Head Attention" mechanism. Instead of looking at all the feature interactions at once, the model splits its attention into N parallel heads.
335
+ - Embedding Dimensions get divided by the number of Attention Heads, resulting in the dimensions assigned per head.
336
+
337
+ **Number of Layers:** "Depth" of the model. Number of identical `TransformerEncoderLayer` blocks that are stacked on top of each other.
338
+ - Layer 1: The attention heads find simple, direct interactions between the features.
339
+ - Layer 2: Takes the output of Layer 1 and finds interactions between those interactions and so on.
340
+ - Trade-off: More layers are more powerful but are slower to train and more prone to overfitting. If the training loss goes down but the validation loss goes up, you might have too many layers (or need more dropout).
341
+
328
342
  """
329
343
  super().__init__()
330
344
 
@@ -219,7 +219,7 @@ def discretize_categorical_values(
219
219
  _LOGGER.error(f"'categorical_info' is not a dictionary, or is empty.")
220
220
  raise ValueError()
221
221
 
222
- _, total_features = input_array.shape
222
+ _, total_features = working_array.shape
223
223
  for col_idx, cardinality in categorical_info.items():
224
224
  if not isinstance(col_idx, int):
225
225
  _LOGGER.error(f"Column index key {col_idx} is not an integer.")
@@ -85,7 +85,7 @@ def serialize_object(obj: Any, file_path: Path, verbose: bool = True, raise_on_e
85
85
  return None
86
86
  else:
87
87
  if verbose:
88
- if isinstance(obj, _SIMPLE_TYPES):
88
+ if type(obj) in _SIMPLE_TYPES:
89
89
  _LOGGER.info(f"Object of type '{type(obj)}' saved to '{file_path}'")
90
90
  else:
91
91
  _LOGGER.info(f"Object '{obj}' saved to '{file_path}'")
@@ -140,7 +140,7 @@ def deserialize_object(
140
140
 
141
141
  if verbose:
142
142
  # log special objects
143
- if isinstance(obj, _SIMPLE_TYPES):
143
+ if type(obj) in _SIMPLE_TYPES:
144
144
  _LOGGER.info(f"Loaded object of type '{type(obj)}' from '{true_filepath}'.")
145
145
  else:
146
146
  _LOGGER.info(f"Loaded object '{obj}' from '{true_filepath}'.")
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dragon-ml-toolbox"
3
- version = "13.3.0"
3
+ version = "13.3.2"
4
4
  description = "A collection of tools for data science and machine learning projects."
5
5
  authors = [
6
6
  { name = "Karl L. Loza Vidaurre", email = "luigiloza@gmail.com" }