PyPI - dragon-ml-toolbox - Versions diffs - 13.3.0__tar.gz → 13.3.2__tar.gz - Mend

dragon-ml-toolbox 13.3.0.tar.gz → 13.3.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (46)

{dragon_ml_toolbox-13.3.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-13.3.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 13.3.0
+Version: 13.3.2
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT

{dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2/dragon_ml_toolbox.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 13.3.0
+Version: 13.3.2
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT

{dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/ML_models.py RENAMED Viewed

@@ -306,10 +306,10 @@ class TabularTransformer(nn.Module, _ArchitectureHandlerMixin):
     def __init__(self, *,
                  schema: FeatureSchema,
                  out_targets: int,
-                 embedding_dim: int = 32,
+                 embedding_dim: int = 256,
                  num_heads: int = 8,
                  num_layers: int = 6,
-                 dropout: float = 0.1):
+                 dropout: float = 0.2):
         """
         Args:
             schema (FeatureSchema):
@@ -317,14 +317,28 @@ class TabularTransformer(nn.Module, _ArchitectureHandlerMixin):
             out_targets (int):
                 Number of output targets (1 for regression).
             embedding_dim (int):
-                The dimension for all feature embeddings. Must be divisible
-                by num_heads.
+                The dimension for all feature embeddings. Must be divisible by num_heads. Common values: (64, 128, 192, 256, etc.)
             num_heads (int):
-                The number of heads in the multi-head attention mechanism.
+                The number of heads in the multi-head attention mechanism. Common values: (4, 8, 16)
             num_layers (int):
-                The number of sub-encoder-layers in the transformer encoder.
+                The number of sub-encoder-layers in the transformer encoder. Common values: (4, 8, 12)
             dropout (float):
                 The dropout value.
+        ## Note:
+        **Embedding Dimension:** "Width" of the model. It's the N-dimension vector that will be used to represent each one of the features.
+            - Each continuous feature gets its own learnable N-dimension vector.
+            - Each categorical feature gets an embedding table that maps every category (e.g., "color=red", "color=blue") to a unique N-dimension vector.
+        **Attention Heads:** Controls the "Multi-Head Attention" mechanism. Instead of looking at all the feature interactions at once, the model splits its attention into N parallel heads.
+            - Embedding Dimensions get divided by the number of Attention Heads, resulting in the dimensions assigned per head.
+        **Number of Layers:** "Depth" of the model. Number of identical `TransformerEncoderLayer` blocks that are stacked on top of each other.
+            - Layer 1: The attention heads find simple, direct interactions between the features.
+            - Layer 2: Takes the output of Layer 1 and finds interactions between those interactions and so on.
+            - Trade-off: More layers are more powerful but are slower to train and more prone to overfitting. If the training loss goes down but the validation loss goes up, you might have too many layers (or need more dropout).
         """
         super().__init__()

{dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/math_utilities.py RENAMED Viewed

@@ -219,7 +219,7 @@ def discretize_categorical_values(
         _LOGGER.error(f"'categorical_info' is not a dictionary, or is empty.")
         raise ValueError()
-    _, total_features = input_array.shape
+    _, total_features = working_array.shape
     for col_idx, cardinality in categorical_info.items():
         if not isinstance(col_idx, int):
              _LOGGER.error(f"Column index key {col_idx} is not an integer.")

{dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/ml_tools/serde.py RENAMED Viewed

@@ -85,7 +85,7 @@ def serialize_object(obj: Any, file_path: Path, verbose: bool = True, raise_on_e
         return None
     else:
         if verbose:
-            if isinstance(obj, _SIMPLE_TYPES):
+            if type(obj) in _SIMPLE_TYPES:
                 _LOGGER.info(f"Object of type '{type(obj)}' saved to '{file_path}'")
             else:
                 _LOGGER.info(f"Object '{obj}' saved to '{file_path}'")
@@ -140,7 +140,7 @@ def deserialize_object(
         if verbose:
             # log special objects
-            if isinstance(obj, _SIMPLE_TYPES):
+            if type(obj) in _SIMPLE_TYPES:
                 _LOGGER.info(f"Loaded object of type '{type(obj)}' from '{true_filepath}'.")
             else:
                 _LOGGER.info(f"Loaded object '{obj}' from '{true_filepath}'.")

{dragon_ml_toolbox-13.3.0 → dragon_ml_toolbox-13.3.2}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "dragon-ml-toolbox"
-version = "13.3.0"
+version = "13.3.2"
 description = "A collection of tools for data science and machine learning projects."
 authors = [
     { name = "Karl L. Loza Vidaurre", email = "luigiloza@gmail.com" }