dragon-ml-toolbox 13.3.0__py3-none-any.whl → 13.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 13.3.0
3
+ Version: 13.3.2
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,5 +1,5 @@
1
- dragon_ml_toolbox-13.3.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
2
- dragon_ml_toolbox-13.3.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
1
+ dragon_ml_toolbox-13.3.2.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
2
+ dragon_ml_toolbox-13.3.2.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
3
3
  ml_tools/ETL_cleaning.py,sha256=2VBRllV8F-ZiPylPp8Az2gwn5ztgazN0BH5OKnRUhV0,20402
4
4
  ml_tools/ETL_engineering.py,sha256=KfYqgsxupAx6e_TxwO1LZXeu5mFkIhVXJrNjP3CzIZc,54927
5
5
  ml_tools/GUI_tools.py,sha256=Va6ig-dHULPVRwQYYtH3fvY5XPIoqRcJpRW8oXC55Hw,45413
@@ -9,7 +9,7 @@ ml_tools/ML_datasetmaster.py,sha256=7QJnOM6GWFklKt2fiukITM3DK49i3ThK8wazb5szwpE,
9
9
  ml_tools/ML_evaluation.py,sha256=3u5dOhS77gn3kAshKr2GwSa5xZBF0YM77ZkFevqNPvA,18528
10
10
  ml_tools/ML_evaluation_multi.py,sha256=L6Ub_uObXsI7ToVCF6DtmAFekHRcga5wWMOnRYRR-BY,16121
11
11
  ml_tools/ML_inference.py,sha256=yq2gdN6s_OUYC5ZLQrIJC5BA5H33q8UKODXwb-_0M2c,23549
12
- ml_tools/ML_models.py,sha256=4Kb23pSusPMRH8h-R9ztK6JoH1lMuckxq7ihorll-H8,29965
12
+ ml_tools/ML_models.py,sha256=UVWJHPLVIvFno_csCHH1FwBfTwQ5nX0V8F1TbOByZ4I,31388
13
13
  ml_tools/ML_optimization.py,sha256=P0zkhKAwTpkorIBtR0AOIDcyexo5ngmvFUzo3DfNO-E,22692
14
14
  ml_tools/ML_scaler.py,sha256=tw6onj9o8_kk3FQYb930HUzvv1zsFZe2YZJdF3LtHkU,7538
15
15
  ml_tools/ML_trainer.py,sha256=9BP6JFClqGfe7GL-FGG3n5e-no9ssjEOLol7P6baGrI,29019
@@ -30,12 +30,12 @@ ml_tools/ensemble_inference.py,sha256=0yLmLNj45RVVoSCLH1ZYJG9IoAhTkWUqEZmLOQTFGT
30
30
  ml_tools/ensemble_learning.py,sha256=vsIED7nlheYI4w2SBzP6SC1AnNeMfn-2A1Gqw5EfxsM,21964
31
31
  ml_tools/handle_excel.py,sha256=pfdAPb9ywegFkM9T54bRssDOsX-K7rSeV0RaMz7lEAo,14006
32
32
  ml_tools/keys.py,sha256=oykUVLB4Wos3AZomowjtI8AFFC5xnMUH-icNHydRpOk,2275
33
- ml_tools/math_utilities.py,sha256=PxoOrnuj6Ntp7_TJqyDWi0JX03WpAO5iaFNK2Oeq5I4,8800
33
+ ml_tools/math_utilities.py,sha256=xeKq1quR_3DYLgowcp4Uam_4s3JltUyOnqMOGuAiYWU,8802
34
34
  ml_tools/optimization_tools.py,sha256=TYFQ2nSnp7xxs-VyoZISWgnGJghFbsWasHjruegyJRs,12763
35
35
  ml_tools/path_manager.py,sha256=CyDU16pOKmC82jPubqJPT6EBt-u-3rGVbxyPIZCvDDY,18432
36
- ml_tools/serde.py,sha256=CmdJmQCPdrm2RQA1hWLsGxU_B3aClQoQ9B4vcQtIrEs,6951
36
+ ml_tools/serde.py,sha256=c8uDYjYry_VrLvoG4ixqDj5pij88lVn6Tu4NHcPkwDU,6943
37
37
  ml_tools/utilities.py,sha256=OcAyV1tEcYAfOWlGjRgopsjDLxU3DcI5EynzvWV4q3A,15754
38
- dragon_ml_toolbox-13.3.0.dist-info/METADATA,sha256=m2RVQa8YeN6e4hnsg6TwAMjymhTrburFXbmw-yB8JeQ,6166
39
- dragon_ml_toolbox-13.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
- dragon_ml_toolbox-13.3.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
41
- dragon_ml_toolbox-13.3.0.dist-info/RECORD,,
38
+ dragon_ml_toolbox-13.3.2.dist-info/METADATA,sha256=RMnB45xVa4W8DibE8KTKn-Au62avG72w_ujDIsWnZBM,6166
39
+ dragon_ml_toolbox-13.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
+ dragon_ml_toolbox-13.3.2.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
41
+ dragon_ml_toolbox-13.3.2.dist-info/RECORD,,
ml_tools/ML_models.py CHANGED
@@ -306,10 +306,10 @@ class TabularTransformer(nn.Module, _ArchitectureHandlerMixin):
306
306
  def __init__(self, *,
307
307
  schema: FeatureSchema,
308
308
  out_targets: int,
309
- embedding_dim: int = 32,
309
+ embedding_dim: int = 256,
310
310
  num_heads: int = 8,
311
311
  num_layers: int = 6,
312
- dropout: float = 0.1):
312
+ dropout: float = 0.2):
313
313
  """
314
314
  Args:
315
315
  schema (FeatureSchema):
@@ -317,14 +317,28 @@ class TabularTransformer(nn.Module, _ArchitectureHandlerMixin):
317
317
  out_targets (int):
318
318
  Number of output targets (1 for regression).
319
319
  embedding_dim (int):
320
- The dimension for all feature embeddings. Must be divisible
321
- by num_heads.
320
+ The dimension for all feature embeddings. Must be divisible by num_heads. Common values: (64, 128, 192, 256, etc.)
322
321
  num_heads (int):
323
- The number of heads in the multi-head attention mechanism.
322
+ The number of heads in the multi-head attention mechanism. Common values: (4, 8, 16)
324
323
  num_layers (int):
325
- The number of sub-encoder-layers in the transformer encoder.
324
+ The number of sub-encoder-layers in the transformer encoder. Common values: (4, 8, 12)
326
325
  dropout (float):
327
326
  The dropout value.
327
+
328
+ ## Note:
329
+
330
+ **Embedding Dimension:** "Width" of the model. It's the N-dimension vector that will be used to represent each one of the features.
331
+ - Each continuous feature gets its own learnable N-dimension vector.
332
+ - Each categorical feature gets an embedding table that maps every category (e.g., "color=red", "color=blue") to a unique N-dimension vector.
333
+
334
+ **Attention Heads:** Controls the "Multi-Head Attention" mechanism. Instead of looking at all the feature interactions at once, the model splits its attention into N parallel heads.
335
+ - Embedding Dimensions get divided by the number of Attention Heads, resulting in the dimensions assigned per head.
336
+
337
+ **Number of Layers:** "Depth" of the model. Number of identical `TransformerEncoderLayer` blocks that are stacked on top of each other.
338
+ - Layer 1: The attention heads find simple, direct interactions between the features.
339
+ - Layer 2: Takes the output of Layer 1 and finds interactions between those interactions and so on.
340
+ - Trade-off: More layers are more powerful but are slower to train and more prone to overfitting. If the training loss goes down but the validation loss goes up, you might have too many layers (or need more dropout).
341
+
328
342
  """
329
343
  super().__init__()
330
344
 
ml_tools/math_utilities.py CHANGED
@@ -219,7 +219,7 @@ def discretize_categorical_values(
219
219
  _LOGGER.error(f"'categorical_info' is not a dictionary, or is empty.")
220
220
  raise ValueError()
221
221
 
222
- _, total_features = input_array.shape
222
+ _, total_features = working_array.shape
223
223
  for col_idx, cardinality in categorical_info.items():
224
224
  if not isinstance(col_idx, int):
225
225
  _LOGGER.error(f"Column index key {col_idx} is not an integer.")
ml_tools/serde.py CHANGED
@@ -85,7 +85,7 @@ def serialize_object(obj: Any, file_path: Path, verbose: bool = True, raise_on_e
85
85
  return None
86
86
  else:
87
87
  if verbose:
88
- if isinstance(obj, _SIMPLE_TYPES):
88
+ if type(obj) in _SIMPLE_TYPES:
89
89
  _LOGGER.info(f"Object of type '{type(obj)}' saved to '{file_path}'")
90
90
  else:
91
91
  _LOGGER.info(f"Object '{obj}' saved to '{file_path}'")
@@ -140,7 +140,7 @@ def deserialize_object(
140
140
 
141
141
  if verbose:
142
142
  # log special objects
143
- if isinstance(obj, _SIMPLE_TYPES):
143
+ if type(obj) in _SIMPLE_TYPES:
144
144
  _LOGGER.info(f"Loaded object of type '{type(obj)}' from '{true_filepath}'.")
145
145
  else:
146
146
  _LOGGER.info(f"Loaded object '{obj}' from '{true_filepath}'.")