likelihood 1.5.7__tar.gz → 1.5.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {likelihood-1.5.7 → likelihood-1.5.8}/PKG-INFO +1 -1
  2. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/deep/autoencoders.py +50 -27
  3. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/deep/gan.py +3 -3
  4. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/deep/predictor.py +9 -8
  5. likelihood-1.5.8/likelihood/tools/cat_embed.py +213 -0
  6. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood.egg-info/PKG-INFO +1 -1
  7. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood.egg-info/SOURCES.txt +1 -0
  8. {likelihood-1.5.7 → likelihood-1.5.8}/LICENSE +0 -0
  9. {likelihood-1.5.7 → likelihood-1.5.8}/README.md +0 -0
  10. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/__init__.py +0 -0
  11. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/graph/__init__.py +0 -0
  12. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/graph/graph.py +0 -0
  13. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/graph/nn.py +0 -0
  14. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/main.py +0 -0
  15. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/__init__.py +0 -0
  16. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/deep/__init__.py +0 -0
  17. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/hmm.py +0 -0
  18. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/regression.py +0 -0
  19. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/simulation.py +0 -0
  20. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/utils.py +0 -0
  21. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/tools/__init__.py +0 -0
  22. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/tools/figures.py +0 -0
  23. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/tools/impute.py +0 -0
  24. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/tools/models_tools.py +0 -0
  25. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/tools/numeric_tools.py +0 -0
  26. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/tools/tools.py +0 -0
  27. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood.egg-info/dependency_links.txt +0 -0
  28. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood.egg-info/requires.txt +0 -0
  29. {likelihood-1.5.7 → likelihood-1.5.8}/likelihood.egg-info/top_level.txt +0 -0
  30. {likelihood-1.5.7 → likelihood-1.5.8}/setup.cfg +0 -0
  31. {likelihood-1.5.7 → likelihood-1.5.8}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: likelihood
3
- Version: 1.5.7
3
+ Version: 1.5.8
4
4
  Summary: A package that performs the maximum likelihood algorithm.
5
5
  Home-page: https://github.com/jzsmoreno/likelihood/
6
6
  Author: J. A. Moreno-Guerra
@@ -277,7 +277,8 @@ class AutoClassifier(tf.keras.Model):
277
277
  activation=self.activation,
278
278
  kernel_regularizer=l2(self.l2_reg),
279
279
  ),
280
- ]
280
+ ],
281
+ name="encoder",
281
282
  )
282
283
  if not self.encoder
283
284
  else self.encoder
@@ -296,7 +297,8 @@ class AutoClassifier(tf.keras.Model):
296
297
  activation=self.activation,
297
298
  kernel_regularizer=l2(self.l2_reg),
298
299
  ),
299
- ]
300
+ ],
301
+ name="decoder",
300
302
  )
301
303
  if not self.decoder
302
304
  else self.decoder
@@ -326,7 +328,7 @@ class AutoClassifier(tf.keras.Model):
326
328
  log_var = tf.keras.layers.Lambda(lambda x: x + 1e-7)(log_var)
327
329
 
328
330
  self.encoder = (
329
- tf.keras.Model(inputs, [mean, log_var], name="encoder")
331
+ tf.keras.Model(inputs, [mean, log_var], name="vae_encoder")
330
332
  if not self.encoder
331
333
  else self.encoder
332
334
  )
@@ -345,7 +347,8 @@ class AutoClassifier(tf.keras.Model):
345
347
  ),
346
348
  tf.keras.layers.BatchNormalization(),
347
349
  tf.keras.layers.Activation(self.activation),
348
- ]
350
+ ],
351
+ name="vae_decoder",
349
352
  )
350
353
  if not self.decoder
351
354
  else self.decoder
@@ -366,13 +369,7 @@ class AutoClassifier(tf.keras.Model):
366
369
  )
367
370
  if self.dropout:
368
371
  self.classifier.add(tf.keras.layers.Dropout(self.dropout))
369
- self.classifier.add(
370
- tf.keras.layers.Dense(
371
- units=self.num_classes,
372
- activation=self.classifier_activation,
373
- kernel_regularizer=l2(self.l2_reg),
374
- )
375
- )
372
+
376
373
  elif self.lora_mode:
377
374
  for _ in range(self.num_layers - 1):
378
375
  self.classifier.add(
@@ -381,21 +378,14 @@ class AutoClassifier(tf.keras.Model):
381
378
  self.classifier.add(tf.keras.layers.Activation(self.activation))
382
379
  if self.dropout:
383
380
  self.classifier.add(tf.keras.layers.Dropout(self.dropout))
384
- self.classifier.add(
385
- tf.keras.layers.Dense(
386
- units=self.num_classes,
387
- activation=self.classifier_activation,
388
- kernel_regularizer=l2(self.l2_reg),
389
- )
390
- )
391
- else:
392
- self.classifier.add(
393
- tf.keras.layers.Dense(
394
- units=self.num_classes,
395
- activation=self.classifier_activation,
396
- kernel_regularizer=l2(self.l2_reg),
397
- )
381
+
382
+ self.classifier.add(
383
+ tf.keras.layers.Dense(
384
+ units=self.num_classes,
385
+ activation=self.classifier_activation,
386
+ kernel_regularizer=l2(self.l2_reg),
398
387
  )
388
+ )
399
389
 
400
390
  def train_encoder_decoder(
401
391
  self, data, epochs, batch_size, validation_split=0.2, patience=10, **kwargs
@@ -610,6 +600,13 @@ def call_existing_code(
610
600
  num_layers : `int`
611
601
  The number of hidden layers in the classifier. Default is 1.
612
602
 
603
+ Keyword Arguments:
604
+ ----------
605
+ vae_mode : `bool`
606
+ Whether to use variational autoencoder mode. Default is False.
607
+ vae_units : `int`
608
+ The number of units in the variational autoencoder. Default is 2.
609
+
613
610
  Returns
614
611
  -------
615
612
  `AutoClassifier`
@@ -617,6 +614,8 @@ def call_existing_code(
617
614
  """
618
615
  dropout = kwargs.get("dropout", None)
619
616
  l2_reg = kwargs.get("l2_reg", 0.0)
617
+ vae_mode = kwargs.get("vae_mode", False)
618
+ vae_units = kwargs.get("vae_units", 2)
620
619
  model = AutoClassifier(
621
620
  input_shape_parm=input_shape_parm,
622
621
  num_classes=num_classes,
@@ -625,6 +624,8 @@ def call_existing_code(
625
624
  num_layers=num_layers,
626
625
  dropout=dropout,
627
626
  l2_reg=l2_reg,
627
+ vae_mode=vae_mode,
628
+ vae_units=vae_units,
628
629
  )
629
630
  model.compile(
630
631
  optimizer=optimizer,
@@ -731,6 +732,24 @@ def build_model(
731
732
  else hyperparameters["l2_reg"]
732
733
  )
733
734
  )
735
+ vae_mode = (
736
+ hp.Choice("vae_mode", [True, False])
737
+ if "vae_mode" not in hyperparameters_keys
738
+ else hyperparameters["vae_mode"]
739
+ )
740
+
741
+ try:
742
+ vae_units = (
743
+ hp.Int("vae_units", min_value=2, max_value=10, step=1)
744
+ if ("vae_units" not in hyperparameters_keys) and vae_mode
745
+ else (
746
+ hp.Choice("vae_units", hyperparameters["vae_units"])
747
+ if isinstance(hyperparameters["vae_units"], list)
748
+ else hyperparameters["vae_units"]
749
+ )
750
+ )
751
+ except KeyError:
752
+ vae_units = None
734
753
 
735
754
  model = call_existing_code(
736
755
  units=units,
@@ -742,6 +761,8 @@ def build_model(
742
761
  num_layers=num_layers,
743
762
  dropout=dropout,
744
763
  l2_reg=l2_reg,
764
+ vae_mode=vae_mode,
765
+ vae_units=vae_units,
745
766
  )
746
767
  return model
747
768
 
@@ -876,6 +897,8 @@ def setup_model(
876
897
  tuner.results_summary()
877
898
  else:
878
899
  best_model = tf.keras.models.load_model(filepath)
879
-
880
900
  best_hps = tuner.get_best_hyperparameters(1)[0].values
881
- return best_model, pd.DataFrame(best_hps, index=["Value"])
901
+ vae_mode = best_hps.get("vae_mode", hyperparameters.get("vae_mode", False))
902
+ best_hps["vae_units"] = None if not vae_mode else best_hps["vae_units"]
903
+
904
+ return best_model, pd.DataFrame(best_hps, index=["Value"]).dropna(axis=1)
@@ -41,7 +41,7 @@ class GANRegressor(tf.keras.Model):
41
41
  self.build(dummy_input.shape)
42
42
 
43
43
  def build(self, input_shape):
44
- self.gan = tf.keras.models.Sequential([self.generator, self.discriminator])
44
+ self.gan = tf.keras.models.Sequential([self.generator, self.discriminator], name="gan")
45
45
 
46
46
  self.generator.compile(
47
47
  optimizer=self.optimizer,
@@ -57,7 +57,7 @@ class GANRegressor(tf.keras.Model):
57
57
  super(GANRegressor, self).build(input_shape)
58
58
 
59
59
  def _build_generator(self):
60
- generator = tf.keras.Sequential()
60
+ generator = tf.keras.Sequential(name="generator")
61
61
  generator.add(
62
62
  tf.keras.layers.Dense(
63
63
  self.num_neurons,
@@ -78,7 +78,7 @@ class GANRegressor(tf.keras.Model):
78
78
  return generator
79
79
 
80
80
  def _build_discriminator(self):
81
- discriminator = tf.keras.Sequential()
81
+ discriminator = tf.keras.Sequential(name="discriminator")
82
82
  for _ in range(self.depth):
83
83
  discriminator.add(
84
84
  tf.keras.layers.Dense(
@@ -109,15 +109,16 @@ class GetInsights:
109
109
  "in the model's transformation.</p>"
110
110
  )
111
111
  )
112
- self.viz_encoder_decoder_graphs(threshold_factor=threshold_factor, top_k=top_k)
113
-
114
- display(HTML("<h2 style='margin-top:30px;'>🧠 Classifier Layer Graphs</h2>"))
115
- display(
116
- HTML(
117
- "<p>This visualization shows how features propagate through each dense layer in the classifier. "
118
- "Only the strongest weighted connections are shown to highlight influential paths through the network.</p>"
112
+ if not self.model.encoder.name.startswith("vae"):
113
+ self.viz_encoder_decoder_graphs(threshold_factor=threshold_factor, top_k=top_k)
114
+
115
+ display(HTML("<h2 style='margin-top:30px;'>🧠 Classifier Layer Graphs</h2>"))
116
+ display(
117
+ HTML(
118
+ "<p>This visualization shows how features propagate through each dense layer in the classifier. "
119
+ "Only the strongest weighted connections are shown to highlight influential paths through the network.</p>"
120
+ )
119
121
  )
120
- )
121
122
  self.viz_classifier_graphs(threshold_factor=threshold_factor, top_k=top_k)
122
123
 
123
124
  display(HTML("<h2 style='margin-top:30px;'>📈 Statistical Summary</h2>"))
@@ -0,0 +1,213 @@
1
+ import logging
2
+ import os
3
+ from typing import List
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
9
+ logging.getLogger("tensorflow").setLevel(logging.ERROR)
10
+ import tensorflow as tf
11
+ from pandas.core.frame import DataFrame
12
+ from sklearn.preprocessing import LabelEncoder
13
+
14
+ tf.get_logger().setLevel("ERROR")
15
+
16
+
17
class CategoricalEmbedder:
    """Learn dense embedding vectors for categorical columns of a DataFrame.

    Each categorical column is label-encoded and mapped to a ``tf.Variable``
    embedding matrix of shape ``(vocab_size, embedding_dim)``. ``transform``
    replaces each categorical column with ``embedding_dim`` numeric columns
    named ``"{col}_embed_{i}"``; ``inverse_transform`` recovers the original
    labels via nearest-neighbour search over the embedding matrix.
    """

    def __init__(self, embedding_dim=32):
        # Size of the embedding vector produced for every category.
        self.embedding_dim = embedding_dim
        # Per-column fitted LabelEncoder instances.
        self.label_encoders = {}
        # Per-column tf.Variable embedding matrices.
        self.embeddings = {}
        # Per-column fill value (training-time mode) used to impute NaNs at
        # transform time, so transform never introduces labels unseen in fit.
        self._fill_values = {}

    def fit(self, df: DataFrame, categorical_cols: List):
        """
        Fit the embeddings on the given data.

        Parameters
        ----------
        df : `DataFrame`
            Pandas DataFrame containing the tabular data.
        categorical_cols : `List`
            List of column names representing categorical features.

        Returns
        -------
        `None`

        Raises
        ------
        ValueError
            If any column in `categorical_cols` is missing from `df`.
        """
        df_processed = df.copy()

        # Validate every requested column up front so no partial state is built.
        for col in categorical_cols:
            if col not in df_processed.columns:
                raise ValueError(f"Column {col} not found in DataFrame")

        for col in categorical_cols:
            # Impute NaNs with the column mode and remember it so transform()
            # can reuse the *training* statistic (see bug-fix note there).
            mode_val = df_processed[col].mode()
            if not mode_val.empty:
                self._fill_values[col] = mode_val[0]
                df_processed[col] = df_processed[col].fillna(mode_val[0])

        for col in categorical_cols:
            le = LabelEncoder()
            df_processed[col] = le.fit_transform(df_processed[col])
            self.label_encoders[col] = le

            # Randomly initialised embedding matrix. NOTE(review): np.random
            # is unseeded, so embeddings differ across runs unless
            # save_embeddings()/load_embeddings() is used to persist them.
            vocab_size = len(le.classes_)
            embedding_matrix = np.random.rand(vocab_size, self.embedding_dim)
            self.embeddings[col] = tf.Variable(embedding_matrix, dtype=tf.float32)

    def transform(self, df: DataFrame, categorical_cols: List[str]):
        """
        Transform the data using the fitted embeddings.

        Parameters
        ----------
        df : `DataFrame`
            Pandas DataFrame containing the tabular data.
        categorical_cols : `List[str]`
            List of column names representing categorical features.

        Returns
        -------
        Transformed Pandas DataFrame with original columns except `categorical_cols` replaced by their embedding representations.

        Raises
        ------
        ValueError
            If a column in `categorical_cols` has not been fitted.
        """

        df_processed = df.copy()

        for col in categorical_cols:
            if col not in self.label_encoders:
                raise ValueError(
                    f"Column {col} has not been fitted. Please call fit() on this column first."
                )
            # Bug fix: impute with the *training-time* mode when available.
            # Previously the transform-time mode was used, which could fill
            # NaNs with a label unseen during fit and make le.transform raise,
            # and made train/inference imputation inconsistent. Fall back to
            # the old behavior only when no stored mode exists (e.g. the
            # encoders were attached manually after load_embeddings()).
            fill_value = self._fill_values.get(col) if hasattr(self, "_fill_values") else None
            if fill_value is None:
                mode_val = df_processed[col].mode()
                if not mode_val.empty:
                    fill_value = mode_val[0]
            if fill_value is not None:
                df_processed[col] = df_processed[col].fillna(fill_value)
            le = self.label_encoders[col]
            df_processed[col] = le.transform(df_processed[col])

        for col in categorical_cols:
            indices_tensor = tf.constant(df_processed[col], dtype=tf.int32)
            embedding_layer = tf.nn.embedding_lookup(
                params=self.embeddings[col], ids=indices_tensor
            )
            # A scalar lookup yields a 1-D vector; promote it to (1, dim) so
            # the per-dimension column slicing below always works.
            if len(embedding_layer.shape) == 1:
                embedding_layer = tf.expand_dims(embedding_layer, axis=0)

            for i in range(self.embedding_dim):
                df_processed[f"{col}_embed_{i}"] = embedding_layer[:, i]
            df_processed.drop(columns=[col], inplace=True)

        return df_processed

    def inverse_transform(self, df: pd.DataFrame, categorical_cols: List[str]):
        """
        Inverse transform the data using the fitted embeddings.

        Parameters
        ----------
        df : `DataFrame`
            Pandas DataFrame containing the tabular data with embedded representations.
        categorical_cols : `List[str]`
            List of column names representing categorical features.

        Returns
        -------
        Transformed Pandas DataFrame with original columns replaced by their categorical labels.

        Raises
        ------
        ValueError
            If a column in `categorical_cols` has not been fitted.
        """

        df_processed = df.copy()

        for col in categorical_cols:
            if col not in self.label_encoders:
                raise ValueError(
                    f"Column {col} has not been fitted. Please call fit() on this column first."
                )

            embedding_matrix = self.embeddings[col].numpy()
            label_encoder = self.label_encoders[col]

            embedded_columns = [f"{col}_embed_{i}" for i in range(self.embedding_dim)]
            embeddings = df_processed[embedded_columns].values

            # Nearest-neighbour decode: (n, 1, dim) - (vocab, dim) broadcasts
            # to (n, vocab, dim); the norm over axis=2 gives per-row distances
            # to every vocabulary embedding.
            distances = np.linalg.norm(embedding_matrix - embeddings[:, np.newaxis], axis=2)
            original_indices = np.argmin(distances, axis=1)
            original_labels = label_encoder.inverse_transform(original_indices)

            df_processed[col] = original_labels
            df_processed.drop(columns=embedded_columns, inplace=True)

        return df_processed

    def save_embeddings(self, path: str):
        """
        Save the embeddings to a directory.

        Parameters
        ----------
        path : `str`
            Path to the directory where embeddings will be saved.
        """

        os.makedirs(path, exist_ok=True)
        for col, embedding in self.embeddings.items():
            np.save(os.path.join(path, f"{col}_embedding.npy"), embedding.numpy())

    def load_embeddings(self, path: str):
        """
        Load the embeddings from a directory.

        Note: only the embedding matrices are restored; `label_encoders` must
        already be populated (one file per fitted column is expected).

        Parameters
        ----------
        path : `str`
            Path to the directory where embeddings are saved.

        Raises
        ------
        FileNotFoundError
            If an expected embedding file is missing.
        """

        for col in self.label_encoders.keys():
            embedding_path = os.path.join(path, f"{col}_embedding.npy")
            if not os.path.exists(embedding_path):
                raise FileNotFoundError(f"Embedding file {embedding_path} not found.")
            embedding_matrix = np.load(embedding_path)
            self.embeddings[col] = tf.Variable(embedding_matrix, dtype=tf.float32)
169
+
170
+
171
if __name__ == "__main__":
    # Small demo dataset with missing categorical values.
    df = pd.DataFrame(
        {
            "color": ["red", "blue", None, "green", "blue"],
            "size": ["S", "M", "XL", "XS", None],
            "price": [10.99, 25.50, 30.00, 8.75, 12.25],
        }
    )
    cat_cols = ["color", "size"]

    # Fit an embedder and embed the categorical columns.
    embedder = CategoricalEmbedder(embedding_dim=3)
    embedder.fit(df, categorical_cols=cat_cols)
    embedded_df = embedder.transform(df, categorical_cols=cat_cols)
    print("Processed DataFrame:")
    print(embedded_df.head())

    # Persist the embedding matrices, then restore them into a fresh embedder.
    embedder.save_embeddings("./embeddings")
    restored = CategoricalEmbedder(embedding_dim=3)
    # Assuming label encodings are consistent across runs
    restored.label_encoders = embedder.label_encoders
    restored.load_embeddings("./embeddings")

    # Transform again with the restored embeddings.
    embedded_again = restored.transform(df, categorical_cols=cat_cols)
    print("\nProcessed DataFrame with Loaded Embeddings:")
    print(embedded_again.head())

    # Round-trip: map the embedding columns back to categorical labels.
    recovered = restored.inverse_transform(embedded_again, categorical_cols=cat_cols)
    print("\nOriginal DataFrame:")
    print(df.head())
    print("\nProcessed DataFrame with Inverse Transform:")
    print(recovered.head())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: likelihood
3
- Version: 1.5.7
3
+ Version: 1.5.8
4
4
  Summary: A package that performs the maximum likelihood algorithm.
5
5
  Home-page: https://github.com/jzsmoreno/likelihood/
6
6
  Author: J. A. Moreno-Guerra
@@ -21,6 +21,7 @@ likelihood/models/deep/autoencoders.py
21
21
  likelihood/models/deep/gan.py
22
22
  likelihood/models/deep/predictor.py
23
23
  likelihood/tools/__init__.py
24
+ likelihood/tools/cat_embed.py
24
25
  likelihood/tools/figures.py
25
26
  likelihood/tools/impute.py
26
27
  likelihood/tools/models_tools.py
File without changes
File without changes
File without changes
File without changes