likelihood 1.3.1__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to its public registry. It is provided for informational purposes only.
likelihood/models/deep/autoencoders.py
@@ -19,9 +19,9 @@ from functools import wraps
 
 import keras_tuner
 import tensorflow as tf
-from keras.src.engine.input_layer import InputLayer
 from pandas.core.frame import DataFrame
 from sklearn.manifold import TSNE
+from tensorflow.keras.layers import InputLayer
 from tensorflow.keras.regularizers import l2
 
 from likelihood.tools import OneHotEncoder
@@ -39,53 +39,227 @@ def suppress_warnings(func):
     return wrapper
 
 
+class EarlyStopping:
+    def __init__(self, patience=10, min_delta=0.001):
+        self.patience = patience
+        self.min_delta = min_delta
+        self.best_loss = np.inf
+        self.counter = 0
+        self.stop_training = False
+
+    def __call__(self, current_loss):
+        if self.best_loss - current_loss > self.min_delta:
+            self.best_loss = current_loss
+            self.counter = 0
+        else:
+            self.counter += 1
+
+        if self.counter >= self.patience:
+            self.stop_training = True
+
+
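
A minimal sketch of how this helper is driven from a custom loop (the loss values are made up for illustration):

    stopper = EarlyStopping(patience=2, min_delta=0.01)
    for epoch, loss in enumerate([1.0, 0.5, 0.499, 0.498]):
        stopper(loss)  # improvements smaller than min_delta bump the counter
        if stopper.stop_training:
            print(f"stopping at epoch {epoch}")  # fires at epoch 3
            break
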
+def mse_loss(y_true, y_pred):
+    """
+    Mean squared error loss function.
+
+    Parameters
+    ----------
+    y_true : `tf.Tensor`
+        The true values.
+    y_pred : `tf.Tensor`
+        The predicted values.
+
+    Returns
+    -------
+    `tf.Tensor`
+    """
+    return tf.reduce_mean(tf.square(y_true - y_pred))
+
+
+def kl_loss(mean, log_var):
+    """
+    Kullback-Leibler divergence loss function.
+
+    Parameters
+    ----------
+    mean : `tf.Tensor`
+        The mean of the distribution.
+    log_var : `tf.Tensor`
+        The log variance of the distribution.
+
+    Returns
+    -------
+    `tf.Tensor`
+    """
+    return -0.5 * tf.reduce_mean(1 + log_var - tf.square(mean) - tf.exp(log_var))
+
+
+def vae_loss(y_true, y_pred, mean, log_var):
+    """
+    Variational autoencoder loss function.
+
+    Parameters
+    ----------
+    y_true : `tf.Tensor`
+        The true values.
+    y_pred : `tf.Tensor`
+        The predicted values.
+    mean : `tf.Tensor`
+        The mean of the distribution.
+    log_var : `tf.Tensor`
+        The log variance of the distribution.
+
+    Returns
+    -------
+    `tf.Tensor`
+    """
+    return mse_loss(y_true, y_pred) + kl_loss(mean, log_var)
+
+
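
kl_loss is the familiar closed form -0.5 * (1 + log σ² − μ² − σ²) for the divergence between N(μ, σ²) and a standard normal, averaged rather than summed over elements, and vae_loss is simply the sum of the two terms. A quick sanity check (illustrative values, not part of the package):

    y_true = tf.constant([[0.0, 1.0]])
    y_pred = tf.constant([[0.1, 0.9]])
    mean = tf.zeros_like(y_true)
    log_var = tf.zeros_like(y_true)  # zero mean, unit variance -> KL term is 0
    total = vae_loss(y_true, y_pred, mean, log_var)
    assert abs(float(total) - float(mse_loss(y_true, y_pred))) < 1e-6
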
+def sampling(mean, log_var, epsilon_value=1e-8):
+    """
+    Samples from the distribution.
+
+    Parameters
+    ----------
+    mean : `tf.Tensor`
+        The mean of the distribution.
+    log_var : `tf.Tensor`
+        The log variance of the distribution.
+    epsilon_value : float
+        A small value to avoid numerical instability.
+
+    Returns
+    -------
+    `tf.Tensor`
+    """
+    stddev = tf.exp(0.5 * log_var) + epsilon_value
+    epsilon = tf.random.normal(shape=tf.shape(mean), mean=0.0, stddev=1.0)
+    return mean + stddev * epsilon
+
+
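
This is the standard reparameterization trick, z = mean + stddev * epsilon, which keeps the sample differentiable with respect to mean and log_var. A sketch of that property (toy shapes, eager mode assumed):

    mean = tf.Variable([[0.0, 0.0]])
    log_var = tf.Variable([[0.0, 0.0]])
    with tf.GradientTape() as tape:
        z = sampling(mean, log_var)
        loss = tf.reduce_sum(tf.square(z))
    grads = tape.gradient(loss, [mean, log_var])  # both gradients are non-None
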
+def check_for_nans(tensors, name="Tensor"):
+    for t in tensors:
+        if tf.reduce_any(tf.math.is_nan(t)) or tf.reduce_any(tf.math.is_inf(t)):
+            print(f"Warning: {name} contains NaNs or Infs")
+            return True
+    return False
+
+
+def cal_loss_step(batch, encoder, decoder, vae_mode=False, training=True):
+    """
+    Calculates the loss value on a batch of data.
+
+    Parameters
+    ----------
+    batch : `tf.Tensor`
+        The batch of data.
+    encoder : `tf.keras.Model`
+        The encoder model.
+    decoder : `tf.keras.Model`
+        The decoder model.
+    vae_mode : `bool`
+        Whether to use variational autoencoder mode. Default is False.
+    training : `bool`
+        Whether the model is in training mode. Default is True.
+
+    Returns
+    -------
+    `tf.Tensor`
+        The loss value.
+    """
+    if vae_mode:
+        mean, log_var = encoder(batch, training=training)
+        log_var = tf.clip_by_value(log_var, clip_value_min=1e-8, clip_value_max=tf.float32.max)
+        decoded = decoder(sampling(mean, log_var), training=training)
+        loss = vae_loss(batch, decoded, mean, log_var)
+    else:
+        encoded = encoder(batch, training=training)
+        decoded = decoder(encoded, training=training)
+        loss = mse_loss(batch, decoded)
+
+    return loss
+
+
+@tf.function
+def train_step(batch, encoder, decoder, optimizer, vae_mode=False):
+    """
+    Trains the model on a batch of data.
+
+    Parameters
+    ----------
+    batch : `tf.Tensor`
+        The batch of data.
+    encoder : `tf.keras.Model`
+        The encoder model.
+    decoder : `tf.keras.Model`
+        The decoder model.
+    optimizer : `tf.keras.optimizers.Optimizer`
+        The optimizer to use.
+    vae_mode : `bool`
+        Whether to use variational autoencoder mode. Default is False.
+
+    Returns
+    -------
+    `tf.Tensor`
+        The loss value.
+    """
+    optimizer.build(encoder.trainable_variables + decoder.trainable_variables)
+
+    with tf.GradientTape() as encoder_tape, tf.GradientTape() as decoder_tape:
+        loss = cal_loss_step(batch, encoder, decoder, vae_mode=vae_mode)
+
+    gradients_of_encoder = encoder_tape.gradient(loss, encoder.trainable_variables)
+    gradients_of_decoder = decoder_tape.gradient(loss, decoder.trainable_variables)
+
+    optimizer.apply_gradients(zip(gradients_of_encoder, encoder.trainable_variables))
+    optimizer.apply_gradients(zip(gradients_of_decoder, decoder.trainable_variables))
+
+    return loss
+
+
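
train_encoder_decoder below wraps this function, but it can also be driven directly. A hypothetical standalone loop, where encoder and decoder stand in for already-built Keras models with matching widths:

    optimizer = tf.keras.optimizers.Adam(1e-3)
    dataset = tf.data.Dataset.from_tensor_slices(
        np.random.rand(256, 8).astype("float32")
    ).batch(32)
    for batch in dataset:
        loss = train_step(batch, encoder, decoder, optimizer, vae_mode=False)
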
 @tf.keras.utils.register_keras_serializable(package="Custom", name="AutoClassifier")
 class AutoClassifier(tf.keras.Model):
     """
     An auto-classifier model that automatically determines the best classification strategy based on the input data.
 
-    Attributes:
-    - input_shape_parm: The shape of the input data.
-    - num_classes: The number of classes in the dataset.
-    - units: The number of neurons in each hidden layer.
-    - activation: The type of activation function to use for the neural network layers.
-
-    Methods:
-    __init__(self, input_shape_parm, num_classes, units, activation): Initializes an AutoClassifier instance with the given parameters.
-    build(self, input_shape_parm): Builds the model architecture based on input_shape_parm.
-    call(self, x): Defines the forward pass of the model.
-    get_config(self): Returns the configuration of the model.
-    from_config(cls, config): Recreates an instance of AutoClassifier from its configuration.
-    """
-
-    def __init__(self, input_shape_parm, num_classes, units, activation, **kwargs):
-        """
-        Initializes an AutoClassifier instance with the given parameters.
+    Parameters
+    ----------
+    input_shape_parm : `int`
+        The shape of the input data.
+    num_classes : `int`
+        The number of classes in the dataset.
+    units : `int`
+        The number of neurons in each hidden layer.
+    activation : `str`
+        The type of activation function to use for the neural network layers.
 
-        Parameters
-        ----------
-        input_shape_parm : `int`
-            The shape of the input data.
-        num_classes : `int`
-            The number of classes in the dataset.
-        units : `int`
-            The number of neurons in each hidden layer.
-        activation : `str`
-            The type of activation function to use for the neural network layers.
+    Keyword Arguments:
+    ----------
+        Additional keyword arguments to pass to the model.
 
-        Keyword Arguments:
-        ----------
-        Additional keyword arguments to pass to the model.
+    classifier_activation : `str`
+        The activation function to use for the classifier layer. Default is "softmax". If the activation function is not a classification function, the model can be used in regression problems.
+    num_layers : `int`
+        The number of hidden layers in the classifier. Default is 1.
+    dropout : `float`
+        The dropout rate to use in the classifier. Default is None.
+    l2_reg : `float`
+        The L2 regularization parameter. Default is 0.0.
+    vae_mode : `bool`
+        Whether to use variational autoencoder mode. Default is False.
+    vae_units : `int`
+        The number of units in the variational autoencoder. Default is 2.
+    """
 
-        classifier_activation : `str`
-            The activation function to use for the classifier layer. Default is "softmax". If the activation function is not a classification function, the model can be used in regression problems.
-        num_layers : `int`
-            The number of hidden layers in the classifier. Default is 1.
-        dropout : `float`
-            The dropout rate to use in the classifier. Default is None.
-        l2_reg : `float`
-            The L2 regularization parameter. Default is 0.0.
-        """
+    def __init__(self, input_shape_parm, num_classes, units, activation, **kwargs):
         super(AutoClassifier, self).__init__()
         self.input_shape_parm = input_shape_parm
         self.num_classes = num_classes
@@ -99,9 +273,10 @@ class AutoClassifier(tf.keras.Model):
         self.num_layers = kwargs.get("num_layers", 1)
         self.dropout = kwargs.get("dropout", None)
         self.l2_reg = kwargs.get("l2_reg", 0.0)
+        self.vae_mode = kwargs.get("vae_mode", False)
+        self.vae_units = kwargs.get("vae_units", 2)
 
-    def build(self, input_shape):
-        # Encoder with L2 regularization
+    def build_encoder_decoder(self, input_shape):
         self.encoder = (
             tf.keras.Sequential(
                 [
@@ -121,7 +296,6 @@ class AutoClassifier(tf.keras.Model):
             else self.encoder
         )
 
-        # Decoder with L2 regularization
         self.decoder = (
             tf.keras.Sequential(
                 [
@@ -141,6 +315,58 @@ class AutoClassifier(tf.keras.Model):
             else self.decoder
         )
 
+    def build(self, input_shape):
+        if self.vae_mode:
+            inputs = tf.keras.Input(shape=self.input_shape_parm, name="encoder_input")
+            x = tf.keras.layers.Dense(
+                units=self.units,
+                kernel_regularizer=l2(self.l2_reg),
+                kernel_initializer="he_normal",
+            )(inputs)
+            x = tf.keras.layers.BatchNormalization()(x)
+            x = tf.keras.layers.Activation(self.activation)(x)
+            x = tf.keras.layers.Dense(
+                units=int(self.units / 2),
+                kernel_regularizer=l2(self.l2_reg),
+                kernel_initializer="he_normal",
+                name="encoder_hidden",
+            )(x)
+            x = tf.keras.layers.BatchNormalization()(x)
+            x = tf.keras.layers.Activation(self.activation)(x)
+
+            mean = tf.keras.layers.Dense(self.vae_units, name="mean")(x)
+            log_var = tf.keras.layers.Dense(self.vae_units, name="log_var")(x)
+            log_var = tf.keras.layers.Lambda(lambda x: x + 1e-7)(log_var)
+
+            self.encoder = (
+                tf.keras.Model(inputs, [mean, log_var], name="encoder")
+                if not self.encoder
+                else self.encoder
+            )
+            self.decoder = (
+                tf.keras.Sequential(
+                    [
+                        tf.keras.layers.Dense(
+                            units=self.units,
+                            kernel_regularizer=l2(self.l2_reg),
+                        ),
+                        tf.keras.layers.BatchNormalization(),
+                        tf.keras.layers.Activation(self.activation),
+                        tf.keras.layers.Dense(
+                            units=self.input_shape_parm,
+                            kernel_regularizer=l2(self.l2_reg),
+                        ),
+                        tf.keras.layers.BatchNormalization(),
+                        tf.keras.layers.Activation(self.activation),
+                    ]
+                )
+                if not self.decoder
+                else self.decoder
+            )
+
+        else:
+            self.build_encoder_decoder(input_shape)
+
         # Classifier with L2 regularization
         self.classifier = tf.keras.Sequential()
         if self.num_layers > 1:
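
For orientation, constructing the model in both modes might look like this (all hyperparameter values are illustrative):

    clf = AutoClassifier(
        input_shape_parm=16, num_classes=3, units=32, activation="relu",
        num_layers=2, dropout=0.2, l2_reg=1e-4,
    )
    vae_clf = AutoClassifier(
        input_shape_parm=16, num_classes=3, units=32, activation="relu",
        vae_mode=True, vae_units=2,
    )
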
@@ -162,8 +388,75 @@ class AutoClassifier(tf.keras.Model):
             )
         )
 
+    def train_encoder_decoder(
+        self, data, epochs, batch_size, validation_split=0.2, patience=10, **kwargs
+    ):
+        """
+        Trains the encoder and decoder on the input data.
+
+        Parameters
+        ----------
+        data : `tf.data.Dataset`, `np.ndarray`
+            The input data.
+        epochs : `int`
+            The number of epochs to train for.
+        batch_size : `int`
+            The batch size to use.
+        validation_split : `float`
+            The proportion of the dataset to use for validation. Default is 0.2.
+        patience : `int`
+            The number of epochs to wait before early stopping. Default is 10.
+
+        Keyword Arguments:
+        ----------
+            Additional keyword arguments to pass to the model.
+        """
+        verbose = kwargs.get("verbose", True)
+        optimizer = kwargs.get("optimizer", tf.keras.optimizers.Adam())
+        dummy_input = tf.convert_to_tensor(tf.random.normal([1, self.input_shape_parm]))
+        self.build(dummy_input.shape)
+        if not self.vae_mode:
+            dummy_output = self.encoder(dummy_input)
+            self.decoder(dummy_output)
+        else:
+            mean, log_var = self.encoder(dummy_input)
+            dummy_output = sampling(mean, log_var)
+            self.decoder(dummy_output)
+
+        if isinstance(data, np.ndarray):
+            data = tf.data.Dataset.from_tensor_slices(data).batch(batch_size)
+            data = data.map(lambda x: tf.cast(x, tf.float32))
+
+        early_stopping = EarlyStopping(patience=patience)
+        train_batches = data.take(int((1 - validation_split) * len(data)))
+        val_batches = data.skip(int((1 - validation_split) * len(data)))
+        for epoch in range(epochs):
+            for train_batch, val_batch in zip(train_batches, val_batches):
+                loss_train = train_step(
+                    train_batch, self.encoder, self.decoder, optimizer, self.vae_mode
+                )
+                loss_val = cal_loss_step(
+                    val_batch, self.encoder, self.decoder, self.vae_mode, False
+                )
+
+            early_stopping(loss_train)
+
+            if early_stopping.stop_training:
+                print(f"Early stopping triggered at epoch {epoch}.")
+                break
+
+            if epoch % 10 == 0 and verbose:
+                print(
+                    f"Epoch {epoch}: Train Loss: {loss_train:.6f} Validation Loss: {loss_val:.6f}"
+                )
+        self.freeze_encoder_decoder()
+
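
A hypothetical pretraining call (X is an illustrative float32 feature matrix); note that the method freezes the encoder and decoder when it finishes, so only the classifier trains afterwards:

    X = np.random.rand(512, 16).astype("float32")
    model = AutoClassifier(input_shape_parm=16, num_classes=3, units=32, activation="relu")
    model.train_encoder_decoder(X, epochs=100, batch_size=32, validation_split=0.2, patience=5)
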
     def call(self, x):
-        encoded = self.encoder(x)
+        if self.vae_mode:
+            mean, log_var = self.encoder(x)
+            encoded = sampling(mean, log_var)
+        else:
+            encoded = self.encoder(x)
         decoded = self.decoder(encoded)
         combined = tf.concat([decoded, encoded], axis=1)
         classification = self.classifier(combined)
@@ -190,7 +483,7 @@ class AutoClassifier(tf.keras.Model):
     def set_encoder_decoder(self, source_model):
         """
         Sets the encoder and decoder layers from another AutoClassifier instance,
-        ensuring compatibility in dimensions.
+        ensuring compatibility in dimensions. Only works if vae_mode is False.
 
         Parameters:
         -----------
@@ -257,6 +550,8 @@ class AutoClassifier(tf.keras.Model):
             "num_layers": self.num_layers,
             "dropout": self.dropout,
             "l2_reg": self.l2_reg,
+            "vae_mode": self.vae_mode,
+            "vae_units": self.vae_units,
         }
         base_config = super(AutoClassifier, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
@@ -272,6 +567,8 @@ class AutoClassifier(tf.keras.Model):
             num_layers=config["num_layers"],
             dropout=config["dropout"],
             l2_reg=config["l2_reg"],
+            vae_mode=config["vae_mode"],
+            vae_units=config["vae_units"],
         )
 
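
Because the class is registered with @tf.keras.utils.register_keras_serializable and round-trips through get_config/from_config, a trained instance should survive a save/load cycle; a sketch (the file path is illustrative):

    model.save("autoclassifier.keras")
    restored = tf.keras.models.load_model("autoclassifier.keras")
    assert restored.get_config()["vae_mode"] == model.get_config()["vae_mode"]
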
 
@@ -302,6 +599,8 @@ def call_existing_code(
         The shape of the input data.
     num_classes : `int`
         The number of classes in the dataset.
+    num_layers : `int`
+        The number of hidden layers in the classifier. Default is 1.
 
     Returns
     -------
@@ -578,7 +877,10 @@ class GetInsights:
     def __init__(self, model: AutoClassifier, inputs: np.ndarray) -> None:
         self.inputs = inputs
         self.model = model
-        self.encoder_layer = self.model.encoder.layers[0]
+        if isinstance(self.model.encoder.layers[0], InputLayer):
+            self.encoder_layer = self.model.encoder.layers[1]
+        else:
+            self.encoder_layer = self.model.encoder.layers[0]
         self.decoder_layer = self.model.decoder.layers[0]
         self.encoder_weights = self.encoder_layer.get_weights()[0]
         self.decoder_weights = self.decoder_layer.get_weights()[0]
@@ -607,7 +909,12 @@ class GetInsights:
         indexes = np.random.choice(np.arange(inputs.shape[0]), n, replace=False)
         inputs = inputs[indexes]
         inputs[np.isnan(inputs)] = 0.0
-        encoded = self.model.encoder(inputs)
+        # check if self.model.encoder(inputs) has two outputs
+        try:
+            mean, log_var = self.model.encoder(inputs)
+            encoded = sampling(mean, log_var)
+        except Exception:
+            encoded = self.model.encoder(inputs)
         reconstructed = self.model.decoder(encoded)
         combined = tf.concat([reconstructed, encoded], axis=1)
         self.classification = self.model.classifier(combined).numpy().argmax(axis=1)
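
An alternative to the try/except probe, assuming the VAE encoder is the functional tf.keras.Model(inputs, [mean, log_var]) built above, would be to count the encoder's symbolic outputs:

    encoder = model.encoder  # `model` stands for a built AutoClassifier
    if isinstance(encoder, tf.keras.Model) and len(encoder.outputs) == 2:
        mean, log_var = encoder(inputs)
        encoded = sampling(mean, log_var)
    else:
        encoded = encoder(inputs)
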
likelihood-1.3.1.dist-info/METADATA → likelihood-1.4.0.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: likelihood
-Version: 1.3.1
+Version: 1.4.0
 Summary: A package that performs the maximum likelihood algorithm.
 Home-page: https://github.com/jzsmoreno/likelihood/
 Author: J. A. Moreno-Guerra
likelihood-1.3.1.dist-info/RECORD → likelihood-1.4.0.dist-info/RECORD
@@ -9,12 +9,12 @@ likelihood/models/regression.py,sha256=9cakyGlJCEO6WfpoKLh3GxdXQeQp7cUvJIkQ5odT0
 likelihood/models/simulation.py,sha256=LFyE_szo7sDukviMLeg_6RoyAaI7yMXUy8f4mDOrGoc,8460
 likelihood/models/utils.py,sha256=dvigPi_hxcs5ntfHr7Y1JvP5ULtMW3kkN0nJpS4orE8,1319
 likelihood/models/deep/__init__.py,sha256=-KIPippVaMqgG8mEgYjNxYQdqOUcFhUuKhbVe8TTCfo,28
-likelihood/models/deep/autoencoders.py,sha256=S11ARmoROTNFC4AZLuTcB-ymbm14NUH-a0Dg861fsYM,28203
+likelihood/models/deep/autoencoders.py,sha256=OKS-Hudn4gxm7ttu8cjJ0PX7RGlmFpN2Xd-FEIvkagU,37866
 likelihood/tools/__init__.py,sha256=MCjsCWfBNKE2uMN0VizDN1uFzZ_md0X2WZeBdWhrCR8,50
 likelihood/tools/numeric_tools.py,sha256=FA44kbiAcxcquz1el_g3Pqsp5ii8XFkAIrsMs5bGkj0,11445
 likelihood/tools/tools.py,sha256=6JLZBHxc4f1lJfw4aBwdS2s16EpydFNqLZF73I7wddQ,44412
-likelihood-1.3.1.dist-info/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
-likelihood-1.3.1.dist-info/METADATA,sha256=CuHvFiy8Pr1ToXw2oCvqoEsnImRtcifH2Mn7HQnFEkc,2822
-likelihood-1.3.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-likelihood-1.3.1.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
-likelihood-1.3.1.dist-info/RECORD,,
+likelihood-1.4.0.dist-info/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
+likelihood-1.4.0.dist-info/METADATA,sha256=5rLf_PhvtIGUNcqh6rp1YP7IW27UbxcRKdm87j9-1qU,2822
+likelihood-1.4.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+likelihood-1.4.0.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
+likelihood-1.4.0.dist-info/RECORD,,