likelihood 1.3.2-py3-none-any.whl → 1.4.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
likelihood/graph/nn.py CHANGED
@@ -5,7 +5,7 @@ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
  logging.getLogger("tensorflow").setLevel(logging.ERROR)

  import warnings
- from typing import List, Tuple
+ from typing import Any, List, Tuple

  import numpy as np
  import pandas as pd
@@ -15,48 +15,43 @@ from pandas.core.frame import DataFrame
  from sklearn.metrics import f1_score
  from sklearn.model_selection import train_test_split

- from likelihood.tools import generate_feature_yaml
-
  tf.get_logger().setLevel("ERROR")

+ from likelihood.tools import LoRALayer

- def compare_similarity(arr1: np.ndarray, arr2: np.ndarray) -> int:
-     """Compares the similarity between two arrays of categories.

-     Parameters
-     ----------
-     arr1 : `ndarray`
-         The first array of categories.
-     arr2 : `ndarray`
-         The second array of categories.
+ def compare_similarity(arr1: List[Any], arr2: List[Any], threshold: float = 0.05) -> int:
+     """Calculate the similarity between two arrays considering numeric values near to 1 in ratio."""

-     Returns
-     -------
-     count: `int`
-         The number of categories that are the same in both arrays.
-     """
+     def is_similar(a: Any, b: Any) -> bool:
+         if isinstance(a, (int, float)) and isinstance(b, (int, float)):
+             if a == 0 and b == 0:
+                 return True
+             if a == 0 or b == 0:
+                 return False
+             # For numeric values, check if their ratio is within the threshold range
+             ratio = max(a, b) / min(a, b)
+             return 1 - threshold <= ratio <= 1 + threshold
+         else:
+             return a == b

-     count = 0
-     for i in range(len(arr1)):
-         if arr1[i] == arr2[i]:
-             count += 1
-     return count
+     return sum(is_similar(a, b) for a, b in zip(arr1, arr2))


  def cal_adjacency_matrix(
      df: DataFrame, exclude_subset: List[str] = [], sparse: bool = True, **kwargs
  ) -> Tuple[dict, np.ndarray]:
      """Calculates the adjacency matrix for a given DataFrame.
-     The adjacency matrix is a matrix that represents the similarity between each pair of categories.
+     The adjacency matrix is a matrix that represents the similarity between each pair of features.
      The similarity is calculated using the `compare_similarity` function.
-     The resulting matrix is a square matrix with the same number of rows and columns as the input DataFrame.
+     The resulting matrix is a square matrix with the same number of rows and columns as the rows of the input DataFrame.

      Parameters
      ----------
      df : `DataFrame`
-         The input DataFrame containing the categories.
+         The input DataFrame containing the features.
      exclude_subset : `List[str]`, optional
-         A list of categories to exclude from the calculation of the adjacency matrix.
+         A list of features to exclude from the calculation of the adjacency matrix.
      sparse : `bool`, optional
          Whether to return a sparse matrix or a dense matrix.
      **kwargs : `dict`
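
For orientation, a minimal sketch (not part of the package) of how the reworked comparison behaves, assuming the module is importable as `likelihood.graph.nn`:

    from likelihood.graph.nn import compare_similarity

    # Numeric pairs now count as similar when max/min falls within 1 +/- threshold
    # (default 0.05); everything else still requires exact equality.
    print(compare_similarity([1.00, "a", 10], [1.02, "a", 12]))  # -> 2
    # 1.00 vs 1.02: ratio 1.02, similar; "a" vs "a": equal; 10 vs 12: ratio 1.2, not similar
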
@@ -65,48 +60,33 @@ def cal_adjacency_matrix(
      Keyword Arguments:
      ----------
      similarity: `int`
-         The minimum number of categories that must be the same in both arrays to be considered similar.
+         The minimum number of features that must be the same in both arrays to be considered similar.

      Returns
      -------
      adj_dict : `dict`
-         A dictionary containing the categories.
+         A dictionary containing the features.
      adjacency_matrix : `ndarray`
          The adjacency matrix.
      """

-     yaml_ = generate_feature_yaml(df)
-     categorical_columns = yaml_["categorical_features"]
      if len(exclude_subset) > 0:
-         categorical_columns = [col for col in categorical_columns if col not in exclude_subset]
-
-     if len(categorical_columns) > 1:
-         df_categorical = df[categorical_columns].copy()
+         columns = [col for col in df.columns if col not in exclude_subset]
+         df_ = df[columns].copy()
      else:
-         categorical_columns = [
-             col
-             for col in df.columns
-             if (
-                 col not in exclude_subset
-                 and pd.api.types.is_integer_dtype(df[col])
-                 and len(df[col].unique()) > 2
-             )
-         ]
-         df_categorical = df[categorical_columns].copy()
+         df_ = df.copy()

-     assert len(df_categorical) > 0
+     assert len(df_) > 0

-     similarity = kwargs.get("similarity", len(df_categorical.columns) - 1)
-     assert similarity <= df_categorical.shape[1]
+     similarity = kwargs.get("similarity", len(df_.columns) - 1)
+     assert similarity <= df_.shape[1]

-     adj_dict = {}
-     for index, row in df_categorical.iterrows():
-         adj_dict[index] = row.to_list()
+     adj_dict = {index: row.tolist() for index, row in df_.iterrows()}

-     adjacency_matrix = np.zeros((len(df_categorical), len(df_categorical)))
+     adjacency_matrix = np.zeros((len(df_), len(df_)))

-     for i in range(len(df_categorical)):
-         for j in range(len(df_categorical)):
+     for i in range(len(df_)):
+         for j in range(len(df_)):
              if compare_similarity(adj_dict[i], adj_dict[j]) >= similarity:
                  adjacency_matrix[i][j] = 1
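
A small illustrative run of the simplified function (hypothetical data; with two columns the default `similarity` threshold is `len(columns) - 1 = 1`):

    import pandas as pd

    from likelihood.graph.nn import cal_adjacency_matrix

    df = pd.DataFrame({"a": [1.0, 1.02, 3.0], "b": ["x", "x", "y"]})
    adj_dict, adjacency = cal_adjacency_matrix(df)
    # Rows 0 and 1 agree on both columns (1.0 vs 1.02 sits inside the 5% ratio band),
    # so they are linked; row 2 only matches itself:
    # [[1. 1. 0.]
    #  [1. 1. 0.]
    #  [0. 0. 1.]]
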
@@ -131,8 +111,10 @@ class Data:
          df: DataFrame,
          target: str | None = None,
          exclude_subset: List[str] = [],
+         **kwargs,
      ):
-         _, adjacency = cal_adjacency_matrix(df, exclude_subset=exclude_subset, sparse=True)
+         sparse = kwargs.get("sparse", True)
+         _, adjacency = cal_adjacency_matrix(df, exclude_subset=exclude_subset, sparse=sparse)
          if target is not None:
              X = df.drop(columns=[target] + exclude_subset)
          else:
@@ -147,16 +129,20 @@ class Data:

  @tf.keras.utils.register_keras_serializable(package="Custom", name="VanillaGNNLayer")
  class VanillaGNNLayer(tf.keras.layers.Layer):
-     def __init__(self, dim_in, dim_out, kernel_initializer="glorot_uniform", **kwargs):
+     def __init__(self, dim_in, dim_out, rank=None, kernel_initializer="glorot_uniform", **kwargs):
          super(VanillaGNNLayer, self).__init__(**kwargs)
          self.dim_out = dim_out
+         self.rank = rank
          self.kernel_initializer = kernel_initializer
          self.linear = None

      def build(self, input_shape):
-         self.linear = tf.keras.layers.Dense(
-             self.dim_out, use_bias=False, kernel_initializer=self.kernel_initializer
-         )
+         if self.rank:
+             self.linear = LoRALayer(self.dim_out, rank=self.rank)
+         else:
+             self.linear = tf.keras.layers.Dense(
+                 self.dim_out, use_bias=False, kernel_initializer=self.kernel_initializer
+             )
          super(VanillaGNNLayer, self).build(input_shape)

      def call(self, x, adjacency):
@@ -169,8 +155,11 @@ class VanillaGNNLayer(tf.keras.layers.Layer):
          config.update(
              {
                  "dim_out": self.dim_out,
-                 "kernel_initializer": tf.keras.initializers.serialize(
-                     self.linear.kernel_initializer
+                 "rank": self.rank,
+                 "kernel_initializer": (
+                     None
+                     if self.rank
+                     else tf.keras.initializers.serialize(self.linear.kernel_initializer)
                  ),
              }
          )
@@ -179,14 +168,16 @@ class VanillaGNNLayer(tf.keras.layers.Layer):

  @tf.keras.utils.register_keras_serializable(package="Custom", name="VanillaGNN")
  class VanillaGNN(tf.keras.Model):
-     def __init__(self, dim_in, dim_h, dim_out, **kwargs):
+     def __init__(self, dim_in, dim_h, dim_out, rank=2, **kwargs):
          super(VanillaGNN, self).__init__(**kwargs)
          self.dim_in = dim_in
          self.dim_h = dim_h
          self.dim_out = dim_out
-         self.gnn1 = VanillaGNNLayer(self.dim_in, self.dim_h)
-         self.gnn2 = VanillaGNNLayer(self.dim_h, self.dim_h)
-         self.gnn3 = VanillaGNNLayer(self.dim_h, self.dim_out)
+         self.rank = rank
+
+         self.gnn1 = VanillaGNNLayer(self.dim_in, self.dim_h, self.rank)
+         self.gnn2 = VanillaGNNLayer(self.dim_h, self.dim_h, self.rank)
+         self.gnn3 = VanillaGNNLayer(self.dim_h, self.dim_out, None)

      def call(self, x, adjacency):
          h = self.gnn1(x, adjacency)
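
The effect of the new `rank` argument, in short: the two hidden layers route their projection through a `LoRALayer`, while the output layer keeps a plain `Dense`. A hypothetical construction:

    layer_lora = VanillaGNNLayer(dim_in=16, dim_out=8, rank=2)  # low-rank projection
    layer_dense = VanillaGNNLayer(dim_in=16, dim_out=8)         # rank=None falls back to Dense
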
@@ -208,13 +199,13 @@ class VanillaGNN(tf.keras.Model):
          out = self(x, adjacency)
          loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=out)
          loss = tf.reduce_mean(loss)
-         f1 = self.compute_f1_score(out, y)
+         f1 = round(self.compute_f1_score(out, y), 4)
          return loss.numpy(), f1

      def test(self, data):
          out = self(data.x, data.adjacency)
          test_f1 = self.compute_f1_score(out, data.y)
-         return test_f1
+         return round(test_f1, 4)

      def predict(self, data):
          out = self(data.x, data.adjacency)
@@ -225,6 +216,7 @@ class VanillaGNN(tf.keras.Model):
              "dim_in": self.dim_in,
              "dim_h": self.dim_h,
              "dim_out": self.dim_out,
+             "rank": self.rank,
          }
          base_config = super(VanillaGNN, self).get_config()
          return dict(list(base_config.items()) + list(config.items()))
@@ -235,6 +227,7 @@ class VanillaGNN(tf.keras.Model):
              dim_in=config["dim_in"],
              dim_h=config["dim_h"],
              dim_out=config["dim_out"],
+             rank=config["rank"],
          )

      @tf.function
@@ -248,10 +241,6 @@ class VanillaGNN(tf.keras.Model):
          return loss

      def fit(self, data, epochs, batch_size, test_size=0.2, optimizer="adam"):
-         warnings.warn(
-             "It is normal for validation metrics to underperform. Use the test method to validate after training.",
-             UserWarning,
-         )
          optimizers = {
              "sgd": tf.keras.optimizers.SGD(),
              "adam": tf.keras.optimizers.Adam(),
@@ -290,56 +279,20 @@ class VanillaGNN(tf.keras.Model):
              train_f1_scores.append(train_f1)

              if epoch % 5 == 0:
+                 clear_output(wait=True)
+                 warnings.warn(
+                     "It is normal for validation metrics to underperform during training. Use the test method to validate after training.",
+                     UserWarning,
+                 )
                  val_loss, val_f1 = self.evaluate(X_test, adjacency_test, y_test)
                  val_losses.append(val_loss)
                  val_f1_scores.append(val_f1)
-                 clear_output(wait=True)
                  print(
-                     f"Epoch {epoch:>3} | Train Loss: {train_loss:.3f} | Train F1: {train_f1:.3f} | Val Loss: {val_loss:.3f} | Val F1: {val_f1:.3f}"
+                     f"Epoch {epoch:>3} | Train Loss: {train_loss:.4f} | Train F1: {train_f1:.4f} | Val Loss: {val_loss:.4f} | Val F1: {val_f1:.4f}"
                  )

          return train_losses, train_f1_scores, val_losses, val_f1_scores


  if __name__ == "__main__":
-     # Example usage
-     import pandas as pd
-     from sklearn.datasets import load_iris
-
-     # Load the dataset
-     iris = load_iris()
-
-     # Convert to a DataFrame for easy exploration
-     iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
-     iris_df["species"] = iris.target
-
-     iris_df["sepal length (cm)"] = iris_df["sepal length (cm)"].astype("category")
-     iris_df["sepal width (cm)"] = iris_df["sepal width (cm)"].astype("category")
-     iris_df["petal length (cm)"] = iris_df["petal length (cm)"].astype("category")
-     iris_df["petal width (cm)"] = iris_df["petal width (cm)"].astype("category")
-
-     # Display the first few rows of the dataset
-     print(iris_df.head())
-
-     iris_df = iris_df.sample(frac=1, replace=False).reset_index(drop=True)
-
-     data = Data(iris_df, "species")
-
-     model = VanillaGNN(dim_in=data.x.shape[1], dim_h=8, dim_out=len(iris_df["species"].unique()))
-     print("Before training F1:", model.test(data))
-     model.fit(data, epochs=200, batch_size=32, test_size=0.5)
-     model.save("./best_model", save_format="tf")
-     print("After training F1:", model.test(data))
-     best_model = tf.keras.models.load_model("./best_model")
-
-     print("After loading F1:", best_model.test(data))
-     df_results = pd.DataFrame()
-
-     # Suppose we have a new dataset without the target variable
-     iris_df = iris_df.drop(columns=["species"])
-     data_new = Data(iris_df)
-     print("Predictions:", best_model.predict(data_new))
-     df_results["predicted"] = list(model.predict(data))
-     df_results["actual"] = list(data.y)
-     # df_results.to_csv("results.csv", index=False)
-     breakpoint()
+     print("Examples will be running below")
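
With the inline iris demo gone from `__main__`, an equivalent usage sketch of the updated API (adapted from the removed code; the `rank` argument is the only new piece):

    import pandas as pd
    from sklearn.datasets import load_iris

    from likelihood.graph.nn import Data, VanillaGNN

    iris = load_iris()
    df = pd.DataFrame(iris.data, columns=iris.feature_names)
    df["species"] = iris.target

    data = Data(df, "species")
    model = VanillaGNN(dim_in=data.x.shape[1], dim_h=8, dim_out=3, rank=2)
    model.fit(data, epochs=200, batch_size=32, test_size=0.5)
    print("After training F1:", model.test(data))
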
likelihood/models/deep/autoencoders.py CHANGED
@@ -24,7 +24,7 @@ from sklearn.manifold import TSNE
  from tensorflow.keras.layers import InputLayer
  from tensorflow.keras.regularizers import l2

- from likelihood.tools import OneHotEncoder
+ from likelihood.tools import LoRALayer, OneHotEncoder

  tf.get_logger().setLevel("ERROR")

@@ -39,53 +39,231 @@ def suppress_warnings(func):
      return wrapper


+ class EarlyStopping:
+     def __init__(self, patience=10, min_delta=0.001):
+         self.patience = patience
+         self.min_delta = min_delta
+         self.best_loss = np.inf
+         self.counter = 0
+         self.stop_training = False
+
+     def __call__(self, current_loss):
+         if self.best_loss - current_loss > self.min_delta:
+             self.best_loss = current_loss
+             self.counter = 0
+         else:
+             self.counter += 1
+
+         if self.counter >= self.patience:
+             self.stop_training = True
+
+
+ def mse_loss(y_true, y_pred):
+     """
+     Mean squared error loss function.
+
+     Parameters
+     ----------
+     y_true : `tf.Tensor`
+         The true values.
+     y_pred : `tf.Tensor`
+         The predicted values.
+
+     Returns
+     -------
+     `tf.Tensor`
+     """
+     return tf.reduce_mean(tf.square(y_true - y_pred))
+
+
+ def kl_loss(mean, log_var):
+     """
+     Kullback-Leibler divergence loss function.
+
+     Parameters
+     ----------
+     mean : `tf.Tensor`
+         The mean of the distribution.
+     log_var : `tf.Tensor`
+         The log variance of the distribution.
+
+     Returns
+     -------
+     `tf.Tensor`
+     """
+     return -0.5 * tf.reduce_mean(1 + log_var - tf.square(mean) - tf.exp(log_var))
+
+
+ def vae_loss(y_true, y_pred, mean, log_var):
+     """
+     Variational autoencoder loss function.
+
+     Parameters
+     ----------
+     y_true : `tf.Tensor`
+         The true values.
+     y_pred : `tf.Tensor`
+         The predicted values.
+     mean : `tf.Tensor`
+         The mean of the distribution.
+     log_var : `tf.Tensor`
+         The log variance of the distribution.
+
+     Returns
+     -------
+     `tf.Tensor`
+     """
+     return mse_loss(y_true, y_pred) + kl_loss(mean, log_var)
+
+
+ def sampling(mean, log_var, epsilon_value=1e-8):
+     """
+     Samples from the distribution.
+
+     Parameters
+     ----------
+     mean : `tf.Tensor`
+         The mean of the distribution.
+     log_var : `tf.Tensor`
+         The log variance of the distribution.
+     epsilon_value : float
+         A small value to avoid numerical instability.
+
+     Returns
+     -------
+     `tf.Tensor`
+     """
+     epsilon = tf.random.normal(shape=tf.shape(mean), mean=0.0, stddev=1.0)
+     stddev = tf.exp(0.5 * log_var) + epsilon_value
+     epsilon = tf.random.normal(shape=tf.shape(mean), mean=0.0, stddev=1.0)
+     return mean + stddev * epsilon
+
+
+ def check_for_nans(tensors, name="Tensor"):
+     for t in tensors:
+         if tf.reduce_any(tf.math.is_nan(t)) or tf.reduce_any(tf.math.is_inf(t)):
+             print(f"Warning: {name} contains NaNs or Infs")
+             return True
+     return False
+
+
+ def cal_loss_step(batch, encoder, decoder, vae_mode=False, training=True):
+     """
+     Calculates the loss value on a batch of data.
+
+     Parameters
+     ----------
+     batch : `tf.Tensor`
+         The batch of data.
+     encoder : `tf.keras.Model`
+         The encoder model.
+     decoder : `tf.keras.Model`
+         The decoder model.
+     optimizer : `tf.keras.optimizers.Optimizer`
+         The optimizer to use.
+     vae_mode : `bool`
+         Whether to use variational autoencoder mode. Default is False.
+     training : `bool`
+         Whether the model is in training mode. Default is True.
+
+     Returns
+     -------
+     `tf.Tensor`
+         The loss value.
+     """
+     if vae_mode:
+         mean, log_var = encoder(batch, training=training)
+         log_var = tf.clip_by_value(log_var, clip_value_min=1e-8, clip_value_max=tf.float32.max)
+         decoded = decoder(sampling(mean, log_var), training=training)
+         loss = vae_loss(batch, decoded, mean, log_var)
+     else:
+         encoded = encoder(batch, training=training)
+         decoded = decoder(encoded, training=training)
+         loss = mse_loss(batch, decoded)
+
+     return loss
+
+
+ @tf.function
+ def train_step(batch, encoder, decoder, optimizer, vae_mode=False):
+     """
+     Trains the model on a batch of data.
+
+     Parameters
+     ----------
+     mean : `tf.Tensor`
+         The mean of the distribution.
+     log_var : `tf.Tensor`
+         The log variance of the distribution.
+     batch : `tf.Tensor`
+         The batch of data.
+     encoder : `tf.keras.Model`
+         The encoder model.
+     decoder : `tf.keras.Model`
+         The decoder model.
+     optimizer : `tf.keras.optimizers.Optimizer`
+         The optimizer to use.
+     vae_mode : `bool`
+         Whether to use variational autoencoder mode. Default is False.
+
+     Returns
+     -------
+     `tf.Tensor`
+         The loss value.
+     """
+     optimizer.build(encoder.trainable_variables + decoder.trainable_variables)
+
+     with tf.GradientTape() as encoder_tape, tf.GradientTape() as decoder_tape:
+         loss = cal_loss_step(batch, encoder, decoder, vae_mode=vae_mode)
+
+     gradients_of_encoder = encoder_tape.gradient(loss, encoder.trainable_variables)
+     gradients_of_decoder = decoder_tape.gradient(loss, decoder.trainable_variables)
+
+     optimizer.apply_gradients(zip(gradients_of_encoder, encoder.trainable_variables))
+     optimizer.apply_gradients(zip(gradients_of_decoder, decoder.trainable_variables))
+
+     return loss
+
+
  @tf.keras.utils.register_keras_serializable(package="Custom", name="AutoClassifier")
  class AutoClassifier(tf.keras.Model):
      """
      An auto-classifier model that automatically determines the best classification strategy based on the input data.

-     Attributes:
-     - input_shape_parm: The shape of the input data.
-     - num_classes: The number of classes in the dataset.
-     - units: The number of neurons in each hidden layer.
-     - activation: The type of activation function to use for the neural network layers.
-
-     Methods:
-         __init__(self, input_shape_parm, num_classes, units, activation): Initializes an AutoClassifier instance with the given parameters.
-         build(self, input_shape_parm): Builds the model architecture based on input_shape_parm.
-         call(self, x): Defines the forward pass of the model.
-         get_config(self): Returns the configuration of the model.
-         from_config(cls, config): Recreates an instance of AutoClassifier from its configuration.
-     """
-
-     def __init__(self, input_shape_parm, num_classes, units, activation, **kwargs):
-         """
-         Initializes an AutoClassifier instance with the given parameters.
+     Parameters
+     ----------
+     input_shape_parm : `int`
+         The shape of the input data.
+     num_classes : `int`
+         The number of classes in the dataset.
+     units : `int`
+         The number of neurons in each hidden layer.
+     activation : `str`
+         The type of activation function to use for the neural network layers.

-         Parameters
-         ----------
-         input_shape_parm : `int`
-             The shape of the input data.
-         num_classes : `int`
-             The number of classes in the dataset.
-         units : `int`
-             The number of neurons in each hidden layer.
-         activation : `str`
-             The type of activation function to use for the neural network layers.
+     Keyword Arguments:
+     ----------
+     Additional keyword arguments to pass to the model.

-         Keyword Arguments:
-         ----------
-         Additional keyword arguments to pass to the model.
+     classifier_activation : `str`
+         The activation function to use for the classifier layer. Default is "softmax". If the activation function is not a classification function, the model can be used in regression problems.
+     num_layers : `int`
+         The number of hidden layers in the classifier. Default is 1.
+     dropout : `float`
+         The dropout rate to use in the classifier. Default is None.
+     l2_reg : `float`
+         The L2 regularization parameter. Default is 0.0.
+     vae_mode : `bool`
+         Whether to use variational autoencoder mode. Default is False.
+     vae_units : `int`
+         The number of units in the variational autoencoder. Default is 2.
+     lora_mode : `bool`
+         Whether to use LoRA layers. Default is False.
+     lora_rank : `int`
+         The rank of the LoRA layer. Default is 4.
+     """

-         classifier_activation : `str`
-             The activation function to use for the classifier layer. Default is "softmax". If the activation function is not a classification function, the model can be used in regression problems.
-         num_layers : `int`
-             The number of hidden layers in the classifier. Default is 1.
-         dropout : `float`
-             The dropout rate to use in the classifier. Default is None.
-         l2_reg : `float`
-             The L2 regularization parameter. Default is 0.0.
-         """
+     def __init__(self, input_shape_parm, num_classes, units, activation, **kwargs):
          super(AutoClassifier, self).__init__()
          self.input_shape_parm = input_shape_parm
          self.num_classes = num_classes
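
To make the new loss helpers concrete, a small sketch with hand-picked tensors (zero mean and unit variance make the KL term vanish):

    import tensorflow as tf

    from likelihood.models.deep.autoencoders import kl_loss, mse_loss, sampling, vae_loss

    y_true = tf.constant([[0.0, 1.0]])
    mean = tf.zeros((1, 2))
    log_var = tf.zeros((1, 2))

    z = sampling(mean, log_var)                   # z ~ N(mean, exp(log_var))
    print(float(mse_loss(y_true, y_true)))        # 0.0 for a perfect reconstruction
    print(float(kl_loss(mean, log_var)))          # 0.0 for a standard normal
    print(float(vae_loss(y_true, y_true, mean, log_var)))  # sum of the two terms
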
@@ -99,9 +277,12 @@ class AutoClassifier(tf.keras.Model):
          self.num_layers = kwargs.get("num_layers", 1)
          self.dropout = kwargs.get("dropout", None)
          self.l2_reg = kwargs.get("l2_reg", 0.0)
+         self.vae_mode = kwargs.get("vae_mode", False)
+         self.vae_units = kwargs.get("vae_units", 2)
+         self.lora_mode = kwargs.get("lora_mode", False)
+         self.lora_rank = kwargs.get("lora_rank", 4)

-     def build(self, input_shape):
-         # Encoder with L2 regularization
+     def build_encoder_decoder(self, input_shape):
          self.encoder = (
              tf.keras.Sequential(
                  [
@@ -121,7 +302,6 @@ class AutoClassifier(tf.keras.Model):
              else self.encoder
          )

-         # Decoder with L2 regularization
          self.decoder = (
              tf.keras.Sequential(
                  [
@@ -141,9 +321,61 @@ class AutoClassifier(tf.keras.Model):
              else self.decoder
          )

+     def build(self, input_shape):
+         if self.vae_mode:
+             inputs = tf.keras.Input(shape=self.input_shape_parm, name="encoder_input")
+             x = tf.keras.layers.Dense(
+                 units=self.units,
+                 kernel_regularizer=l2(self.l2_reg),
+                 kernel_initializer="he_normal",
+             )(inputs)
+             x = tf.keras.layers.BatchNormalization()(x)
+             x = tf.keras.layers.Activation(self.activation)(x)
+             x = tf.keras.layers.Dense(
+                 units=int(self.units / 2),
+                 kernel_regularizer=l2(self.l2_reg),
+                 kernel_initializer="he_normal",
+                 name="encoder_hidden",
+             )(x)
+             x = tf.keras.layers.BatchNormalization()(x)
+             x = tf.keras.layers.Activation(self.activation)(x)
+
+             mean = tf.keras.layers.Dense(2, name="mean")(x)
+             log_var = tf.keras.layers.Dense(2, name="log_var")(x)
+             log_var = tf.keras.layers.Lambda(lambda x: x + 1e-7)(log_var)
+
+             self.encoder = (
+                 tf.keras.Model(inputs, [mean, log_var], name="encoder")
+                 if not self.encoder
+                 else self.encoder
+             )
+             self.decoder = (
+                 tf.keras.Sequential(
+                     [
+                         tf.keras.layers.Dense(
+                             units=self.units,
+                             kernel_regularizer=l2(self.l2_reg),
+                         ),
+                         tf.keras.layers.BatchNormalization(),
+                         tf.keras.layers.Activation(self.activation),
+                         tf.keras.layers.Dense(
+                             units=self.input_shape_parm,
+                             kernel_regularizer=l2(self.l2_reg),
+                         ),
+                         tf.keras.layers.BatchNormalization(),
+                         tf.keras.layers.Activation(self.activation),
+                     ]
+                 )
+                 if not self.decoder
+                 else self.decoder
+             )
+
+         else:
+             self.build_encoder_decoder(input_shape)
+
          # Classifier with L2 regularization
          self.classifier = tf.keras.Sequential()
-         if self.num_layers > 1:
+         if self.num_layers > 1 and not self.lora_mode:
              for _ in range(self.num_layers - 1):
                  self.classifier.add(
                      tf.keras.layers.Dense(
@@ -154,16 +386,106 @@ class AutoClassifier(tf.keras.Model):
                      )
              if self.dropout:
                  self.classifier.add(tf.keras.layers.Dropout(self.dropout))
-         self.classifier.add(
-             tf.keras.layers.Dense(
-                 units=self.num_classes,
-                 activation=self.classifier_activation,
-                 kernel_regularizer=l2(self.l2_reg),
+             self.classifier.add(
+                 tf.keras.layers.Dense(
+                     units=self.num_classes,
+                     activation=self.classifier_activation,
+                     kernel_regularizer=l2(self.l2_reg),
+                 )
              )
-         )
+         elif self.lora_mode:
+             for _ in range(self.num_layers - 1):
+                 self.classifier.add(
+                     LoRALayer(units=self.units, rank=self.lora_rank, name=f"LoRA_{_}")
+                 )
+                 self.classifier.add(tf.keras.layers.Activation(self.activation))
+             if self.dropout:
+                 self.classifier.add(tf.keras.layers.Dropout(self.dropout))
+             self.classifier.add(
+                 tf.keras.layers.Dense(
+                     units=self.num_classes,
+                     activation=self.classifier_activation,
+                     kernel_regularizer=l2(self.l2_reg),
+                 )
+             )
+         else:
+             self.classifier.add(
+                 tf.keras.layers.Dense(
+                     units=self.num_classes,
+                     activation=self.classifier_activation,
+                     kernel_regularizer=l2(self.l2_reg),
+                 )
+             )
+
+     def train_encoder_decoder(
+         self, data, epochs, batch_size, validation_split=0.2, patience=10, **kwargs
+     ):
+         """
+         Trains the encoder and decoder on the input data.
+
+         Parameters
+         ----------
+         data : `tf.data.Dataset`, `np.ndarray`
+             The input data.
+         epochs : `int`
+             The number of epochs to train for.
+         batch_size : `int`
+             The batch size to use.
+         validation_split : `float`
+             The proportion of the dataset to use for validation. Default is 0.2.
+         patience : `int`
+             The number of epochs to wait before early stopping. Default is 10.
+
+         Keyword Arguments:
+         ----------
+         Additional keyword arguments to pass to the model.
+         """
+         verbose = kwargs.get("verbose", True)
+         optimizer = kwargs.get("optimizer", tf.keras.optimizers.Adam())
+         dummy_input = tf.convert_to_tensor(tf.random.normal([1, self.input_shape_parm]))
+         self.build(dummy_input.shape)
+         if not self.vae_mode:
+             dummy_output = self.encoder(dummy_input)
+             self.decoder(dummy_output)
+         else:
+             mean, log_var = self.encoder(dummy_input)
+             dummy_output = sampling(mean, log_var)
+             self.decoder(dummy_output)
+
+         if isinstance(data, np.ndarray):
+             data = tf.data.Dataset.from_tensor_slices(data).batch(batch_size)
+             data = data.map(lambda x: tf.cast(x, tf.float32))
+
+         early_stopping = EarlyStopping(patience=patience)
+         train_batches = data.take(int((1 - validation_split) * len(data)))
+         val_batches = data.skip(int((1 - validation_split) * len(data)))
+         for epoch in range(epochs):
+             for train_batch, val_batch in zip(train_batches, val_batches):
+                 loss_train = train_step(
+                     train_batch, self.encoder, self.decoder, optimizer, self.vae_mode
+                 )
+                 loss_val = cal_loss_step(
+                     val_batch, self.encoder, self.decoder, self.vae_mode, False
+                 )
+
+             early_stopping(loss_train)
+
+             if early_stopping.stop_training:
+                 print(f"Early stopping triggered at epoch {epoch}.")
+                 break
+
+             if epoch % 10 == 0 and verbose:
+                 print(
+                     f"Epoch {epoch}: Train Loss: {loss_train:.6f} Validation Loss: {loss_val:.6f}"
+                 )
+         self.freeze_encoder_decoder()

      def call(self, x):
-         encoded = self.encoder(x)
+         if self.vae_mode:
+             mean, log_var = self.encoder(x)
+             encoded = sampling(mean, log_var)
+         else:
+             encoded = self.encoder(x)
          decoded = self.decoder(encoded)
          combined = tf.concat([decoded, encoded], axis=1)
          classification = self.classifier(combined)
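
A minimal usage sketch of the new modes on hypothetical data (`train_encoder_decoder` pretrains the autoencoder and then freezes it via `freeze_encoder_decoder`):

    import numpy as np

    from likelihood.models.deep.autoencoders import AutoClassifier

    X = np.random.rand(256, 10).astype("float32")

    clf = AutoClassifier(
        input_shape_parm=10,
        num_classes=3,
        units=16,
        activation="relu",
        vae_mode=True,   # variational encoder/decoder
        lora_mode=True,  # LoRA layers in the classifier head
        lora_rank=4,
    )
    clf.train_encoder_decoder(X, epochs=20, batch_size=32)
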
@@ -190,7 +512,7 @@ class AutoClassifier(tf.keras.Model):
      def set_encoder_decoder(self, source_model):
          """
          Sets the encoder and decoder layers from another AutoClassifier instance,
-         ensuring compatibility in dimensions.
+         ensuring compatibility in dimensions. Only works if vae_mode is False.

          Parameters:
          -----------
@@ -257,6 +579,10 @@ class AutoClassifier(tf.keras.Model):
              "num_layers": self.num_layers,
              "dropout": self.dropout,
              "l2_reg": self.l2_reg,
+             "vae_mode": self.vae_mode,
+             "vae_units": self.vae_units,
+             "lora_mode": self.lora_mode,
+             "lora_rank": self.lora_rank,
          }
          base_config = super(AutoClassifier, self).get_config()
          return dict(list(base_config.items()) + list(config.items()))
@@ -272,6 +598,10 @@ class AutoClassifier(tf.keras.Model):
              num_layers=config["num_layers"],
              dropout=config["dropout"],
              l2_reg=config["l2_reg"],
+             vae_mode=config["vae_mode"],
+             vae_units=config["vae_units"],
+             lora_mode=config["lora_mode"],
+             lora_rank=config["lora_rank"],
          )

@@ -302,6 +632,8 @@ def call_existing_code(
          The shape of the input data.
      num_classes : `int`
          The number of classes in the dataset.
+     num_layers : `int`
+         The number of hidden layers in the classifier. Default is 1.

      Returns
      -------
@@ -578,7 +910,10 @@ class GetInsights:
      def __init__(self, model: AutoClassifier, inputs: np.ndarray) -> None:
          self.inputs = inputs
          self.model = model
-         self.encoder_layer = self.model.encoder.layers[0]
+         if isinstance(self.model.encoder.layers[0], InputLayer):
+             self.encoder_layer = self.model.encoder.layers[1]
+         else:
+             self.encoder_layer = self.model.encoder.layers[0]
          self.decoder_layer = self.model.decoder.layers[0]
          self.encoder_weights = self.encoder_layer.get_weights()[0]
          self.decoder_weights = self.decoder_layer.get_weights()[0]
@@ -607,7 +942,12 @@ class GetInsights:
          indexes = np.random.choice(np.arange(inputs.shape[0]), n, replace=False)
          inputs = inputs[indexes]
          inputs[np.isnan(inputs)] = 0.0
-         encoded = self.model.encoder(inputs)
+         # check if self.model.encoder(inputs) has two outputs
+         try:
+             mean, log_var = self.model.encoder(inputs)
+             encoded = sampling(mean, log_var)
+         except:
+             encoded = self.model.encoder(inputs)
          reconstructed = self.model.decoder(encoded)
          combined = tf.concat([reconstructed, encoded], axis=1)
          self.classification = self.model.classifier(combined).numpy().argmax(axis=1)
likelihood/tools/__init__.py CHANGED
@@ -1,2 +1,3 @@
+ from .models_tools import *
  from .numeric_tools import *
  from .tools import *
likelihood/tools/models_tools.py ADDED
@@ -0,0 +1,101 @@
+ import logging
+ import os
+
+ import networkx as nx
+ import pandas as pd
+
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+ logging.getLogger("tensorflow").setLevel(logging.ERROR)
+
+ import tensorflow as tf
+
+
+ @tf.keras.utils.register_keras_serializable(package="Custom", name="LoRALayer")
+ class LoRALayer(tf.keras.layers.Layer):
+     def __init__(self, units, rank=4, **kwargs):
+         super(LoRALayer, self).__init__(**kwargs)
+         self.units = units
+         self.rank = rank
+
+     def build(self, input_shape):
+         input_dim = input_shape[-1]
+         print(f"Input shape: {input_shape}")
+
+         if self.rank > input_dim:
+             raise ValueError(
+                 f"Rank ({self.rank}) cannot be greater than input dimension ({input_dim})."
+             )
+         if self.rank > self.units:
+             raise ValueError(
+                 f"Rank ({self.rank}) cannot be greater than number of units ({self.units})."
+             )
+
+         self.A = self.add_weight(
+             shape=(input_dim, self.rank), initializer="random_normal", trainable=True, name="A"
+         )
+         self.B = self.add_weight(
+             shape=(self.rank, self.units), initializer="random_normal", trainable=True, name="B"
+         )
+         print(f"Dense weights shape: {input_dim}x{self.units}")
+         print(f"LoRA weights shape: A{self.A.shape}, B{self.B.shape}")
+
+     def call(self, inputs):
+         lora_output = tf.matmul(tf.matmul(inputs, self.A), self.B)
+         return lora_output
+
+
+ def apply_lora(model, rank=4):
+     inputs = tf.keras.Input(shape=model.input_shape[1:])
+     x = inputs
+
+     for layer in model.layers:
+         if isinstance(layer, tf.keras.layers.Dense):
+             print(f"Applying LoRA to layer {layer.name}")
+             x = LoRALayer(units=layer.units, rank=rank)(x)
+         else:
+             x = layer(x)
+     new_model = tf.keras.Model(inputs=inputs, outputs=x)
+     return new_model
+
+
+ def graph_metrics(adj_matrix, eigenvector_threshold=1e-6):
+     """
+     This function calculates the following graph metrics using the adjacency matrix:
+     1. Degree Centrality
+     2. Clustering Coefficient
+     3. Eigenvector Centrality
+     4. Degree
+     5. Betweenness Centrality
+     6. Closeness Centrality
+     7. Assortativity
+     """
+     adj_matrix = adj_matrix.astype(int)
+     G = nx.from_numpy_array(adj_matrix)
+     degree_centrality = nx.degree_centrality(G)
+     clustering_coeff = nx.clustering(G)
+     try:
+         eigenvector_centrality = nx.eigenvector_centrality(G, max_iter=500)
+     except nx.PowerIterationFailedConvergence:
+         print("Power iteration failed to converge. Returning NaN for eigenvector centrality.")
+         eigenvector_centrality = {node: float("nan") for node in G.nodes()}
+
+     for node, centrality in eigenvector_centrality.items():
+         if centrality < eigenvector_threshold:
+             eigenvector_centrality[node] = 0.0
+     degree = dict(G.degree())
+     betweenness_centrality = nx.betweenness_centrality(G)
+     closeness_centrality = nx.closeness_centrality(G)
+     assortativity = nx.degree_assortativity_coefficient(G)
+     metrics_df = pd.DataFrame(
+         {
+             "Degree": degree,
+             "Degree Centrality": degree_centrality,
+             "Clustering Coefficient": clustering_coeff,
+             "Eigenvector Centrality": eigenvector_centrality,
+             "Betweenness Centrality": betweenness_centrality,
+             "Closeness Centrality": closeness_centrality,
+         }
+     )
+     metrics_df["Assortativity"] = assortativity
+
+     return metrics_df
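
A short sketch of the new helpers (illustrative values; note that `apply_lora` replaces each Dense layer with a low-rank A @ B projection rather than adding a residual adapter):

    import numpy as np
    import tensorflow as tf

    from likelihood.tools import apply_lora, graph_metrics

    # Retrofit the Dense layers of an existing model with rank-4 projections.
    base = tf.keras.Sequential(
        [tf.keras.layers.InputLayer(input_shape=(16,)), tf.keras.layers.Dense(8)]
    )
    lora_model = apply_lora(base, rank=4)

    # Graph metrics for a 3-node path graph.
    adj = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]])
    print(graph_metrics(adj))
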
likelihood-1.3.2.dist-info/METADATA → likelihood-1.4.1.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: likelihood
- Version: 1.3.2
+ Version: 1.4.1
  Summary: A package that performs the maximum likelihood algorithm.
  Home-page: https://github.com/jzsmoreno/likelihood/
  Author: J. A. Moreno-Guerra
likelihood-1.3.2.dist-info/RECORD → likelihood-1.4.1.dist-info/RECORD RENAMED
@@ -2,19 +2,20 @@ likelihood/__init__.py,sha256=5C0hapdsk85XZhN_rssRAEFpkRRuKNtj6cyRbqD2_gM,994
  likelihood/main.py,sha256=fcCkGOOWKjfvw2tLVqjuKPV8t0rVCIT9FlbYcOv4EYo,7974
  likelihood/graph/__init__.py,sha256=6TuFDfmXTwpLyHl7_KqBfdzW6zqHjGzIFvymjFPlvjI,21
  likelihood/graph/graph.py,sha256=bLrNMvIh7GOTdPTwnNss8oPZ7cbSHQScAsH_ttmVUK0,3294
- likelihood/graph/nn.py,sha256=-OvHAeB3l2nd0ZeAk03cVDGBgaTn-WyGIsj5Rq7XeCY,12237
+ likelihood/graph/nn.py,sha256=MD2M-KgQnrlHg3iS42vrdOnD51-GRk3CJ5CCMQ0DNWI,10763
  likelihood/models/__init__.py,sha256=e6nB4w47w0Q9DrAFeP3OcUgcoHOtf7Il4mBhgf4AARg,52
  likelihood/models/hmm.py,sha256=0s0gFySH1u4NjRaZDxiZ8oeTaFhFrw1x0GJxwy3dFrA,6253
  likelihood/models/regression.py,sha256=9cakyGlJCEO6WfpoKLh3GxdXQeQp7cUvJIkQ5odT0TA,9404
  likelihood/models/simulation.py,sha256=LFyE_szo7sDukviMLeg_6RoyAaI7yMXUy8f4mDOrGoc,8460
  likelihood/models/utils.py,sha256=dvigPi_hxcs5ntfHr7Y1JvP5ULtMW3kkN0nJpS4orE8,1319
  likelihood/models/deep/__init__.py,sha256=-KIPippVaMqgG8mEgYjNxYQdqOUcFhUuKhbVe8TTCfo,28
- likelihood/models/deep/autoencoders.py,sha256=BSAnopJYJ_lYRcRYT5ZoUVjfrAPlsjdAOjNb6mUD6Ds,28198
- likelihood/tools/__init__.py,sha256=MCjsCWfBNKE2uMN0VizDN1uFzZ_md0X2WZeBdWhrCR8,50
+ likelihood/models/deep/autoencoders.py,sha256=O-H5KLmJvYjuE-b6l97esruihK6djocgxbkO2N1X2RM,39306
+ likelihood/tools/__init__.py,sha256=N1IhMDzacsGQT2MIYBMBC0zTxes78vC_0gGrwkuPgmg,78
+ likelihood/tools/models_tools.py,sha256=bjwoBlDeW1fUi58yJsuKcaTUTgWhOCNsc24_ESYI3BI,3502
  likelihood/tools/numeric_tools.py,sha256=FA44kbiAcxcquz1el_g3Pqsp5ii8XFkAIrsMs5bGkj0,11445
  likelihood/tools/tools.py,sha256=6JLZBHxc4f1lJfw4aBwdS2s16EpydFNqLZF73I7wddQ,44412
- likelihood-1.3.2.dist-info/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
- likelihood-1.3.2.dist-info/METADATA,sha256=x-4GMzzwrsMNQocGRo57TUlUUSY2tBppmOzeRPaapIc,2822
- likelihood-1.3.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- likelihood-1.3.2.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
- likelihood-1.3.2.dist-info/RECORD,,
+ likelihood-1.4.1.dist-info/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
+ likelihood-1.4.1.dist-info/METADATA,sha256=6otKXhthH5ZSUvYfcghD6CaC1skWZ0FBouXsGXuJfZw,2822
+ likelihood-1.4.1.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+ likelihood-1.4.1.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
+ likelihood-1.4.1.dist-info/RECORD,,
likelihood-1.3.2.dist-info/WHEEL → likelihood-1.4.1.dist-info/WHEEL RENAMED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (75.8.0)
+ Generator: setuptools (75.8.2)
  Root-Is-Purelib: true
  Tag: py3-none-any