likelihood-1.3.2-py3-none-any.whl → likelihood-1.4.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- likelihood/graph/nn.py +65 -112
- likelihood/models/deep/autoencoders.py +394 -54
- likelihood/tools/__init__.py +1 -0
- likelihood/tools/models_tools.py +101 -0
- {likelihood-1.3.2.dist-info → likelihood-1.4.1.dist-info}/METADATA +1 -1
- {likelihood-1.3.2.dist-info → likelihood-1.4.1.dist-info}/RECORD +9 -8
- {likelihood-1.3.2.dist-info → likelihood-1.4.1.dist-info}/WHEEL +1 -1
- {likelihood-1.3.2.dist-info → likelihood-1.4.1.dist-info}/LICENSE +0 -0
- {likelihood-1.3.2.dist-info → likelihood-1.4.1.dist-info}/top_level.txt +0 -0
likelihood/graph/nn.py CHANGED

@@ -5,7 +5,7 @@ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
 logging.getLogger("tensorflow").setLevel(logging.ERROR)
 
 import warnings
-from typing import List, Tuple
+from typing import Any, List, Tuple
 
 import numpy as np
 import pandas as pd
@@ -15,48 +15,43 @@ from pandas.core.frame import DataFrame
 from sklearn.metrics import f1_score
 from sklearn.model_selection import train_test_split
 
-from likelihood.tools import generate_feature_yaml
-
 tf.get_logger().setLevel("ERROR")
 
+from likelihood.tools import LoRALayer
 
-def compare_similarity(arr1: np.ndarray, arr2: np.ndarray) -> int:
-    """Compares the similarity between two arrays of categories.
-
-    arr1 : `ndarray`
-        The first array of categories.
-    arr2 : `ndarray`
-        The second array of categories.
+def compare_similarity(arr1: List[Any], arr2: List[Any], threshold: float = 0.05) -> int:
+    """Calculate the similarity between two arrays considering numeric values near to 1 in ratio."""
 
+    def is_similar(a: Any, b: Any) -> bool:
+        if isinstance(a, (int, float)) and isinstance(b, (int, float)):
+            if a == 0 and b == 0:
+                return True
+            if a == 0 or b == 0:
+                return False
+            # For numeric values, check if their ratio is within the threshold range
+            ratio = max(a, b) / min(a, b)
+            return 1 - threshold <= ratio <= 1 + threshold
+        else:
+            return a == b
 
-    for i in range(len(arr1)):
-        if arr1[i] == arr2[i]:
-            count += 1
-    return count
+    return sum(is_similar(a, b) for a, b in zip(arr1, arr2))
 
 
 def cal_adjacency_matrix(
     df: DataFrame, exclude_subset: List[str] = [], sparse: bool = True, **kwargs
 ) -> Tuple[dict, np.ndarray]:
     """Calculates the adjacency matrix for a given DataFrame.
-    The adjacency matrix is a matrix that represents the similarity between each pair of
+    The adjacency matrix is a matrix that represents the similarity between each pair of features.
     The similarity is calculated using the `compare_similarity` function.
-    The resulting matrix is a square matrix with the same number of rows and columns as the input DataFrame.
+    The resulting matrix is a square matrix with the same number of rows and columns as the rows of the input DataFrame.
 
     Parameters
     ----------
     df : `DataFrame`
-        The input DataFrame containing the
+        The input DataFrame containing the features.
     exclude_subset : `List[str]`, optional
-        A list of
+        A list of features to exclude from the calculation of the adjacency matrix.
     sparse : `bool`, optional
         Whether to return a sparse matrix or a dense matrix.
     **kwargs : `dict`
@@ -65,48 +60,33 @@ def cal_adjacency_matrix(
     Keyword Arguments:
     ----------
     similarity: `int`
-        The minimum number of
+        The minimum number of features that must be the same in both arrays to be considered similar.
 
     Returns
     -------
     adj_dict : `dict`
-        A dictionary containing the
+        A dictionary containing the features.
     adjacency_matrix : `ndarray`
         The adjacency matrix.
     """
 
-    yaml_ = generate_feature_yaml(df)
-    categorical_columns = yaml_["categorical_features"]
     if len(exclude_subset) > 0:
-        if len(categorical_columns) > 1:
-            df_categorical = df[categorical_columns].copy()
+        columns = [col for col in df.columns if col not in exclude_subset]
+        df_ = df[columns].copy()
     else:
-            col
-            for col in df.columns
-            if (
-                col not in exclude_subset
-                and pd.api.types.is_integer_dtype(df[col])
-                and len(df[col].unique()) > 2
-            )
-        ]
-        df_categorical = df[categorical_columns].copy()
+        df_ = df.copy()
 
-    assert len(
+    assert len(df_) > 0
 
-    similarity = kwargs.get("similarity", len(
-    assert similarity <=
+    similarity = kwargs.get("similarity", len(df_.columns) - 1)
+    assert similarity <= df_.shape[1]
 
-    adj_dict = {}
-    for index, row in df_categorical.iterrows():
-        adj_dict[index] = row.to_list()
+    adj_dict = {index: row.tolist() for index, row in df_.iterrows()}
 
-    adjacency_matrix = np.zeros((len(
+    adjacency_matrix = np.zeros((len(df_), len(df_)))
 
-    for i in range(len(
-        for j in range(len(
+    for i in range(len(df_)):
+        for j in range(len(df_)):
             if compare_similarity(adj_dict[i], adj_dict[j]) >= similarity:
                 adjacency_matrix[i][j] = 1
 
@@ -131,8 +111,10 @@ class Data:
         df: DataFrame,
         target: str | None = None,
         exclude_subset: List[str] = [],
+        **kwargs,
     ):
+        sparse = kwargs.get("sparse", True)
+        _, adjacency = cal_adjacency_matrix(df, exclude_subset=exclude_subset, sparse=sparse)
         if target is not None:
             X = df.drop(columns=[target] + exclude_subset)
         else:
@@ -147,16 +129,20 @@ class Data:
 
 @tf.keras.utils.register_keras_serializable(package="Custom", name="VanillaGNNLayer")
 class VanillaGNNLayer(tf.keras.layers.Layer):
-    def __init__(self, dim_in, dim_out, kernel_initializer="glorot_uniform", **kwargs):
+    def __init__(self, dim_in, dim_out, rank=None, kernel_initializer="glorot_uniform", **kwargs):
         super(VanillaGNNLayer, self).__init__(**kwargs)
         self.dim_out = dim_out
+        self.rank = rank
         self.kernel_initializer = kernel_initializer
         self.linear = None
 
     def build(self, input_shape):
-        self.
-        self.
+        if self.rank:
+            self.linear = LoRALayer(self.dim_out, rank=self.rank)
+        else:
+            self.linear = tf.keras.layers.Dense(
+                self.dim_out, use_bias=False, kernel_initializer=self.kernel_initializer
+            )
         super(VanillaGNNLayer, self).build(input_shape)
 
     def call(self, x, adjacency):
@@ -169,8 +155,11 @@ class VanillaGNNLayer(tf.keras.layers.Layer):
         config.update(
             {
                 "dim_out": self.dim_out,
-                "
+                "rank": self.rank,
+                "kernel_initializer": (
+                    None
+                    if self.rank
+                    else tf.keras.initializers.serialize(self.linear.kernel_initializer)
                 ),
             }
         )
@@ -179,14 +168,16 @@ class VanillaGNNLayer(tf.keras.layers.Layer):
 
 @tf.keras.utils.register_keras_serializable(package="Custom", name="VanillaGNN")
 class VanillaGNN(tf.keras.Model):
-    def __init__(self, dim_in, dim_h, dim_out, **kwargs):
+    def __init__(self, dim_in, dim_h, dim_out, rank=2, **kwargs):
         super(VanillaGNN, self).__init__(**kwargs)
         self.dim_in = dim_in
         self.dim_h = dim_h
         self.dim_out = dim_out
-        self.
-        self.
+        self.rank = rank
+
+        self.gnn1 = VanillaGNNLayer(self.dim_in, self.dim_h, self.rank)
+        self.gnn2 = VanillaGNNLayer(self.dim_h, self.dim_h, self.rank)
+        self.gnn3 = VanillaGNNLayer(self.dim_h, self.dim_out, None)
 
     def call(self, x, adjacency):
         h = self.gnn1(x, adjacency)
@@ -208,13 +199,13 @@ class VanillaGNN(tf.keras.Model):
         out = self(x, adjacency)
         loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=out)
         loss = tf.reduce_mean(loss)
-        f1 = self.compute_f1_score(out, y)
+        f1 = round(self.compute_f1_score(out, y), 4)
         return loss.numpy(), f1
 
     def test(self, data):
         out = self(data.x, data.adjacency)
         test_f1 = self.compute_f1_score(out, data.y)
-        return test_f1
+        return round(test_f1, 4)
 
     def predict(self, data):
         out = self(data.x, data.adjacency)
@@ -225,6 +216,7 @@ class VanillaGNN(tf.keras.Model):
             "dim_in": self.dim_in,
             "dim_h": self.dim_h,
             "dim_out": self.dim_out,
+            "rank": self.rank,
         }
         base_config = super(VanillaGNN, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
@@ -235,6 +227,7 @@ class VanillaGNN(tf.keras.Model):
             dim_in=config["dim_in"],
             dim_h=config["dim_h"],
             dim_out=config["dim_out"],
+            rank=config["rank"],
         )
 
     @tf.function
@@ -248,10 +241,6 @@ class VanillaGNN(tf.keras.Model):
         return loss
 
     def fit(self, data, epochs, batch_size, test_size=0.2, optimizer="adam"):
-        warnings.warn(
-            "It is normal for validation metrics to underperform. Use the test method to validate after training.",
-            UserWarning,
-        )
         optimizers = {
             "sgd": tf.keras.optimizers.SGD(),
             "adam": tf.keras.optimizers.Adam(),
@@ -290,56 +279,20 @@ class VanillaGNN(tf.keras.Model):
             train_f1_scores.append(train_f1)
 
             if epoch % 5 == 0:
+                clear_output(wait=True)
+                warnings.warn(
+                    "It is normal for validation metrics to underperform during training. Use the test method to validate after training.",
+                    UserWarning,
+                )
                 val_loss, val_f1 = self.evaluate(X_test, adjacency_test, y_test)
                 val_losses.append(val_loss)
                 val_f1_scores.append(val_f1)
-                clear_output(wait=True)
                 print(
-                    f"Epoch {epoch:>3} | Train Loss: {train_loss:.
+                    f"Epoch {epoch:>3} | Train Loss: {train_loss:.4f} | Train F1: {train_f1:.4f} | Val Loss: {val_loss:.4f} | Val F1: {val_f1:.4f}"
                 )
 
         return train_losses, train_f1_scores, val_losses, val_f1_scores
 
 
 if __name__ == "__main__":
-
-    import pandas as pd
-    from sklearn.datasets import load_iris
-
-    # Load the dataset
-    iris = load_iris()
-
-    # Convert to a DataFrame for easy exploration
-    iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
-    iris_df["species"] = iris.target
-
-    iris_df["sepal length (cm)"] = iris_df["sepal length (cm)"].astype("category")
-    iris_df["sepal width (cm)"] = iris_df["sepal width (cm)"].astype("category")
-    iris_df["petal length (cm)"] = iris_df["petal length (cm)"].astype("category")
-    iris_df["petal width (cm)"] = iris_df["petal width (cm)"].astype("category")
-
-    # Display the first few rows of the dataset
-    print(iris_df.head())
-
-    iris_df = iris_df.sample(frac=1, replace=False).reset_index(drop=True)
-
-    data = Data(iris_df, "species")
-
-    model = VanillaGNN(dim_in=data.x.shape[1], dim_h=8, dim_out=len(iris_df["species"].unique()))
-    print("Before training F1:", model.test(data))
-    model.fit(data, epochs=200, batch_size=32, test_size=0.5)
-    model.save("./best_model", save_format="tf")
-    print("After training F1:", model.test(data))
-    best_model = tf.keras.models.load_model("./best_model")
-
-    print("After loading F1:", best_model.test(data))
-    df_results = pd.DataFrame()
-
-    # Suppose we have a new dataset without the target variable
-    iris_df = iris_df.drop(columns=["species"])
-    data_new = Data(iris_df)
-    print("Predictions:", best_model.predict(data_new))
-    df_results["predicted"] = list(model.predict(data))
-    df_results["actual"] = list(data.y)
-    # df_results.to_csv("results.csv", index=False)
-    breakpoint()
+    print("Examples will be running below")
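For orientation, a minimal usage sketch of the reworked nn.py API follows. It is inferred only from the signatures visible in this diff; the iris DataFrame is placeholder data, not something shipped with the package, and the new rank argument routes the first two GNN layers through the LoRALayer imported above.

    import pandas as pd
    from sklearn.datasets import load_iris
    from likelihood.graph.nn import Data, VanillaGNN

    # Placeholder tabular dataset with a categorical target
    iris = load_iris()
    df = pd.DataFrame(iris.data, columns=iris.feature_names)
    df["species"] = iris.target

    data = Data(df, "species")  # adjacency built internally via cal_adjacency_matrix
    model = VanillaGNN(
        dim_in=data.x.shape[1],
        dim_h=8,
        dim_out=len(df["species"].unique()),
        rank=2,  # new in 1.4.1: low-rank (LoRA) weights in the two hidden layers
    )
    model.fit(data, epochs=100, batch_size=32)
    print("Test F1:", model.test(data))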
likelihood/models/deep/autoencoders.py CHANGED

@@ -24,7 +24,7 @@ from sklearn.manifold import TSNE
 from tensorflow.keras.layers import InputLayer
 from tensorflow.keras.regularizers import l2
 
-from likelihood.tools import OneHotEncoder
+from likelihood.tools import LoRALayer, OneHotEncoder
 
 tf.get_logger().setLevel("ERROR")
 
@@ -39,53 +39,231 @@ def suppress_warnings(func):
     return wrapper
 
 
+class EarlyStopping:
+    def __init__(self, patience=10, min_delta=0.001):
+        self.patience = patience
+        self.min_delta = min_delta
+        self.best_loss = np.inf
+        self.counter = 0
+        self.stop_training = False
+
+    def __call__(self, current_loss):
+        if self.best_loss - current_loss > self.min_delta:
+            self.best_loss = current_loss
+            self.counter = 0
+        else:
+            self.counter += 1
+
+        if self.counter >= self.patience:
+            self.stop_training = True
+
+
+def mse_loss(y_true, y_pred):
+    """
+    Mean squared error loss function.
+
+    Parameters
+    ----------
+    y_true : `tf.Tensor`
+        The true values.
+    y_pred : `tf.Tensor`
+        The predicted values.
+
+    Returns
+    -------
+    `tf.Tensor`
+    """
+    return tf.reduce_mean(tf.square(y_true - y_pred))
+
+
+def kl_loss(mean, log_var):
+    """
+    Kullback-Leibler divergence loss function.
+
+    Parameters
+    ----------
+    mean : `tf.Tensor`
+        The mean of the distribution.
+    log_var : `tf.Tensor`
+        The log variance of the distribution.
+
+    Returns
+    -------
+    `tf.Tensor`
+    """
+    return -0.5 * tf.reduce_mean(1 + log_var - tf.square(mean) - tf.exp(log_var))
+
+
+def vae_loss(y_true, y_pred, mean, log_var):
+    """
+    Variational autoencoder loss function.
+
+    Parameters
+    ----------
+    y_true : `tf.Tensor`
+        The true values.
+    y_pred : `tf.Tensor`
+        The predicted values.
+    mean : `tf.Tensor`
+        The mean of the distribution.
+    log_var : `tf.Tensor`
+        The log variance of the distribution.
+
+    Returns
+    -------
+    `tf.Tensor`
+    """
+    return mse_loss(y_true, y_pred) + kl_loss(mean, log_var)
+
+
+def sampling(mean, log_var, epsilon_value=1e-8):
+    """
+    Samples from the distribution.
+
+    Parameters
+    ----------
+    mean : `tf.Tensor`
+        The mean of the distribution.
+    log_var : `tf.Tensor`
+        The log variance of the distribution.
+    epsilon_value : float
+        A small value to avoid numerical instability.
+
+    Returns
+    -------
+    `tf.Tensor`
+    """
+    epsilon = tf.random.normal(shape=tf.shape(mean), mean=0.0, stddev=1.0)
+    stddev = tf.exp(0.5 * log_var) + epsilon_value
+    epsilon = tf.random.normal(shape=tf.shape(mean), mean=0.0, stddev=1.0)
+    return mean + stddev * epsilon
+
+
+def check_for_nans(tensors, name="Tensor"):
+    for t in tensors:
+        if tf.reduce_any(tf.math.is_nan(t)) or tf.reduce_any(tf.math.is_inf(t)):
+            print(f"Warning: {name} contains NaNs or Infs")
+            return True
+    return False
+
+
+def cal_loss_step(batch, encoder, decoder, vae_mode=False, training=True):
+    """
+    Calculates the loss value on a batch of data.
+
+    Parameters
+    ----------
+    batch : `tf.Tensor`
+        The batch of data.
+    encoder : `tf.keras.Model`
+        The encoder model.
+    decoder : `tf.keras.Model`
+        The decoder model.
+    optimizer : `tf.keras.optimizers.Optimizer`
+        The optimizer to use.
+    vae_mode : `bool`
+        Whether to use variational autoencoder mode. Default is False.
+    training : `bool`
+        Whether the model is in training mode. Default is True.
+
+    Returns
+    -------
+    `tf.Tensor`
+        The loss value.
+    """
+    if vae_mode:
+        mean, log_var = encoder(batch, training=training)
+        log_var = tf.clip_by_value(log_var, clip_value_min=1e-8, clip_value_max=tf.float32.max)
+        decoded = decoder(sampling(mean, log_var), training=training)
+        loss = vae_loss(batch, decoded, mean, log_var)
+    else:
+        encoded = encoder(batch, training=training)
+        decoded = decoder(encoded, training=training)
+        loss = mse_loss(batch, decoded)
+
+    return loss
+
+
+@tf.function
+def train_step(batch, encoder, decoder, optimizer, vae_mode=False):
+    """
+    Trains the model on a batch of data.
+
+    Parameters
+    ----------
+    mean : `tf.Tensor`
+        The mean of the distribution.
+    log_var : `tf.Tensor`
+        The log variance of the distribution.
+    batch : `tf.Tensor`
+        The batch of data.
+    encoder : `tf.keras.Model`
+        The encoder model.
+    decoder : `tf.keras.Model`
+        The decoder model.
+    optimizer : `tf.keras.optimizers.Optimizer`
+        The optimizer to use.
+    vae_mode : `bool`
+        Whether to use variational autoencoder mode. Default is False.
+
+    Returns
+    -------
+    `tf.Tensor`
+        The loss value.
+    """
+    optimizer.build(encoder.trainable_variables + decoder.trainable_variables)
+
+    with tf.GradientTape() as encoder_tape, tf.GradientTape() as decoder_tape:
+        loss = cal_loss_step(batch, encoder, decoder, vae_mode=vae_mode)
+
+    gradients_of_encoder = encoder_tape.gradient(loss, encoder.trainable_variables)
+    gradients_of_decoder = decoder_tape.gradient(loss, decoder.trainable_variables)
+
+    optimizer.apply_gradients(zip(gradients_of_encoder, encoder.trainable_variables))
+    optimizer.apply_gradients(zip(gradients_of_decoder, decoder.trainable_variables))
+
+    return loss
+
+
 @tf.keras.utils.register_keras_serializable(package="Custom", name="AutoClassifier")
 class AutoClassifier(tf.keras.Model):
     """
     An auto-classifier model that automatically determines the best classification strategy based on the input data.
 
-        get_config(self): Returns the configuration of the model.
-        from_config(cls, config): Recreates an instance of AutoClassifier from its configuration.
-    """
-
-    def __init__(self, input_shape_parm, num_classes, units, activation, **kwargs):
-        """
-        Initializes an AutoClassifier instance with the given parameters.
+    Parameters
+    ----------
+    input_shape_parm : `int`
+        The shape of the input data.
+    num_classes : `int`
+        The number of classes in the dataset.
+    units : `int`
+        The number of neurons in each hidden layer.
+    activation : `str`
+        The type of activation function to use for the neural network layers.
 
-            The shape of the input data.
-        num_classes : `int`
-            The number of classes in the dataset.
-        units : `int`
-            The number of neurons in each hidden layer.
-        activation : `str`
-            The type of activation function to use for the neural network layers.
+    Keyword Arguments:
+    ----------
+    Additional keyword arguments to pass to the model.
 
+    classifier_activation : `str`
+        The activation function to use for the classifier layer. Default is "softmax". If the activation function is not a classification function, the model can be used in regression problems.
+    num_layers : `int`
+        The number of hidden layers in the classifier. Default is 1.
+    dropout : `float`
+        The dropout rate to use in the classifier. Default is None.
+    l2_reg : `float`
+        The L2 regularization parameter. Default is 0.0.
+    vae_mode : `bool`
+        Whether to use variational autoencoder mode. Default is False.
+    vae_units : `int`
+        The number of units in the variational autoencoder. Default is 2.
+    lora_mode : `bool`
+        Whether to use LoRA layers. Default is False.
+    lora_rank : `int`
+        The rank of the LoRA layer. Default is 4.
+    """
 
-            The activation function to use for the classifier layer. Default is "softmax". If the activation function is not a classification function, the model can be used in regression problems.
-        num_layers : `int`
-            The number of hidden layers in the classifier. Default is 1.
-        dropout : `float`
-            The dropout rate to use in the classifier. Default is None.
-        l2_reg : `float`
-            The L2 regularization parameter. Default is 0.0.
-        """
+    def __init__(self, input_shape_parm, num_classes, units, activation, **kwargs):
         super(AutoClassifier, self).__init__()
         self.input_shape_parm = input_shape_parm
         self.num_classes = num_classes
@@ -99,9 +277,12 @@ class AutoClassifier(tf.keras.Model):
         self.num_layers = kwargs.get("num_layers", 1)
         self.dropout = kwargs.get("dropout", None)
         self.l2_reg = kwargs.get("l2_reg", 0.0)
+        self.vae_mode = kwargs.get("vae_mode", False)
+        self.vae_units = kwargs.get("vae_units", 2)
+        self.lora_mode = kwargs.get("lora_mode", False)
+        self.lora_rank = kwargs.get("lora_rank", 4)
 
-    def
-        # Encoder with L2 regularization
+    def build_encoder_decoder(self, input_shape):
         self.encoder = (
             tf.keras.Sequential(
                 [
@@ -121,7 +302,6 @@ class AutoClassifier(tf.keras.Model):
             else self.encoder
         )
 
-        # Decoder with L2 regularization
         self.decoder = (
             tf.keras.Sequential(
                 [
@@ -141,9 +321,61 @@ class AutoClassifier(tf.keras.Model):
             else self.decoder
         )
 
+    def build(self, input_shape):
+        if self.vae_mode:
+            inputs = tf.keras.Input(shape=self.input_shape_parm, name="encoder_input")
+            x = tf.keras.layers.Dense(
+                units=self.units,
+                kernel_regularizer=l2(self.l2_reg),
+                kernel_initializer="he_normal",
+            )(inputs)
+            x = tf.keras.layers.BatchNormalization()(x)
+            x = tf.keras.layers.Activation(self.activation)(x)
+            x = tf.keras.layers.Dense(
+                units=int(self.units / 2),
+                kernel_regularizer=l2(self.l2_reg),
+                kernel_initializer="he_normal",
+                name="encoder_hidden",
+            )(x)
+            x = tf.keras.layers.BatchNormalization()(x)
+            x = tf.keras.layers.Activation(self.activation)(x)
+
+            mean = tf.keras.layers.Dense(2, name="mean")(x)
+            log_var = tf.keras.layers.Dense(2, name="log_var")(x)
+            log_var = tf.keras.layers.Lambda(lambda x: x + 1e-7)(log_var)
+
+            self.encoder = (
+                tf.keras.Model(inputs, [mean, log_var], name="encoder")
+                if not self.encoder
+                else self.encoder
+            )
+            self.decoder = (
+                tf.keras.Sequential(
+                    [
+                        tf.keras.layers.Dense(
+                            units=self.units,
+                            kernel_regularizer=l2(self.l2_reg),
+                        ),
+                        tf.keras.layers.BatchNormalization(),
+                        tf.keras.layers.Activation(self.activation),
+                        tf.keras.layers.Dense(
+                            units=self.input_shape_parm,
+                            kernel_regularizer=l2(self.l2_reg),
+                        ),
+                        tf.keras.layers.BatchNormalization(),
+                        tf.keras.layers.Activation(self.activation),
+                    ]
+                )
+                if not self.decoder
+                else self.decoder
+            )
+
+        else:
+            self.build_encoder_decoder(input_shape)
+
         # Classifier with L2 regularization
         self.classifier = tf.keras.Sequential()
-        if self.num_layers > 1:
+        if self.num_layers > 1 and not self.lora_mode:
             for _ in range(self.num_layers - 1):
                 self.classifier.add(
                     tf.keras.layers.Dense(
@@ -154,16 +386,106 @@ class AutoClassifier(tf.keras.Model):
                 )
             if self.dropout:
                 self.classifier.add(tf.keras.layers.Dropout(self.dropout))
+            self.classifier.add(
+                tf.keras.layers.Dense(
+                    units=self.num_classes,
+                    activation=self.classifier_activation,
+                    kernel_regularizer=l2(self.l2_reg),
+                )
             )
+        elif self.lora_mode:
+            for _ in range(self.num_layers - 1):
+                self.classifier.add(
+                    LoRALayer(units=self.units, rank=self.lora_rank, name=f"LoRA_{_}")
+                )
+                self.classifier.add(tf.keras.layers.Activation(self.activation))
+                if self.dropout:
+                    self.classifier.add(tf.keras.layers.Dropout(self.dropout))
+            self.classifier.add(
+                tf.keras.layers.Dense(
+                    units=self.num_classes,
+                    activation=self.classifier_activation,
+                    kernel_regularizer=l2(self.l2_reg),
+                )
+            )
+        else:
+            self.classifier.add(
+                tf.keras.layers.Dense(
+                    units=self.num_classes,
+                    activation=self.classifier_activation,
+                    kernel_regularizer=l2(self.l2_reg),
+                )
+            )
+
+    def train_encoder_decoder(
+        self, data, epochs, batch_size, validation_split=0.2, patience=10, **kwargs
+    ):
+        """
+        Trains the encoder and decoder on the input data.
+
+        Parameters
+        ----------
+        data : `tf.data.Dataset`, `np.ndarray`
+            The input data.
+        epochs : `int`
+            The number of epochs to train for.
+        batch_size : `int`
+            The batch size to use.
+        validation_split : `float`
+            The proportion of the dataset to use for validation. Default is 0.2.
+        patience : `int`
+            The number of epochs to wait before early stopping. Default is 10.
+
+        Keyword Arguments:
+        ----------
+        Additional keyword arguments to pass to the model.
+        """
+        verbose = kwargs.get("verbose", True)
+        optimizer = kwargs.get("optimizer", tf.keras.optimizers.Adam())
+        dummy_input = tf.convert_to_tensor(tf.random.normal([1, self.input_shape_parm]))
+        self.build(dummy_input.shape)
+        if not self.vae_mode:
+            dummy_output = self.encoder(dummy_input)
+            self.decoder(dummy_output)
+        else:
+            mean, log_var = self.encoder(dummy_input)
+            dummy_output = sampling(mean, log_var)
+            self.decoder(dummy_output)
+
+        if isinstance(data, np.ndarray):
+            data = tf.data.Dataset.from_tensor_slices(data).batch(batch_size)
+        data = data.map(lambda x: tf.cast(x, tf.float32))
+
+        early_stopping = EarlyStopping(patience=patience)
+        train_batches = data.take(int((1 - validation_split) * len(data)))
+        val_batches = data.skip(int((1 - validation_split) * len(data)))
+        for epoch in range(epochs):
+            for train_batch, val_batch in zip(train_batches, val_batches):
+                loss_train = train_step(
+                    train_batch, self.encoder, self.decoder, optimizer, self.vae_mode
+                )
+                loss_val = cal_loss_step(
+                    val_batch, self.encoder, self.decoder, self.vae_mode, False
+                )
+
+            early_stopping(loss_train)
+
+            if early_stopping.stop_training:
+                print(f"Early stopping triggered at epoch {epoch}.")
+                break
+
+            if epoch % 10 == 0 and verbose:
+                print(
+                    f"Epoch {epoch}: Train Loss: {loss_train:.6f} Validation Loss: {loss_val:.6f}"
+                )
+        self.freeze_encoder_decoder()
 
     def call(self, x):
-
+        if self.vae_mode:
+            mean, log_var = self.encoder(x)
+            encoded = sampling(mean, log_var)
+        else:
+            encoded = self.encoder(x)
         decoded = self.decoder(encoded)
         combined = tf.concat([decoded, encoded], axis=1)
         classification = self.classifier(combined)
@@ -190,7 +512,7 @@ class AutoClassifier(tf.keras.Model):
     def set_encoder_decoder(self, source_model):
        """
        Sets the encoder and decoder layers from another AutoClassifier instance,
-       ensuring compatibility in dimensions.
+       ensuring compatibility in dimensions. Only works if vae_mode is False.
 
        Parameters:
        -----------
@@ -257,6 +579,10 @@ class AutoClassifier(tf.keras.Model):
             "num_layers": self.num_layers,
             "dropout": self.dropout,
             "l2_reg": self.l2_reg,
+            "vae_mode": self.vae_mode,
+            "vae_units": self.vae_units,
+            "lora_mode": self.lora_mode,
+            "lora_rank": self.lora_rank,
         }
         base_config = super(AutoClassifier, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
@@ -272,6 +598,10 @@ class AutoClassifier(tf.keras.Model):
             num_layers=config["num_layers"],
             dropout=config["dropout"],
             l2_reg=config["l2_reg"],
+            vae_mode=config["vae_mode"],
+            vae_units=config["vae_units"],
+            lora_mode=config["lora_mode"],
+            lora_rank=config["lora_rank"],
         )
 
 
@@ -302,6 +632,8 @@ def call_existing_code(
         The shape of the input data.
     num_classes : `int`
         The number of classes in the dataset.
+    num_layers : `int`
+        The number of hidden layers in the classifier. Default is 1.
 
     Returns
     -------
@@ -578,7 +910,10 @@ class GetInsights:
     def __init__(self, model: AutoClassifier, inputs: np.ndarray) -> None:
         self.inputs = inputs
         self.model = model
+        if isinstance(self.model.encoder.layers[0], InputLayer):
+            self.encoder_layer = self.model.encoder.layers[1]
+        else:
+            self.encoder_layer = self.model.encoder.layers[0]
         self.decoder_layer = self.model.decoder.layers[0]
         self.encoder_weights = self.encoder_layer.get_weights()[0]
         self.decoder_weights = self.decoder_layer.get_weights()[0]
@@ -607,7 +942,12 @@ class GetInsights:
         indexes = np.random.choice(np.arange(inputs.shape[0]), n, replace=False)
         inputs = inputs[indexes]
         inputs[np.isnan(inputs)] = 0.0
+        # check if self.model.encoder(inputs) has two outputs
+        try:
+            mean, log_var = self.model.encoder(inputs)
+            encoded = sampling(mean, log_var)
+        except:
+            encoded = self.model.encoder(inputs)
         reconstructed = self.model.decoder(encoded)
         combined = tf.concat([reconstructed, encoded], axis=1)
         self.classification = self.model.classifier(combined).numpy().argmax(axis=1)
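Taken together, the autoencoders.py changes add an optional variational path (vae_mode), an optional LoRA classifier head (lora_mode), and an unsupervised pre-training loop (train_encoder_decoder) driven by the new EarlyStopping helper. A rough usage sketch follows, based only on the signatures visible in this diff; the random feature matrix and the hyperparameter values are placeholders.

    import numpy as np
    from likelihood.models.deep.autoencoders import AutoClassifier

    X = np.random.rand(256, 20).astype("float32")  # placeholder feature matrix

    clf = AutoClassifier(
        input_shape_parm=X.shape[1],
        num_classes=3,
        units=16,
        activation="relu",
        vae_mode=True,  # new in 1.4.1: variational encoder/decoder
        lora_rank=4,    # new in 1.4.1: rank used when lora_mode=True
    )

    # New in 1.4.1: pre-train encoder/decoder with early stopping; the loop ends
    # by calling freeze_encoder_decoder() before the classifier is trained.
    clf.train_encoder_decoder(X, epochs=50, batch_size=32, validation_split=0.2, patience=5)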
|
likelihood/tools/__init__.py
CHANGED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
import networkx as nx
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
|
8
|
+
logging.getLogger("tensorflow").setLevel(logging.ERROR)
|
|
9
|
+
|
|
10
|
+
import tensorflow as tf
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@tf.keras.utils.register_keras_serializable(package="Custom", name="LoRALayer")
|
|
14
|
+
class LoRALayer(tf.keras.layers.Layer):
|
|
15
|
+
def __init__(self, units, rank=4, **kwargs):
|
|
16
|
+
super(LoRALayer, self).__init__(**kwargs)
|
|
17
|
+
self.units = units
|
|
18
|
+
self.rank = rank
|
|
19
|
+
|
|
20
|
+
def build(self, input_shape):
|
|
21
|
+
input_dim = input_shape[-1]
|
|
22
|
+
print(f"Input shape: {input_shape}")
|
|
23
|
+
|
|
24
|
+
if self.rank > input_dim:
|
|
25
|
+
raise ValueError(
|
|
26
|
+
f"Rank ({self.rank}) cannot be greater than input dimension ({input_dim})."
|
|
27
|
+
)
|
|
28
|
+
if self.rank > self.units:
|
|
29
|
+
raise ValueError(
|
|
30
|
+
f"Rank ({self.rank}) cannot be greater than number of units ({self.units})."
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
self.A = self.add_weight(
|
|
34
|
+
shape=(input_dim, self.rank), initializer="random_normal", trainable=True, name="A"
|
|
35
|
+
)
|
|
36
|
+
self.B = self.add_weight(
|
|
37
|
+
shape=(self.rank, self.units), initializer="random_normal", trainable=True, name="B"
|
|
38
|
+
)
|
|
39
|
+
print(f"Dense weights shape: {input_dim}x{self.units}")
|
|
40
|
+
print(f"LoRA weights shape: A{self.A.shape}, B{self.B.shape}")
|
|
41
|
+
|
|
42
|
+
def call(self, inputs):
|
|
43
|
+
lora_output = tf.matmul(tf.matmul(inputs, self.A), self.B)
|
|
44
|
+
return lora_output
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def apply_lora(model, rank=4):
|
|
48
|
+
inputs = tf.keras.Input(shape=model.input_shape[1:])
|
|
49
|
+
x = inputs
|
|
50
|
+
|
|
51
|
+
for layer in model.layers:
|
|
52
|
+
if isinstance(layer, tf.keras.layers.Dense):
|
|
53
|
+
print(f"Applying LoRA to layer {layer.name}")
|
|
54
|
+
x = LoRALayer(units=layer.units, rank=rank)(x)
|
|
55
|
+
else:
|
|
56
|
+
x = layer(x)
|
|
57
|
+
new_model = tf.keras.Model(inputs=inputs, outputs=x)
|
|
58
|
+
return new_model
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def graph_metrics(adj_matrix, eigenvector_threshold=1e-6):
|
|
62
|
+
"""
|
|
63
|
+
This function calculates the following graph metrics using the adjacency matrix:
|
|
64
|
+
1. Degree Centrality
|
|
65
|
+
2. Clustering Coefficient
|
|
66
|
+
3. Eigenvector Centrality
|
|
67
|
+
4. Degree
|
|
68
|
+
5. Betweenness Centrality
|
|
69
|
+
6. Closeness Centrality
|
|
70
|
+
7. Assortativity
|
|
71
|
+
"""
|
|
72
|
+
adj_matrix = adj_matrix.astype(int)
|
|
73
|
+
G = nx.from_numpy_array(adj_matrix)
|
|
74
|
+
degree_centrality = nx.degree_centrality(G)
|
|
75
|
+
clustering_coeff = nx.clustering(G)
|
|
76
|
+
try:
|
|
77
|
+
eigenvector_centrality = nx.eigenvector_centrality(G, max_iter=500)
|
|
78
|
+
except nx.PowerIterationFailedConvergence:
|
|
79
|
+
print("Power iteration failed to converge. Returning NaN for eigenvector centrality.")
|
|
80
|
+
eigenvector_centrality = {node: float("nan") for node in G.nodes()}
|
|
81
|
+
|
|
82
|
+
for node, centrality in eigenvector_centrality.items():
|
|
83
|
+
if centrality < eigenvector_threshold:
|
|
84
|
+
eigenvector_centrality[node] = 0.0
|
|
85
|
+
degree = dict(G.degree())
|
|
86
|
+
betweenness_centrality = nx.betweenness_centrality(G)
|
|
87
|
+
closeness_centrality = nx.closeness_centrality(G)
|
|
88
|
+
assortativity = nx.degree_assortativity_coefficient(G)
|
|
89
|
+
metrics_df = pd.DataFrame(
|
|
90
|
+
{
|
|
91
|
+
"Degree": degree,
|
|
92
|
+
"Degree Centrality": degree_centrality,
|
|
93
|
+
"Clustering Coefficient": clustering_coeff,
|
|
94
|
+
"Eigenvector Centrality": eigenvector_centrality,
|
|
95
|
+
"Betweenness Centrality": betweenness_centrality,
|
|
96
|
+
"Closeness Centrality": closeness_centrality,
|
|
97
|
+
}
|
|
98
|
+
)
|
|
99
|
+
metrics_df["Assortativity"] = assortativity
|
|
100
|
+
|
|
101
|
+
return metrics_df
|
|
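The new models_tools module is the source of the LoRALayer that nn.py and autoencoders.py now import from likelihood.tools. A small sketch of how the two LoRA helpers compose, assuming a plain Keras model as input; the Sequential model below is a placeholder and not part of the package.

    import tensorflow as tf
    from likelihood.tools import LoRALayer  # re-export confirmed by the nn.py import above
    from likelihood.tools.models_tools import apply_lora

    # Placeholder dense network to retrofit with low-rank layers
    base = tf.keras.Sequential(
        [
            tf.keras.Input(shape=(32,)),
            tf.keras.layers.Dense(64, activation="relu"),
            tf.keras.layers.Dense(10, activation="softmax"),
        ]
    )

    # apply_lora swaps every Dense layer for a rank-4 factorization (W ~ A @ B)
    lora_model = apply_lora(base, rank=4)
    lora_model.summary()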
{likelihood-1.3.2.dist-info → likelihood-1.4.1.dist-info}/RECORD RENAMED

@@ -2,19 +2,20 @@ likelihood/__init__.py,sha256=5C0hapdsk85XZhN_rssRAEFpkRRuKNtj6cyRbqD2_gM,994
 likelihood/main.py,sha256=fcCkGOOWKjfvw2tLVqjuKPV8t0rVCIT9FlbYcOv4EYo,7974
 likelihood/graph/__init__.py,sha256=6TuFDfmXTwpLyHl7_KqBfdzW6zqHjGzIFvymjFPlvjI,21
 likelihood/graph/graph.py,sha256=bLrNMvIh7GOTdPTwnNss8oPZ7cbSHQScAsH_ttmVUK0,3294
-likelihood/graph/nn.py,sha256
+likelihood/graph/nn.py,sha256=MD2M-KgQnrlHg3iS42vrdOnD51-GRk3CJ5CCMQ0DNWI,10763
 likelihood/models/__init__.py,sha256=e6nB4w47w0Q9DrAFeP3OcUgcoHOtf7Il4mBhgf4AARg,52
 likelihood/models/hmm.py,sha256=0s0gFySH1u4NjRaZDxiZ8oeTaFhFrw1x0GJxwy3dFrA,6253
 likelihood/models/regression.py,sha256=9cakyGlJCEO6WfpoKLh3GxdXQeQp7cUvJIkQ5odT0TA,9404
 likelihood/models/simulation.py,sha256=LFyE_szo7sDukviMLeg_6RoyAaI7yMXUy8f4mDOrGoc,8460
 likelihood/models/utils.py,sha256=dvigPi_hxcs5ntfHr7Y1JvP5ULtMW3kkN0nJpS4orE8,1319
 likelihood/models/deep/__init__.py,sha256=-KIPippVaMqgG8mEgYjNxYQdqOUcFhUuKhbVe8TTCfo,28
-likelihood/models/deep/autoencoders.py,sha256=
-likelihood/tools/__init__.py,sha256=
+likelihood/models/deep/autoencoders.py,sha256=O-H5KLmJvYjuE-b6l97esruihK6djocgxbkO2N1X2RM,39306
+likelihood/tools/__init__.py,sha256=N1IhMDzacsGQT2MIYBMBC0zTxes78vC_0gGrwkuPgmg,78
+likelihood/tools/models_tools.py,sha256=bjwoBlDeW1fUi58yJsuKcaTUTgWhOCNsc24_ESYI3BI,3502
 likelihood/tools/numeric_tools.py,sha256=FA44kbiAcxcquz1el_g3Pqsp5ii8XFkAIrsMs5bGkj0,11445
 likelihood/tools/tools.py,sha256=6JLZBHxc4f1lJfw4aBwdS2s16EpydFNqLZF73I7wddQ,44412
-likelihood-1.
-likelihood-1.
-likelihood-1.
-likelihood-1.
-likelihood-1.
+likelihood-1.4.1.dist-info/LICENSE,sha256=XWHWt9egYEUHGPTnlcZfJKLPmysacOwdiLj_-J7Z9ew,1066
+likelihood-1.4.1.dist-info/METADATA,sha256=6otKXhthH5ZSUvYfcghD6CaC1skWZ0FBouXsGXuJfZw,2822
+likelihood-1.4.1.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+likelihood-1.4.1.dist-info/top_level.txt,sha256=KDiBLr870YTxqLFqObTOSrTK10uw8dFsITSNLlte3PA,11
+likelihood-1.4.1.dist-info/RECORD,,

{likelihood-1.3.2.dist-info → likelihood-1.4.1.dist-info}/LICENSE RENAMED
File without changes

{likelihood-1.3.2.dist-info → likelihood-1.4.1.dist-info}/top_level.txt RENAMED
File without changes