likelihood 1.4.0__tar.gz → 1.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {likelihood-1.4.0 → likelihood-1.4.1}/PKG-INFO +1 -1
- {likelihood-1.4.0 → likelihood-1.4.1}/likelihood/graph/nn.py +65 -112
- {likelihood-1.4.0 → likelihood-1.4.1}/likelihood/models/deep/autoencoders.py +41 -8
- {likelihood-1.4.0 → likelihood-1.4.1}/likelihood/tools/__init__.py +1 -0
- likelihood-1.4.1/likelihood/tools/models_tools.py +101 -0
- {likelihood-1.4.0 → likelihood-1.4.1}/likelihood.egg-info/PKG-INFO +1 -1
- {likelihood-1.4.0 → likelihood-1.4.1}/likelihood.egg-info/SOURCES.txt +1 -0
- {likelihood-1.4.0 → likelihood-1.4.1}/LICENSE +0 -0
- {likelihood-1.4.0 → likelihood-1.4.1}/README.md +0 -0
- {likelihood-1.4.0 → likelihood-1.4.1}/likelihood/__init__.py +0 -0
- {likelihood-1.4.0 → likelihood-1.4.1}/likelihood/graph/__init__.py +0 -0
- {likelihood-1.4.0 → likelihood-1.4.1}/likelihood/graph/graph.py +0 -0
- {likelihood-1.4.0 → likelihood-1.4.1}/likelihood/main.py +0 -0
- {likelihood-1.4.0 → likelihood-1.4.1}/likelihood/models/__init__.py +0 -0
- {likelihood-1.4.0 → likelihood-1.4.1}/likelihood/models/deep/__init__.py +0 -0
- {likelihood-1.4.0 → likelihood-1.4.1}/likelihood/models/hmm.py +0 -0
- {likelihood-1.4.0 → likelihood-1.4.1}/likelihood/models/regression.py +0 -0
- {likelihood-1.4.0 → likelihood-1.4.1}/likelihood/models/simulation.py +0 -0
- {likelihood-1.4.0 → likelihood-1.4.1}/likelihood/models/utils.py +0 -0
- {likelihood-1.4.0 → likelihood-1.4.1}/likelihood/tools/numeric_tools.py +0 -0
- {likelihood-1.4.0 → likelihood-1.4.1}/likelihood/tools/tools.py +0 -0
- {likelihood-1.4.0 → likelihood-1.4.1}/likelihood.egg-info/dependency_links.txt +0 -0
- {likelihood-1.4.0 → likelihood-1.4.1}/likelihood.egg-info/requires.txt +0 -0
- {likelihood-1.4.0 → likelihood-1.4.1}/likelihood.egg-info/top_level.txt +0 -0
- {likelihood-1.4.0 → likelihood-1.4.1}/setup.cfg +0 -0
- {likelihood-1.4.0 → likelihood-1.4.1}/setup.py +0 -0
|
@@ -5,7 +5,7 @@ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
|
|
5
5
|
logging.getLogger("tensorflow").setLevel(logging.ERROR)
|
|
6
6
|
|
|
7
7
|
import warnings
|
|
8
|
-
from typing import List, Tuple
|
|
8
|
+
from typing import Any, List, Tuple
|
|
9
9
|
|
|
10
10
|
import numpy as np
|
|
11
11
|
import pandas as pd
|
|
@@ -15,48 +15,43 @@ from pandas.core.frame import DataFrame
|
|
|
15
15
|
from sklearn.metrics import f1_score
|
|
16
16
|
from sklearn.model_selection import train_test_split
|
|
17
17
|
|
|
18
|
-
from likelihood.tools import generate_feature_yaml
|
|
19
|
-
|
|
20
18
|
tf.get_logger().setLevel("ERROR")
|
|
21
19
|
|
|
20
|
+
from likelihood.tools import LoRALayer
|
|
22
21
|
|
|
23
|
-
def compare_similarity(arr1: np.ndarray, arr2: np.ndarray) -> int:
|
|
24
|
-
"""Compares the similarity between two arrays of categories.
|
|
25
22
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
arr1 : `ndarray`
|
|
29
|
-
The first array of categories.
|
|
30
|
-
arr2 : `ndarray`
|
|
31
|
-
The second array of categories.
|
|
23
|
+
def compare_similarity(arr1: List[Any], arr2: List[Any], threshold: float = 0.05) -> int:
    """Count the positions at which two sequences hold similar values.

    Elements are compared pairwise. Two numeric values (``int`` or ``float``)
    are similar when they are equal, or when they share the same sign and the
    ratio of the larger magnitude to the smaller one does not exceed
    ``1 + threshold``. Any other pair of values is similar only when
    ``a == b``.

    Parameters
    ----------
    arr1 : `List[Any]`
        The first sequence of values.
    arr2 : `List[Any]`
        The second sequence of values.
    threshold : `float`, optional
        Maximum relative excess of the larger magnitude over the smaller one
        for two numbers to be considered similar. Default is 0.05.

    Returns
    -------
    count : `int`
        The number of positions whose values are similar. Pairs are formed
        with `zip`, so trailing elements of the longer sequence are ignored.
    """

    def is_similar(a: Any, b: Any) -> bool:
        if not (isinstance(a, (int, float)) and isinstance(b, (int, float))):
            return a == b
        # Equal numbers (including 0 == 0 and equal infinities) are always similar;
        # checking this first avoids inf / inf, which evaluates to NaN.
        if a == b:
            return True
        # A zero can only be similar to another zero: the ratio is undefined.
        if a == 0 or b == 0:
            return False
        # Values of opposite sign are never similar.
        if (a < 0) != (b < 0):
            return False
        # Compare magnitudes so negative pairs get exactly the same tolerance
        # as their positive mirror images.
        larger = max(abs(a), abs(b))
        smaller = min(abs(a), abs(b))
        return larger / smaller <= 1 + threshold

    return sum(is_similar(a, b) for a, b in zip(arr1, arr2))
|
|
44
39
|
|
|
45
40
|
|
|
46
41
|
def cal_adjacency_matrix(
|
|
47
42
|
df: DataFrame, exclude_subset: List[str] = [], sparse: bool = True, **kwargs
|
|
48
43
|
) -> Tuple[dict, np.ndarray]:
|
|
49
44
|
"""Calculates the adjacency matrix for a given DataFrame.
|
|
50
|
-
The adjacency matrix is a matrix that represents the similarity between each pair of
|
|
45
|
+
The adjacency matrix is a matrix that represents the similarity between each pair of features.
|
|
51
46
|
The similarity is calculated using the `compare_similarity` function.
|
|
52
|
-
The resulting matrix is a square matrix with the same number of rows and columns as the input DataFrame.
|
|
47
|
+
The resulting matrix is a square matrix with the same number of rows and columns as the rows of the input DataFrame.
|
|
53
48
|
|
|
54
49
|
Parameters
|
|
55
50
|
----------
|
|
56
51
|
df : `DataFrame`
|
|
57
|
-
The input DataFrame containing the
|
|
52
|
+
The input DataFrame containing the features.
|
|
58
53
|
exclude_subset : `List[str]`, optional
|
|
59
|
-
A list of
|
|
54
|
+
A list of features to exclude from the calculation of the adjacency matrix.
|
|
60
55
|
sparse : `bool`, optional
|
|
61
56
|
Whether to return a sparse matrix or a dense matrix.
|
|
62
57
|
**kwargs : `dict`
|
|
@@ -65,48 +60,33 @@ def cal_adjacency_matrix(
|
|
|
65
60
|
Keyword Arguments:
|
|
66
61
|
----------
|
|
67
62
|
similarity: `int`
|
|
68
|
-
The minimum number of
|
|
63
|
+
The minimum number of features that must be the same in both arrays to be considered similar.
|
|
69
64
|
|
|
70
65
|
Returns
|
|
71
66
|
-------
|
|
72
67
|
adj_dict : `dict`
|
|
73
|
-
A dictionary containing the
|
|
68
|
+
A dictionary containing the features.
|
|
74
69
|
adjacency_matrix : `ndarray`
|
|
75
70
|
The adjacency matrix.
|
|
76
71
|
"""
|
|
77
72
|
|
|
78
|
-
yaml_ = generate_feature_yaml(df)
|
|
79
|
-
categorical_columns = yaml_["categorical_features"]
|
|
80
73
|
if len(exclude_subset) > 0:
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
if len(categorical_columns) > 1:
|
|
84
|
-
df_categorical = df[categorical_columns].copy()
|
|
74
|
+
columns = [col for col in df.columns if col not in exclude_subset]
|
|
75
|
+
df_ = df[columns].copy()
|
|
85
76
|
else:
|
|
86
|
-
|
|
87
|
-
col
|
|
88
|
-
for col in df.columns
|
|
89
|
-
if (
|
|
90
|
-
col not in exclude_subset
|
|
91
|
-
and pd.api.types.is_integer_dtype(df[col])
|
|
92
|
-
and len(df[col].unique()) > 2
|
|
93
|
-
)
|
|
94
|
-
]
|
|
95
|
-
df_categorical = df[categorical_columns].copy()
|
|
77
|
+
df_ = df.copy()
|
|
96
78
|
|
|
97
|
-
assert len(
|
|
79
|
+
assert len(df_) > 0
|
|
98
80
|
|
|
99
|
-
similarity = kwargs.get("similarity", len(
|
|
100
|
-
assert similarity <=
|
|
81
|
+
similarity = kwargs.get("similarity", len(df_.columns) - 1)
|
|
82
|
+
assert similarity <= df_.shape[1]
|
|
101
83
|
|
|
102
|
-
adj_dict = {}
|
|
103
|
-
for index, row in df_categorical.iterrows():
|
|
104
|
-
adj_dict[index] = row.to_list()
|
|
84
|
+
adj_dict = {index: row.tolist() for index, row in df_.iterrows()}
|
|
105
85
|
|
|
106
|
-
adjacency_matrix = np.zeros((len(
|
|
86
|
+
adjacency_matrix = np.zeros((len(df_), len(df_)))
|
|
107
87
|
|
|
108
|
-
for i in range(len(
|
|
109
|
-
for j in range(len(
|
|
88
|
+
for i in range(len(df_)):
|
|
89
|
+
for j in range(len(df_)):
|
|
110
90
|
if compare_similarity(adj_dict[i], adj_dict[j]) >= similarity:
|
|
111
91
|
adjacency_matrix[i][j] = 1
|
|
112
92
|
|
|
@@ -131,8 +111,10 @@ class Data:
|
|
|
131
111
|
df: DataFrame,
|
|
132
112
|
target: str | None = None,
|
|
133
113
|
exclude_subset: List[str] = [],
|
|
114
|
+
**kwargs,
|
|
134
115
|
):
|
|
135
|
-
|
|
116
|
+
sparse = kwargs.get("sparse", True)
|
|
117
|
+
_, adjacency = cal_adjacency_matrix(df, exclude_subset=exclude_subset, sparse=sparse)
|
|
136
118
|
if target is not None:
|
|
137
119
|
X = df.drop(columns=[target] + exclude_subset)
|
|
138
120
|
else:
|
|
@@ -147,16 +129,20 @@ class Data:
|
|
|
147
129
|
|
|
148
130
|
@tf.keras.utils.register_keras_serializable(package="Custom", name="VanillaGNNLayer")
|
|
149
131
|
class VanillaGNNLayer(tf.keras.layers.Layer):
|
|
150
|
-
def __init__(self, dim_in, dim_out, kernel_initializer="glorot_uniform", **kwargs):
|
|
132
|
+
def __init__(self, dim_in, dim_out, rank=None, kernel_initializer="glorot_uniform", **kwargs):
|
|
151
133
|
super(VanillaGNNLayer, self).__init__(**kwargs)
|
|
152
134
|
self.dim_out = dim_out
|
|
135
|
+
self.rank = rank
|
|
153
136
|
self.kernel_initializer = kernel_initializer
|
|
154
137
|
self.linear = None
|
|
155
138
|
|
|
156
139
|
def build(self, input_shape):
|
|
157
|
-
self.
|
|
158
|
-
self.
|
|
159
|
-
|
|
140
|
+
if self.rank:
|
|
141
|
+
self.linear = LoRALayer(self.dim_out, rank=self.rank)
|
|
142
|
+
else:
|
|
143
|
+
self.linear = tf.keras.layers.Dense(
|
|
144
|
+
self.dim_out, use_bias=False, kernel_initializer=self.kernel_initializer
|
|
145
|
+
)
|
|
160
146
|
super(VanillaGNNLayer, self).build(input_shape)
|
|
161
147
|
|
|
162
148
|
def call(self, x, adjacency):
|
|
@@ -169,8 +155,11 @@ class VanillaGNNLayer(tf.keras.layers.Layer):
|
|
|
169
155
|
config.update(
|
|
170
156
|
{
|
|
171
157
|
"dim_out": self.dim_out,
|
|
172
|
-
"
|
|
173
|
-
|
|
158
|
+
"rank": self.rank,
|
|
159
|
+
"kernel_initializer": (
|
|
160
|
+
None
|
|
161
|
+
if self.rank
|
|
162
|
+
else tf.keras.initializers.serialize(self.linear.kernel_initializer)
|
|
174
163
|
),
|
|
175
164
|
}
|
|
176
165
|
)
|
|
@@ -179,14 +168,16 @@ class VanillaGNNLayer(tf.keras.layers.Layer):
|
|
|
179
168
|
|
|
180
169
|
@tf.keras.utils.register_keras_serializable(package="Custom", name="VanillaGNN")
|
|
181
170
|
class VanillaGNN(tf.keras.Model):
|
|
182
|
-
def __init__(self, dim_in, dim_h, dim_out, **kwargs):
|
|
171
|
+
def __init__(self, dim_in, dim_h, dim_out, rank=2, **kwargs):
|
|
183
172
|
super(VanillaGNN, self).__init__(**kwargs)
|
|
184
173
|
self.dim_in = dim_in
|
|
185
174
|
self.dim_h = dim_h
|
|
186
175
|
self.dim_out = dim_out
|
|
187
|
-
self.
|
|
188
|
-
|
|
189
|
-
self.
|
|
176
|
+
self.rank = rank
|
|
177
|
+
|
|
178
|
+
self.gnn1 = VanillaGNNLayer(self.dim_in, self.dim_h, self.rank)
|
|
179
|
+
self.gnn2 = VanillaGNNLayer(self.dim_h, self.dim_h, self.rank)
|
|
180
|
+
self.gnn3 = VanillaGNNLayer(self.dim_h, self.dim_out, None)
|
|
190
181
|
|
|
191
182
|
def call(self, x, adjacency):
|
|
192
183
|
h = self.gnn1(x, adjacency)
|
|
@@ -208,13 +199,13 @@ class VanillaGNN(tf.keras.Model):
|
|
|
208
199
|
out = self(x, adjacency)
|
|
209
200
|
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=out)
|
|
210
201
|
loss = tf.reduce_mean(loss)
|
|
211
|
-
f1 = self.compute_f1_score(out, y)
|
|
202
|
+
f1 = round(self.compute_f1_score(out, y), 4)
|
|
212
203
|
return loss.numpy(), f1
|
|
213
204
|
|
|
214
205
|
def test(self, data):
|
|
215
206
|
out = self(data.x, data.adjacency)
|
|
216
207
|
test_f1 = self.compute_f1_score(out, data.y)
|
|
217
|
-
return test_f1
|
|
208
|
+
return round(test_f1, 4)
|
|
218
209
|
|
|
219
210
|
def predict(self, data):
|
|
220
211
|
out = self(data.x, data.adjacency)
|
|
@@ -225,6 +216,7 @@ class VanillaGNN(tf.keras.Model):
|
|
|
225
216
|
"dim_in": self.dim_in,
|
|
226
217
|
"dim_h": self.dim_h,
|
|
227
218
|
"dim_out": self.dim_out,
|
|
219
|
+
"rank": self.rank,
|
|
228
220
|
}
|
|
229
221
|
base_config = super(VanillaGNN, self).get_config()
|
|
230
222
|
return dict(list(base_config.items()) + list(config.items()))
|
|
@@ -235,6 +227,7 @@ class VanillaGNN(tf.keras.Model):
|
|
|
235
227
|
dim_in=config["dim_in"],
|
|
236
228
|
dim_h=config["dim_h"],
|
|
237
229
|
dim_out=config["dim_out"],
|
|
230
|
+
rank=config["rank"],
|
|
238
231
|
)
|
|
239
232
|
|
|
240
233
|
@tf.function
|
|
@@ -248,10 +241,6 @@ class VanillaGNN(tf.keras.Model):
|
|
|
248
241
|
return loss
|
|
249
242
|
|
|
250
243
|
def fit(self, data, epochs, batch_size, test_size=0.2, optimizer="adam"):
|
|
251
|
-
warnings.warn(
|
|
252
|
-
"It is normal for validation metrics to underperform. Use the test method to validate after training.",
|
|
253
|
-
UserWarning,
|
|
254
|
-
)
|
|
255
244
|
optimizers = {
|
|
256
245
|
"sgd": tf.keras.optimizers.SGD(),
|
|
257
246
|
"adam": tf.keras.optimizers.Adam(),
|
|
@@ -290,56 +279,20 @@ class VanillaGNN(tf.keras.Model):
|
|
|
290
279
|
train_f1_scores.append(train_f1)
|
|
291
280
|
|
|
292
281
|
if epoch % 5 == 0:
|
|
282
|
+
clear_output(wait=True)
|
|
283
|
+
warnings.warn(
|
|
284
|
+
"It is normal for validation metrics to underperform during training. Use the test method to validate after training.",
|
|
285
|
+
UserWarning,
|
|
286
|
+
)
|
|
293
287
|
val_loss, val_f1 = self.evaluate(X_test, adjacency_test, y_test)
|
|
294
288
|
val_losses.append(val_loss)
|
|
295
289
|
val_f1_scores.append(val_f1)
|
|
296
|
-
clear_output(wait=True)
|
|
297
290
|
print(
|
|
298
|
-
f"Epoch {epoch:>3} | Train Loss: {train_loss:.
|
|
291
|
+
f"Epoch {epoch:>3} | Train Loss: {train_loss:.4f} | Train F1: {train_f1:.4f} | Val Loss: {val_loss:.4f} | Val F1: {val_f1:.4f}"
|
|
299
292
|
)
|
|
300
293
|
|
|
301
294
|
return train_losses, train_f1_scores, val_losses, val_f1_scores
|
|
302
295
|
|
|
303
296
|
|
|
304
297
|
if __name__ == "__main__":
|
|
305
|
-
|
|
306
|
-
import pandas as pd
|
|
307
|
-
from sklearn.datasets import load_iris
|
|
308
|
-
|
|
309
|
-
# Load the dataset
|
|
310
|
-
iris = load_iris()
|
|
311
|
-
|
|
312
|
-
# Convert to a DataFrame for easy exploration
|
|
313
|
-
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
|
|
314
|
-
iris_df["species"] = iris.target
|
|
315
|
-
|
|
316
|
-
iris_df["sepal length (cm)"] = iris_df["sepal length (cm)"].astype("category")
|
|
317
|
-
iris_df["sepal width (cm)"] = iris_df["sepal width (cm)"].astype("category")
|
|
318
|
-
iris_df["petal length (cm)"] = iris_df["petal length (cm)"].astype("category")
|
|
319
|
-
iris_df["petal width (cm)"] = iris_df["petal width (cm)"].astype("category")
|
|
320
|
-
|
|
321
|
-
# Display the first few rows of the dataset
|
|
322
|
-
print(iris_df.head())
|
|
323
|
-
|
|
324
|
-
iris_df = iris_df.sample(frac=1, replace=False).reset_index(drop=True)
|
|
325
|
-
|
|
326
|
-
data = Data(iris_df, "species")
|
|
327
|
-
|
|
328
|
-
model = VanillaGNN(dim_in=data.x.shape[1], dim_h=8, dim_out=len(iris_df["species"].unique()))
|
|
329
|
-
print("Before training F1:", model.test(data))
|
|
330
|
-
model.fit(data, epochs=200, batch_size=32, test_size=0.5)
|
|
331
|
-
model.save("./best_model", save_format="tf")
|
|
332
|
-
print("After training F1:", model.test(data))
|
|
333
|
-
best_model = tf.keras.models.load_model("./best_model")
|
|
334
|
-
|
|
335
|
-
print("After loading F1:", best_model.test(data))
|
|
336
|
-
df_results = pd.DataFrame()
|
|
337
|
-
|
|
338
|
-
# Suppose we have a new dataset without the target variable
|
|
339
|
-
iris_df = iris_df.drop(columns=["species"])
|
|
340
|
-
data_new = Data(iris_df)
|
|
341
|
-
print("Predictions:", best_model.predict(data_new))
|
|
342
|
-
df_results["predicted"] = list(model.predict(data))
|
|
343
|
-
df_results["actual"] = list(data.y)
|
|
344
|
-
# df_results.to_csv("results.csv", index=False)
|
|
345
|
-
breakpoint()
|
|
298
|
+
print("Examples will be running below")
|
|
@@ -24,7 +24,7 @@ from sklearn.manifold import TSNE
|
|
|
24
24
|
from tensorflow.keras.layers import InputLayer
|
|
25
25
|
from tensorflow.keras.regularizers import l2
|
|
26
26
|
|
|
27
|
-
from likelihood.tools import OneHotEncoder
|
|
27
|
+
from likelihood.tools import LoRALayer, OneHotEncoder
|
|
28
28
|
|
|
29
29
|
tf.get_logger().setLevel("ERROR")
|
|
30
30
|
|
|
@@ -257,6 +257,10 @@ class AutoClassifier(tf.keras.Model):
|
|
|
257
257
|
Whether to use variational autoencoder mode. Default is False.
|
|
258
258
|
vae_units : `int`
|
|
259
259
|
The number of units in the variational autoencoder. Default is 2.
|
|
260
|
+
lora_mode : `bool`
|
|
261
|
+
Whether to use LoRA layers. Default is False.
|
|
262
|
+
lora_rank : `int`
|
|
263
|
+
The rank of the LoRA layer. Default is 4.
|
|
260
264
|
"""
|
|
261
265
|
|
|
262
266
|
def __init__(self, input_shape_parm, num_classes, units, activation, **kwargs):
|
|
@@ -275,6 +279,8 @@ class AutoClassifier(tf.keras.Model):
|
|
|
275
279
|
self.l2_reg = kwargs.get("l2_reg", 0.0)
|
|
276
280
|
self.vae_mode = kwargs.get("vae_mode", False)
|
|
277
281
|
self.vae_units = kwargs.get("vae_units", 2)
|
|
282
|
+
self.lora_mode = kwargs.get("lora_mode", False)
|
|
283
|
+
self.lora_rank = kwargs.get("lora_rank", 4)
|
|
278
284
|
|
|
279
285
|
def build_encoder_decoder(self, input_shape):
|
|
280
286
|
self.encoder = (
|
|
@@ -369,7 +375,7 @@ class AutoClassifier(tf.keras.Model):
|
|
|
369
375
|
|
|
370
376
|
# Classifier with L2 regularization
|
|
371
377
|
self.classifier = tf.keras.Sequential()
|
|
372
|
-
if self.num_layers > 1:
|
|
378
|
+
if self.num_layers > 1 and not self.lora_mode:
|
|
373
379
|
for _ in range(self.num_layers - 1):
|
|
374
380
|
self.classifier.add(
|
|
375
381
|
tf.keras.layers.Dense(
|
|
@@ -380,13 +386,36 @@ class AutoClassifier(tf.keras.Model):
|
|
|
380
386
|
)
|
|
381
387
|
if self.dropout:
|
|
382
388
|
self.classifier.add(tf.keras.layers.Dropout(self.dropout))
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
389
|
+
self.classifier.add(
|
|
390
|
+
tf.keras.layers.Dense(
|
|
391
|
+
units=self.num_classes,
|
|
392
|
+
activation=self.classifier_activation,
|
|
393
|
+
kernel_regularizer=l2(self.l2_reg),
|
|
394
|
+
)
|
|
395
|
+
)
|
|
396
|
+
elif self.lora_mode:
|
|
397
|
+
for _ in range(self.num_layers - 1):
|
|
398
|
+
self.classifier.add(
|
|
399
|
+
LoRALayer(units=self.units, rank=self.lora_rank, name=f"LoRA_{_}")
|
|
400
|
+
)
|
|
401
|
+
self.classifier.add(tf.keras.layers.Activation(self.activation))
|
|
402
|
+
if self.dropout:
|
|
403
|
+
self.classifier.add(tf.keras.layers.Dropout(self.dropout))
|
|
404
|
+
self.classifier.add(
|
|
405
|
+
tf.keras.layers.Dense(
|
|
406
|
+
units=self.num_classes,
|
|
407
|
+
activation=self.classifier_activation,
|
|
408
|
+
kernel_regularizer=l2(self.l2_reg),
|
|
409
|
+
)
|
|
410
|
+
)
|
|
411
|
+
else:
|
|
412
|
+
self.classifier.add(
|
|
413
|
+
tf.keras.layers.Dense(
|
|
414
|
+
units=self.num_classes,
|
|
415
|
+
activation=self.classifier_activation,
|
|
416
|
+
kernel_regularizer=l2(self.l2_reg),
|
|
417
|
+
)
|
|
388
418
|
)
|
|
389
|
-
)
|
|
390
419
|
|
|
391
420
|
def train_encoder_decoder(
|
|
392
421
|
self, data, epochs, batch_size, validation_split=0.2, patience=10, **kwargs
|
|
@@ -552,6 +581,8 @@ class AutoClassifier(tf.keras.Model):
|
|
|
552
581
|
"l2_reg": self.l2_reg,
|
|
553
582
|
"vae_mode": self.vae_mode,
|
|
554
583
|
"vae_units": self.vae_units,
|
|
584
|
+
"lora_mode": self.lora_mode,
|
|
585
|
+
"lora_rank": self.lora_rank,
|
|
555
586
|
}
|
|
556
587
|
base_config = super(AutoClassifier, self).get_config()
|
|
557
588
|
return dict(list(base_config.items()) + list(config.items()))
|
|
@@ -569,6 +600,8 @@ class AutoClassifier(tf.keras.Model):
|
|
|
569
600
|
l2_reg=config["l2_reg"],
|
|
570
601
|
vae_mode=config["vae_mode"],
|
|
571
602
|
vae_units=config["vae_units"],
|
|
603
|
+
lora_mode=config["lora_mode"],
|
|
604
|
+
lora_rank=config["lora_rank"],
|
|
572
605
|
)
|
|
573
606
|
|
|
574
607
|
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
import networkx as nx
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
|
8
|
+
logging.getLogger("tensorflow").setLevel(logging.ERROR)
|
|
9
|
+
|
|
10
|
+
import tensorflow as tf
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@tf.keras.utils.register_keras_serializable(package="Custom", name="LoRALayer")
class LoRALayer(tf.keras.layers.Layer):
    """Linear layer whose weight matrix is factorised into two low-rank matrices.

    The layer computes ``inputs @ A @ B`` where ``A`` has shape
    ``(input_dim, rank)`` and ``B`` has shape ``(rank, units)``. There is no
    bias term and no activation; both factors are trainable.

    Parameters
    ----------
    units : `int`
        Size of the last dimension of the output.
    rank : `int`, optional
        Inner dimension shared by the two factors. Default is 4.
    **kwargs
        Forwarded to `tf.keras.layers.Layer`.
    """

    def __init__(self, units, rank=4, **kwargs):
        super(LoRALayer, self).__init__(**kwargs)
        self.units = units
        self.rank = rank

    def build(self, input_shape):
        """Create the factors ``A`` and ``B`` once the input size is known.

        Raises
        ------
        ValueError
            If the last input dimension is unknown, or if `rank` is greater
            than the input dimension or than `units`.
        """
        input_dim = input_shape[-1]
        print(f"Input shape: {input_shape}")

        # Without this check an unknown dimension fails on the comparison below
        # with an unhelpful TypeError.
        if input_dim is None:
            raise ValueError("The last dimension of the input to LoRALayer must be defined.")
        if self.rank > input_dim:
            raise ValueError(
                f"Rank ({self.rank}) cannot be greater than input dimension ({input_dim})."
            )
        if self.rank > self.units:
            raise ValueError(
                f"Rank ({self.rank}) cannot be greater than number of units ({self.units})."
            )

        self.A = self.add_weight(
            shape=(input_dim, self.rank), initializer="random_normal", trainable=True, name="A"
        )
        self.B = self.add_weight(
            shape=(self.rank, self.units), initializer="random_normal", trainable=True, name="B"
        )
        print(f"Dense weights shape: {input_dim}x{self.units}")
        print(f"LoRA weights shape: A{self.A.shape}, B{self.B.shape}")
        super(LoRALayer, self).build(input_shape)

    def call(self, inputs):
        """Project `inputs` through ``A`` and then ``B``."""
        return tf.matmul(tf.matmul(inputs, self.A), self.B)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created on load."""
        config = super(LoRALayer, self).get_config()
        config.update({"units": self.units, "rank": self.rank})
        return config
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def apply_lora(model, rank=4):
    """Build a new model in which every `Dense` layer is replaced by a `LoRALayer`.

    The layers of `model` are visited in order and chained one after another
    on a fresh input, so this only makes sense for models that are a simple
    stack of layers. Each `Dense` layer is replaced by a newly initialised
    `LoRALayer` with the same number of units, followed by the activation the
    `Dense` layer used. The weights and bias of the `Dense` layer are not
    carried over. Every other layer is reused as is.

    Parameters
    ----------
    model : `tf.keras.Model`
        The model whose `Dense` layers are to be replaced.
    rank : `int`, optional
        Rank given to every `LoRALayer`. Default is 4.

    Returns
    -------
    new_model : `tf.keras.Model`
        The rebuilt model.
    """
    inputs = tf.keras.Input(shape=model.input_shape[1:])
    x = inputs

    for layer in model.layers:
        # The new model already has its own input; do not re-apply the old one.
        if isinstance(layer, tf.keras.layers.InputLayer):
            continue
        if isinstance(layer, tf.keras.layers.Dense):
            print(f"Applying LoRA to layer {layer.name}")
            x = LoRALayer(units=layer.units, rank=rank)(x)
            # LoRALayer is purely linear, so re-apply the activation of the
            # layer it replaces; otherwise the non-linearity is lost.
            if layer.activation is not tf.keras.activations.linear:
                x = tf.keras.layers.Activation(layer.activation)(x)
        else:
            x = layer(x)
    new_model = tf.keras.Model(inputs=inputs, outputs=x)
    return new_model
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def graph_metrics(adj_matrix, eigenvector_threshold=1e-6):
    """Compute a table of graph metrics from an adjacency matrix.

    The matrix is cast to integers and converted into a NetworkX graph. The
    returned DataFrame has one row per node and these columns:

    - Degree
    - Degree Centrality
    - Clustering Coefficient
    - Eigenvector Centrality
    - Betweenness Centrality
    - Closeness Centrality
    - Assortativity (a single graph-level value repeated on every row)

    Parameters
    ----------
    adj_matrix : array-like with an `astype` method
        The adjacency matrix of the graph.
    eigenvector_threshold : `float`, optional
        Eigenvector centralities below this value are replaced by 0.0.
        Default is 1e-6.

    Returns
    -------
    metrics_df : `DataFrame`
        The metrics described above.
    """
    graph = nx.from_numpy_array(adj_matrix.astype(int))

    node_degree_centrality = nx.degree_centrality(graph)
    node_clustering = nx.clustering(graph)

    try:
        raw_eigenvector = nx.eigenvector_centrality(graph, max_iter=500)
    except nx.PowerIterationFailedConvergence:
        print("Power iteration failed to converge. Returning NaN for eigenvector centrality.")
        raw_eigenvector = {node: float("nan") for node in graph.nodes()}

    # Clamp negligible values to zero. NaN entries are kept, because a
    # comparison against NaN is always false.
    node_eigenvector = {
        node: 0.0 if value < eigenvector_threshold else value
        for node, value in raw_eigenvector.items()
    }

    node_degree = dict(graph.degree())
    node_betweenness = nx.betweenness_centrality(graph)
    node_closeness = nx.closeness_centrality(graph)
    graph_assortativity = nx.degree_assortativity_coefficient(graph)

    columns = {
        "Degree": node_degree,
        "Degree Centrality": node_degree_centrality,
        "Clustering Coefficient": node_clustering,
        "Eigenvector Centrality": node_eigenvector,
        "Betweenness Centrality": node_betweenness,
        "Closeness Centrality": node_closeness,
    }
    metrics_df = pd.DataFrame(columns)
    metrics_df["Assortativity"] = graph_assortativity

    return metrics_df
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|