likelihood 2.2.0.dev1__cp310-cp310-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
likelihood/VERSION ADDED
@@ -0,0 +1 @@
+ 2.2.0dev1
likelihood/__init__.py ADDED
@@ -0,0 +1,20 @@
+ """
+ Likelihood: Initialize the Package
+ ==================================
+
+ This is the entry point of the Likelihood package. It initializes all necessary modules and provides a central hub for accessing various tools and functions.
+
+ Main Modules:
+ - likelihood.main: Provides access to core functionality, including data preprocessing, model training, and analysis.
+ - likelihood.models: Offers pre-built models for AutoEncoder-based classification and regression tasks.
+ - likelihood.tools: Contains utility functions for data manipulation, normalization, and visualization.
+
+ By importing the main modules directly or accessing them through this central entry point (i.e., `from likelihood import *`), you can leverage the full range of Likelihood's capabilities to streamline your data analysis workflow.
+
+ To get started with Likelihood, simply import the desired modules and start exploring!
+ """
+
+ from likelihood.main import *
+ from likelihood.models import *
+ from likelihood.pipes import Pipeline
+ from likelihood.tools import *
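
For orientation, a minimal sketch of how this entry point is used. Only `Pipeline` is bound by name above; everything else arrives via star imports, so the sketch stops at importing what the diff itself shows:

    # Sketch only: Pipeline's constructor arguments are not shown in this
    # diff, so usage beyond the import is not assumed here.
    from likelihood import Pipeline
    from likelihood import tools  # utility helpers, per the docstring above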
@@ -0,0 +1,9 @@
+ import tensorflow as tf
+ from packaging import version
+
+ from .graph import *
+
+ if version.parse(tf.__version__) > version.parse("2.15.0"):
+     from ._nn import *
+ else:
+     from .nn import *
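
The gate above selects `_nn`, presumably the implementation for newer TensorFlow releases, only when the installed version is strictly newer than 2.15.0. A standalone sketch of how `packaging.version` resolves such comparisons (not part of the package):

    from packaging import version

    # parse() understands pre-release and dev segments, so the gate also
    # classifies builds like "2.16.0rc0" as newer than 2.15.0:
    assert version.parse("2.16.0rc0") > version.parse("2.15.0")
    assert not (version.parse("2.15.0") > version.parse("2.15.0"))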
@@ -0,0 +1,283 @@
+ import logging
+ import os
+
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+ logging.getLogger("tensorflow").setLevel(logging.ERROR)
+
+ from multiprocessing import Pool, cpu_count
+ from typing import List, Tuple
+
+ import numpy as np
+ import pandas as pd
+ import tensorflow as tf
+ from IPython.display import clear_output
+ from sklearn.metrics import f1_score
+
+ tf.get_logger().setLevel("ERROR")
+
+ from likelihood.tools import LoRALayer
+
+ from .nn import Data, cal_adjacency_matrix, compare_pair, compare_similarity_np
+
+
+ @tf.keras.utils.register_keras_serializable(package="Custom", name="VanillaGNNLayer")
+ class VanillaGNNLayer(tf.keras.layers.Layer):
+     """Graph layer: a linear projection followed by sparse neighborhood aggregation."""
+
+     def __init__(self, dim_in, dim_out, rank=None, kernel_initializer="glorot_uniform", **kwargs):
+         super().__init__(**kwargs)
+         self.dim_in = dim_in
+         self.dim_out = dim_out
+         self.rank = rank
+         self.kernel_initializer = kernel_initializer
+         self.linear = None
+
+     def build(self, input_shape):
+         # A truthy rank swaps the dense projection for a low-rank LoRA adapter.
+         if self.rank:
+             self.linear = LoRALayer(self.dim_out, rank=self.rank)
+         else:
+             self.linear = tf.keras.layers.Dense(
+                 self.dim_out, use_bias=False, kernel_initializer=self.kernel_initializer
+             )
+         super().build(input_shape)
+
+     def call(self, x, adjacency):
+         x = self.linear(x)
+         # Aggregate the projected features over the (sparse) adjacency matrix.
+         x = tf.sparse.sparse_dense_matmul(adjacency, x)
+         return x
+
+     def get_config(self):
+         config = super().get_config()
+         config.update(
+             {
+                 "dim_in": self.dim_in,
+                 "dim_out": self.dim_out,
+                 "rank": self.rank,
+                 # Serialize from the stored initializer rather than from
+                 # self.linear, which is None until build() runs.
+                 "kernel_initializer": (
+                     None
+                     if self.rank
+                     else tf.keras.initializers.serialize(
+                         tf.keras.initializers.get(self.kernel_initializer)
+                     )
+                 ),
+             }
+         )
+         return config
+
+     @classmethod
+     def from_config(cls, config):
+         if config.get("kernel_initializer") is not None:
+             config["kernel_initializer"] = tf.keras.initializers.deserialize(
+                 config["kernel_initializer"]
+             )
+         return cls(**config)
+
+
+ class VanillaGNN:
+     """A three-layer GNN assembled as a Keras functional model."""
+
+     def __init__(self, dim_in, dim_h, dim_out, rank=2, **kwargs):
+         self.dim_in = dim_in
+         self.dim_h = dim_h
+         self.dim_out = dim_out
+         self.rank = rank
+
+         self.gnn1 = VanillaGNNLayer(self.dim_in, self.dim_h, self.rank)
+         self.gnn2 = VanillaGNNLayer(self.dim_h, self.dim_h, self.rank)
+         self.gnn3 = VanillaGNNLayer(self.dim_h, self.dim_out, rank=None)
+
+         self.build()
+
+     def build(self):
+         x_in = tf.keras.Input(shape=(self.dim_in,), name="node_features")
+         adjacency_in = tf.keras.Input(shape=(None,), sparse=True, name="adjacency")
+
+         # Wire the layers created in __init__ so their weights belong to the
+         # model (rather than rebuilding fresh, untracked copies here).
+         h = self.gnn1(x_in, adjacency_in)
+         h = tf.keras.activations.tanh(h)
+         h = self.gnn2(h, adjacency_in)
+         h = self.gnn3(h, adjacency_in)
+         # Emit raw logits: evaluate() and train_step() use
+         # sparse_softmax_cross_entropy_with_logits, which applies the
+         # softmax itself, so the model must not apply one first.
+
+         self.model = tf.keras.Model(
+             inputs=[x_in, adjacency_in], outputs=h, name="VanillaGNN_Functional"
+         )
+
+     @tf.function
+     def __call__(self, x, adjacency):
+         return self.model([x, adjacency])
+
+     def f1_macro(self, y_true, y_pred):
+         return f1_score(y_true, y_pred, average="macro")
+
+     def compute_f1_score(self, logits, labels):
+         predictions = tf.argmax(logits, axis=1, output_type=tf.int32)
+         true_labels = tf.cast(labels, tf.int32)
+         return self.f1_macro(true_labels.numpy(), predictions.numpy())
+
+     def evaluate(self, x, adjacency, y):
+         y = tf.cast(y, tf.int32)
+         out = self(x, adjacency)
+         loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=out)
+         loss = tf.reduce_mean(loss)
+         f1 = round(self.compute_f1_score(out, y), 4)
+         return loss.numpy(), f1
+
+     def test(self, data):
+         data.x = tf.convert_to_tensor(data.x) if not tf.is_tensor(data.x) else data.x
+         out = self(data.x, data.adjacency)
+         test_f1 = self.compute_f1_score(out, data.y)
+         return round(test_f1, 4)
+
+     def predict(self, data):
+         data.x = tf.convert_to_tensor(data.x) if not tf.is_tensor(data.x) else data.x
+         out = self(data.x, data.adjacency)
+         return tf.argmax(out, axis=1, output_type=tf.int32).numpy()
+
+     def save(self, filepath, **kwargs):
+         """
+         Save the complete model including all components.
+
+         Parameters
+         ----------
+         filepath : str
+             Path where to save the model.
+         """
+         import json
+
+         # Create the target directory if it doesn't exist
+         # (os is already imported at module level).
+         os.makedirs(filepath, exist_ok=True)
+
+         self.model.save(os.path.join(filepath, "main_model.keras"))
+
+         # Save the constructor configuration alongside the weights
+         config = self.get_config()
+
+         with open(os.path.join(filepath, "config.json"), "w") as f:
+             json.dump(config, f, indent=2)
+
+     @classmethod
+     def load(cls, filepath):
+         """
+         Load a complete model from saved components.
+
+         Parameters
+         ----------
+         filepath : str
+             Path where the model was saved.
+
+         Returns
+         -------
+         VanillaGNN
+             The loaded model instance.
+         """
+         import json
+
+         # Load the saved configuration and rebuild the instance from it
+         with open(os.path.join(filepath, "config.json"), "r") as f:
+             config = json.load(f)
+
+         instance = cls(**config)
+
+         instance.model = tf.keras.models.load_model(os.path.join(filepath, "main_model.keras"))
+
+         return instance
+
+     def get_config(self):
+         return {
+             "dim_in": self.dim_in,
+             "dim_h": self.dim_h,
+             "dim_out": self.dim_out,
+             "rank": self.rank,
+         }
+
+     @classmethod
+     def from_config(cls, config):
+         return cls(
+             dim_in=config["dim_in"],
+             dim_h=config["dim_h"],
+             dim_out=config["dim_out"],
+             rank=config["rank"],
+         )
+
+     def get_build_config(self):
+         config = {
+             "dim_in": self.dim_in,
+             "dim_h": self.dim_h,
+             "dim_out": self.dim_out,
+             "rank": self.rank,
+         }
+         return config
+
+     @classmethod
+     def build_from_config(cls, config):
+         return cls(**config)
+
+     @tf.function
+     def train_step(self, batch_x, batch_adjacency, batch_y, optimizer):
+         with tf.GradientTape() as tape:
+             out = self(batch_x, batch_adjacency)
+             loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=batch_y, logits=out)
+             loss = tf.reduce_mean(loss)
+         gradients = tape.gradient(loss, self.model.trainable_variables)
+         optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
+         return loss
+
+     def fit(self, data, epochs, batch_size, test_size=0.2, optimizer="adam"):
+         optimizers = {
+             "sgd": tf.keras.optimizers.SGD(),
+             "adam": tf.keras.optimizers.Adam(),
+             "adamw": tf.keras.optimizers.AdamW(),
+             "adadelta": tf.keras.optimizers.Adadelta(),
+             "rmsprop": tf.keras.optimizers.RMSprop(),
+         }
+         optimizer = optimizers[optimizer]
+         train_losses = []
+         train_f1_scores = []
+         val_losses = []
+         val_f1_scores = []
+
+         # Hold out the last test_size fraction of nodes for validation.
+         num_nodes = len(data.x)
+         split_index = int((1 - test_size) * num_nodes)
+
+         X_train, X_test = data.x[:split_index], data.x[split_index:]
+         y_train, y_test = data.y[:split_index], data.y[split_index:]
+
+         adjacency_train = tf.sparse.slice(data.adjacency, [0, 0], [split_index, split_index])
+         adjacency_test = tf.sparse.slice(
+             data.adjacency,
+             [split_index, split_index],
+             [num_nodes - split_index, num_nodes - split_index],
+         )
+
+         batch_starts = np.arange(0, len(X_train), batch_size)
+         for epoch in range(epochs):
+             np.random.shuffle(batch_starts)
+             for start in batch_starts:
+                 # Clamp the final batch so the sparse slice stays in bounds.
+                 end = min(start + batch_size, split_index)
+                 batch_x = X_train[start:end, :]
+                 batch_adjacency = tf.sparse.slice(
+                     adjacency_train, [start, start], [end - start, end - start]
+                 )
+                 batch_y = y_train[start:end]
+                 self.train_step(batch_x, batch_adjacency, batch_y, optimizer)
+
+             train_loss, train_f1 = self.evaluate(X_train, adjacency_train, y_train)
+             train_losses.append(train_loss)
+             train_f1_scores.append(train_f1)
+
+             if epoch % 5 == 0:
+                 clear_output(wait=True)
+                 val_loss, val_f1 = self.evaluate(X_test, adjacency_test, y_test)
+                 val_losses.append(val_loss)
+                 val_f1_scores.append(val_f1)
+                 print(
+                     f"Epoch {epoch:>3} | Train Loss: {train_loss:.4f} | Train F1: {train_f1:.4f} | Val Loss: {val_loss:.4f} | Val F1: {val_f1:.4f}"
+                 )
+
+         return train_losses, train_f1_scores, val_losses, val_f1_scores
+
+
+ if __name__ == "__main__":
+     print("Examples will be running below")
@@ -0,0 +1,86 @@
+ from typing import List
+
+ import networkx as nx
+ import pandas as pd
+ from IPython.display import HTML, display
+ from pyvis.network import Network
+
+ from likelihood.tools import FeatureSelection
+
+
+ class DynamicGraph(FeatureSelection):
+     """A class to represent a dynamic graph"""
+
+     def __init__(self, df: pd.DataFrame, n_importances: int, **kwargs):
+         self.G = Network(notebook=True, cdn_resources="remote", directed=True)
+         self.df = df
+         self.n_importances = n_importances
+         super().__init__(**kwargs)
+         self.labels: List[str] = []
+
+     def fit(self, **kwargs) -> None:
+         """Fit the model according to the given data and parameters."""
+         self.get_digraph(self.df, self.n_importances)
+         self.get_index = dict(zip(self.X.columns, range(len(self.X.columns))))
+         self._make_network()
+
+     def _make_network(self) -> None:
+         """Create nodes and edges of the network based on feature importance scores."""
+         self._add_nodes()
+         for item in self.all_features_imp_graph:
+             node, edges = item[0], item[1]
+             for label, weight in edges:
+                 self.G.add_edge(self.get_index[node], self.get_index[label], weight=weight)
+
+     def _add_nodes(self) -> None:
+         for i, item in enumerate(self.all_features_imp_graph):
+             node = item[0]
+             self.labels.append(node)
+             self.G.add_node(n_id=i, label=node)
+
+     def draw(self, name="graph.html", **kwargs) -> None:
+         """Display the network using HTML format."""
+         spring_length = kwargs.get("spring_length", 500)
+         node_distance = kwargs.get("node_distance", 100)
+         self.G.repulsion(node_distance=node_distance, spring_length=spring_length)
+         self.G.show_buttons(filter_=["physics"])
+         self.G.show(name)
+
+         # Read the generated HTML back and render it inline, closing the file handle.
+         with open(name, "r") as f:
+             display(HTML(f.read()))
+
+     def pyvis_to_networkx(self):
+         nx_graph = nx.Graph()
+         nodes = [d["id"] for d in self.G.nodes]
+         for node_dic in self.G.nodes:
+             # Use the label as the networkx node id (avoid shadowing the
+             # builtin id); keep the remaining pyvis attributes as node data.
+             node_id = node_dic["label"]
+             del node_dic["label"]
+             nx_graph.add_nodes_from([(node_id, node_dic)])
+         self.node_edge_dict = dict(zip(nodes, self.labels))
+         del nodes
+         for edge in self.G.edges:
+             source, target = self.node_edge_dict[edge["from"]], self.node_edge_dict[edge["to"]]
+             del edge["from"]
+             del edge["to"]
+             nx_graph.add_edges_from([(source, target, edge)])
+
+         return nx_graph
+
+
+ # -------------------------------------------------------------------------
+ if __name__ == "__main__":
+     import numpy as np
+     import pandas as pd
+
+     # Generate data
+     x = np.random.rand(3, 100)
+     y = 0.1 * x[0, :] + 0.4 * x[1, :] + 0.5 * x[2, :] + 0.1
+     # Create a DataFrame
+     df = pd.DataFrame(x.T, columns=["x1", "x2", "x3"])
+     df["y"] = y
+     # Instantiate DynamicGraph
+     fs = DynamicGraph(df, n_importances=2)
+     fs.fit()
+     fs.draw()
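
Continuing the demo above, the rendered pyvis network can be handed to networkx for standard graph analysis; a short follow-on sketch:

    # Convert the pyvis graph into a networkx Graph keyed by feature
    # labels, then inspect its size.
    nx_graph = fs.pyvis_to_networkx()
    print(nx_graph.number_of_nodes(), nx_graph.number_of_edges())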