likelihood 2.2.0.dev1__cp312-cp312-manylinux_2_28_x86_64.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
- likelihood/VERSION +1 -0
- likelihood/__init__.py +20 -0
- likelihood/graph/__init__.py +9 -0
- likelihood/graph/_nn.py +283 -0
- likelihood/graph/graph.py +86 -0
- likelihood/graph/nn.py +329 -0
- likelihood/main.py +273 -0
- likelihood/models/__init__.py +3 -0
- likelihood/models/deep/__init__.py +13 -0
- likelihood/models/deep/_autoencoders.py +896 -0
- likelihood/models/deep/_predictor.py +809 -0
- likelihood/models/deep/autoencoders.py +903 -0
- likelihood/models/deep/bandit.py +97 -0
- likelihood/models/deep/gan.py +313 -0
- likelihood/models/deep/predictor.py +805 -0
- likelihood/models/deep/rl.py +345 -0
- likelihood/models/environments.py +202 -0
- likelihood/models/hmm.py +163 -0
- likelihood/models/regression.py +451 -0
- likelihood/models/simulation.py +213 -0
- likelihood/models/utils.py +87 -0
- likelihood/pipes.py +382 -0
- likelihood/rust_py_integration.cpython-312-x86_64-linux-gnu.so +0 -0
- likelihood/tools/__init__.py +4 -0
- likelihood/tools/cat_embed.py +212 -0
- likelihood/tools/figures.py +348 -0
- likelihood/tools/impute.py +278 -0
- likelihood/tools/models_tools.py +866 -0
- likelihood/tools/numeric_tools.py +390 -0
- likelihood/tools/reports.py +375 -0
- likelihood/tools/tools.py +1336 -0
- likelihood-2.2.0.dev1.dist-info/METADATA +68 -0
- likelihood-2.2.0.dev1.dist-info/RECORD +37 -0
- likelihood-2.2.0.dev1.dist-info/WHEEL +5 -0
- likelihood-2.2.0.dev1.dist-info/licenses/LICENSE +21 -0
- likelihood-2.2.0.dev1.dist-info/top_level.txt +7 -0
- src/lib.rs +12 -0
likelihood/VERSION
ADDED
@@ -0,0 +1 @@
+2.2.0dev1
likelihood/__init__.py
ADDED
@@ -0,0 +1,20 @@
+"""
+Likelihood: Initialize the Package
+=====================================
+
+This is the entry point of the Likelihood package. It initializes all necessary modules and provides a central hub for accessing various tools and functions.
+
+Main Modules:
+- likelihood.main: Provides access to core functionality, including data preprocessing, model training, and analysis.
+- likelihood.models: Offers pre-built models for AutoEncoder-based classification and regression tasks.
+- likelihood.tools: Contains utility functions for data manipulation, normalization, and visualization.
+
+By importing the main modules directly or accessing them through this central entry point (i.e., `from likelihood import *`), you can leverage the full range of Likelihood's capabilities to streamline your data analysis workflow.
+
+To get started with Likelihood, simply import the desired modules and start exploring!
+"""
+
+from likelihood.main import *
+from likelihood.models import *
+from likelihood.pipes import Pipeline
+from likelihood.tools import *
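
For orientation, a minimal usage sketch of the entry point the docstring above describes. This is an illustrative addition, not part of the package; it assumes the wheel is installed, and the names come from the imports in __init__.py:

# Pipeline is re-exported at the package root by likelihood/__init__.py.
from likelihood import Pipeline

# Submodules can also be imported directly.
from likelihood import models, tools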
likelihood/graph/_nn.py
ADDED
@@ -0,0 +1,283 @@
+import logging
+import os
+
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+logging.getLogger("tensorflow").setLevel(logging.ERROR)
+
+from multiprocessing import Pool, cpu_count
+from typing import List, Tuple
+
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from IPython.display import clear_output
+from sklearn.metrics import f1_score
+
+tf.get_logger().setLevel("ERROR")
+
+from likelihood.tools import LoRALayer
+
+from .nn import Data, cal_adjacency_matrix, compare_pair, compare_similarity_np
+
+
+@tf.keras.utils.register_keras_serializable(package="Custom", name="VanillaGNNLayer")
+class VanillaGNNLayer(tf.keras.layers.Layer):
+    def __init__(self, dim_in, dim_out, rank=None, kernel_initializer="glorot_uniform", **kwargs):
+        super(VanillaGNNLayer, self).__init__(**kwargs)
+        self.dim_in = dim_in
+        self.dim_out = dim_out
+        self.rank = rank
+        self.kernel_initializer = kernel_initializer
+        self.linear = None
+
+    def build(self, input_shape):
+        if self.rank:
+            self.linear = LoRALayer(self.dim_out, rank=self.rank)
+        else:
+            self.linear = tf.keras.layers.Dense(
+                self.dim_out, use_bias=False, kernel_initializer=self.kernel_initializer
+            )
+        super(VanillaGNNLayer, self).build(input_shape)
+
+    def call(self, x, adjacency):
+        x = self.linear(x)
+        x = tf.sparse.sparse_dense_matmul(adjacency, x)
+        return x
+
+    def get_config(self):
+        config = super(VanillaGNNLayer, self).get_config()
+        config.update(
+            {
+                "dim_in": self.dim_in,
+                "dim_out": self.dim_out,
+                "rank": self.rank,
+                "kernel_initializer": (
+                    None
+                    if self.rank
+                    else tf.keras.initializers.serialize(self.linear.kernel_initializer)
+                ),
+            }
+        )
+        return config
+
+    @classmethod
+    def from_config(cls, config):
+        if config.get("kernel_initializer") is not None:
+            config["kernel_initializer"] = tf.keras.initializers.deserialize(
+                config["kernel_initializer"]
+            )
+        return cls(**config)
+
+
+class VanillaGNN:
+    def __init__(self, dim_in, dim_h, dim_out, rank=2, **kwargs):
+        self.dim_in = dim_in
+        self.dim_h = dim_h
+        self.dim_out = dim_out
+        self.rank = rank
+
+        self.gnn1 = VanillaGNNLayer(self.dim_in, self.dim_h, self.rank)
+        self.gnn2 = VanillaGNNLayer(self.dim_h, self.dim_h, self.rank)
+        self.gnn3 = VanillaGNNLayer(self.dim_h, self.dim_out, None)
+
+        self.build()
+
+    def build(self):
+        x_in = tf.keras.Input(shape=(self.dim_in,), name="node_features")
+        adjacency_in = tf.keras.Input(shape=(None,), sparse=True, name="adjacency")
+
+        gnn1 = VanillaGNNLayer(self.dim_in, self.dim_h, self.rank)
+        gnn2 = VanillaGNNLayer(self.dim_h, self.dim_h, self.rank)
+        gnn3 = VanillaGNNLayer(self.dim_h, self.dim_out, rank=None)
+
+        h = gnn1(x_in, adjacency_in)
+        h = tf.keras.activations.tanh(h)
+        h = gnn2(h, adjacency_in)
+        h = gnn3(h, adjacency_in)
+        out = tf.keras.activations.softmax(h, axis=-1)
+
+        self.model = tf.keras.Model(
+            inputs=[x_in, adjacency_in], outputs=out, name="VanillaGNN_Functional"
+        )
+
+    @tf.function
+    def __call__(self, x, adjacency):
+        return self.model([x, adjacency])
+
+    def f1_macro(self, y_true, y_pred):
+        return f1_score(y_true, y_pred, average="macro")
+
+    def compute_f1_score(self, logits, labels):
+        predictions = tf.argmax(logits, axis=1, output_type=tf.int32)
+        true_labels = tf.cast(labels, tf.int32)
+        return self.f1_macro(true_labels.numpy(), predictions.numpy())
+
+    def evaluate(self, x, adjacency, y):
+        y = tf.cast(y, tf.int32)
+        out = self(x, adjacency)
+        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=out)
+        loss = tf.reduce_mean(loss)
+        f1 = round(self.compute_f1_score(out, y), 4)
+        return loss.numpy(), f1
+
+    def test(self, data):
+        data.x = tf.convert_to_tensor(data.x) if not tf.is_tensor(data.x) else data.x
+        out = self(data.x, data.adjacency)
+        test_f1 = self.compute_f1_score(out, data.y)
+        return round(test_f1, 4)
+
+    def predict(self, data):
+        data.x = tf.convert_to_tensor(data.x) if not tf.is_tensor(data.x) else data.x
+        out = self(data.x, data.adjacency)
+        return tf.argmax(out, axis=1, output_type=tf.int32).numpy()
+
+    def save(self, filepath, **kwargs):
+        """
+        Save the complete model including all components.
+
+        Parameters
+        ----------
+        filepath : str
+            Path where to save the model.
+        """
+        import os
+
+        # Create directory if it doesn't exist
+        os.makedirs(filepath, exist_ok=True)
+
+        self.model.save(os.path.join(filepath, "main_model.keras"))
+
+        # Save configuration
+        import json
+
+        config = self.get_config()
+
+        with open(os.path.join(filepath, "config.json"), "w") as f:
+            json.dump(config, f, indent=2)
+
+    @classmethod
+    def load(cls, filepath):
+        """
+        Load a complete model from saved components.
+
+        Parameters
+        ----------
+        filepath : str
+            Path where the model was saved.
+
+        Returns
+        -------
+        VanillaGNN
+            The loaded model instance.
+        """
+        import json
+        import os
+
+        # Load configuration
+        with open(os.path.join(filepath, "config.json"), "r") as f:
+            config = json.load(f)
+
+        # Create new instance
+        instance = cls(**config)
+
+        instance.model = tf.keras.models.load_model(os.path.join(filepath, "main_model.keras"))
+
+        return instance
+
+    def get_config(self):
+        return {
+            "dim_in": self.dim_in,
+            "dim_h": self.dim_h,
+            "dim_out": self.dim_out,
+            "rank": self.rank,
+        }
+
+    @classmethod
+    def from_config(cls, config):
+        return cls(
+            dim_in=config["dim_in"],
+            dim_h=config["dim_h"],
+            dim_out=config["dim_out"],
+            rank=config["rank"],
+        )
+
+    def get_build_config(self):
+        config = {
+            "dim_in": self.dim_in,
+            "dim_h": self.dim_h,
+            "dim_out": self.dim_out,
+            "rank": self.rank,
+        }
+        return config
+
+    @classmethod
+    def build_from_config(cls, config):
+        return cls(**config)
+
+    @tf.function
+    def train_step(self, batch_x, batch_adjacency, batch_y, optimizer):
+        with tf.GradientTape() as tape:
+            out = self(batch_x, batch_adjacency)
+            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=batch_y, logits=out)
+            loss = tf.reduce_mean(loss)
+        gradients = tape.gradient(loss, self.model.trainable_variables)
+        optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
+        return loss
+
+    def fit(self, data, epochs, batch_size, test_size=0.2, optimizer="adam"):
+        optimizers = {
+            "sgd": tf.keras.optimizers.SGD(),
+            "adam": tf.keras.optimizers.Adam(),
+            "adamw": tf.keras.optimizers.AdamW(),
+            "adadelta": tf.keras.optimizers.Adadelta(),
+            "rmsprop": tf.keras.optimizers.RMSprop(),
+        }
+        optimizer = optimizers[optimizer]
+        train_losses = []
+        train_f1_scores = []
+        val_losses = []
+        val_f1_scores = []
+
+        num_nodes = len(data.x)
+        split_index = int((1 - test_size) * num_nodes)
+
+        X_train, X_test = data.x[:split_index], data.x[split_index:]
+        y_train, y_test = data.y[:split_index], data.y[split_index:]
+
+        adjacency_train = tf.sparse.slice(data.adjacency, [0, 0], [split_index, split_index])
+        adjacency_test = tf.sparse.slice(
+            data.adjacency,
+            [split_index, split_index],
+            [num_nodes - split_index, num_nodes - split_index],
+        )
+
+        batch_starts = np.arange(0, len(X_train), batch_size)
+        for epoch in range(epochs):
+            np.random.shuffle(batch_starts)
+            for start in batch_starts:
+                end = start + batch_size
+                batch_x = X_train[start:end, :]
+                batch_adjacency = tf.sparse.slice(
+                    adjacency_train, [start, start], [batch_size, batch_size]
+                )
+                batch_y = y_train[start:end]
+                train_loss = self.train_step(batch_x, batch_adjacency, batch_y, optimizer)
+
+            train_loss, train_f1 = self.evaluate(X_train, adjacency_train, y_train)
+            train_losses.append(train_loss)
+            train_f1_scores.append(train_f1)
+
+            if epoch % 5 == 0:
+                clear_output(wait=True)
+                val_loss, val_f1 = self.evaluate(X_test, adjacency_test, y_test)
+                val_losses.append(val_loss)
+                val_f1_scores.append(val_f1)
+                print(
+                    f"Epoch {epoch:>3} | Train Loss: {train_loss:.4f} | Train F1: {train_f1:.4f} | Val Loss: {val_loss:.4f} | Val F1: {val_f1:.4f}"
+                )
+
+        return train_losses, train_f1_scores, val_losses, val_f1_scores
+
+
+if __name__ == "__main__":
+    print("Examples will be running below")
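
For orientation, a minimal training sketch for the VanillaGNN class above. This is an illustrative addition, not part of the package: it assumes VanillaGNN is importable from likelihood.graph._nn per the file list, and it substitutes types.SimpleNamespace for the Data container imported from .nn, relying only on the .x, .y, and sparse .adjacency attributes that fit/test read.

import types

import numpy as np
import tensorflow as tf

from likelihood.graph._nn import VanillaGNN  # import path assumed from the file list

num_nodes, dim_in, dim_h, dim_out = 120, 16, 32, 3

# Random node features/labels and a self-loop (identity) adjacency as a stand-in graph.
x = tf.constant(np.random.rand(num_nodes, dim_in), dtype=tf.float32)
y = tf.constant(np.random.randint(0, dim_out, size=num_nodes), dtype=tf.int32)
adjacency = tf.sparse.from_dense(tf.eye(num_nodes))

# Hypothetical stand-in for the Data container from likelihood.graph.nn.
data = types.SimpleNamespace(x=x, y=y, adjacency=adjacency)

gnn = VanillaGNN(dim_in=dim_in, dim_h=dim_h, dim_out=dim_out, rank=2)
train_losses, train_f1s, val_losses, val_f1s = gnn.fit(data, epochs=10, batch_size=32)
print("test F1:", gnn.test(data))

With 120 nodes and batch_size=32, the 96-node training split divides into whole batches, which sidesteps any ragged final batch in the sparse adjacency slicing done by fit.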
likelihood/graph/graph.py
ADDED
@@ -0,0 +1,86 @@
+from typing import List
+
+import networkx as nx
+import pandas as pd
+from IPython.display import HTML, display
+from pyvis.network import Network
+
+from likelihood.tools import FeatureSelection
+
+
+class DynamicGraph(FeatureSelection):
+    """A class to represent a dynamic graph"""
+
+    def __init__(self, df: pd.DataFrame, n_importances: int, **kwargs):
+        self.G = Network(notebook=True, cdn_resources="remote", directed=True)
+        self.df = df
+        self.n_importances = n_importances
+        super().__init__(**kwargs)
+        self.labels: List[str] = []
+
+    def fit(self, **kwargs) -> None:
+        """Fit the model according to the given data and parameters."""
+        self.get_digraph(self.df, self.n_importances)
+        self.get_index = dict(zip(self.X.columns, range(len(self.X.columns))))
+        self._make_network()
+
+    def _make_network(self) -> None:
+        """Create nodes and edges of the network based on feature importance scores"""
+        self._add_nodes()
+        for i in range(len(self.all_features_imp_graph)):
+            node = self.all_features_imp_graph[i][0]
+            edges = self.all_features_imp_graph[i][1]
+
+            for label, weight in edges:
+                self.G.add_edge(self.get_index[node], self.get_index[label], weight=weight)
+
+    def _add_nodes(self) -> None:
+        for i in range(len(self.all_features_imp_graph)):
+            node = self.all_features_imp_graph[i][0]
+            self.labels.append(node)
+            self.G.add_node(n_id=i, label=node)
+
+    def draw(self, name="graph.html", **kwargs) -> None:
+        """Display the network using HTML format"""
+        spring_length = kwargs.get("spring_length", 500)
+        node_distance = kwargs.get("node_distance", 100)
+        self.G.repulsion(node_distance=node_distance, spring_length=spring_length)
+        self.G.show_buttons(filter_=["physics"])
+        self.G.show(name)
+
+        html_file_content = open(name, "r").read()
+        display(HTML(html_file_content))
+
+    def pyvis_to_networkx(self):
+        nx_graph = nx.Graph()
+        nodes = [d["id"] for d in self.G.nodes]
+        for node_dic in self.G.nodes:
+            id = node_dic["label"]
+            del node_dic["label"]
+            nx_graph.add_nodes_from([(id, node_dic)])
+        self.node_edge_dict = dict(zip(nodes, self.labels))
+        del nodes
+        for edge in self.G.edges:
+            source, target = self.node_edge_dict[edge["from"]], self.node_edge_dict[edge["to"]]
+            del edge["from"]
+            del edge["to"]
+            nx_graph.add_edges_from([(source, target, edge)])
+
+        return nx_graph
+
+
+# -------------------------------------------------------------------------
+if __name__ == "__main__":
+    import numpy as np
+    import pandas as pd
+
+    # Generate data
+    x = np.random.rand(3, 100)
+    y = 0.1 * x[0, :] + 0.4 * x[1, :] + 0.5 * x[2, :] + 0.1
+    # Create a DataFrame
+    df = pd.DataFrame(x.T, columns=["x1", "x2", "x3"])
+    df["y"] = y
+    # Instantiate DynamicGraph
+    fs = DynamicGraph(df, n_importances=2)
+    fs.fit()
+    fs.draw()
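
Likewise, a hedged sketch of converting the fitted pyvis network into networkx via pyvis_to_networkx, continuing the synthetic-data setup from the __main__ block above. This is an illustrative addition, not part of the package; the import path is assumed from the file list.

import numpy as np
import pandas as pd

from likelihood.graph.graph import DynamicGraph  # import path assumed from the file list

# Same synthetic regression data as the __main__ example above.
x = np.random.rand(3, 100)
df = pd.DataFrame(x.T, columns=["x1", "x2", "x3"])
df["y"] = 0.1 * x[0, :] + 0.4 * x[1, :] + 0.5 * x[2, :] + 0.1

fs = DynamicGraph(df, n_importances=2)
fs.fit()

# Convert the pyvis Network into a networkx.Graph keyed by node labels,
# then inspect it with standard networkx tooling.
nx_graph = fs.pyvis_to_networkx()
print(nx_graph.number_of_nodes(), nx_graph.number_of_edges())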