likelihood 2.2.0.dev1__cp312-cp312-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- likelihood/VERSION +1 -0
- likelihood/__init__.py +20 -0
- likelihood/graph/__init__.py +9 -0
- likelihood/graph/_nn.py +283 -0
- likelihood/graph/graph.py +86 -0
- likelihood/graph/nn.py +329 -0
- likelihood/main.py +273 -0
- likelihood/models/__init__.py +3 -0
- likelihood/models/deep/__init__.py +13 -0
- likelihood/models/deep/_autoencoders.py +896 -0
- likelihood/models/deep/_predictor.py +809 -0
- likelihood/models/deep/autoencoders.py +903 -0
- likelihood/models/deep/bandit.py +97 -0
- likelihood/models/deep/gan.py +313 -0
- likelihood/models/deep/predictor.py +805 -0
- likelihood/models/deep/rl.py +345 -0
- likelihood/models/environments.py +202 -0
- likelihood/models/hmm.py +163 -0
- likelihood/models/regression.py +451 -0
- likelihood/models/simulation.py +213 -0
- likelihood/models/utils.py +87 -0
- likelihood/pipes.py +382 -0
- likelihood/rust_py_integration.cpython-312-x86_64-linux-gnu.so +0 -0
- likelihood/tools/__init__.py +4 -0
- likelihood/tools/cat_embed.py +212 -0
- likelihood/tools/figures.py +348 -0
- likelihood/tools/impute.py +278 -0
- likelihood/tools/models_tools.py +866 -0
- likelihood/tools/numeric_tools.py +390 -0
- likelihood/tools/reports.py +375 -0
- likelihood/tools/tools.py +1336 -0
- likelihood-2.2.0.dev1.dist-info/METADATA +68 -0
- likelihood-2.2.0.dev1.dist-info/RECORD +37 -0
- likelihood-2.2.0.dev1.dist-info/WHEEL +5 -0
- likelihood-2.2.0.dev1.dist-info/licenses/LICENSE +21 -0
- likelihood-2.2.0.dev1.dist-info/top_level.txt +7 -0
- src/lib.rs +12 -0
--- /dev/null
+++ likelihood/models/deep/bandit.py
@@ -0,0 +1,97 @@
+import warnings
+from typing import List
+
+import torch
+import torch.nn as nn
+
+
+class MultiBanditNet(nn.Module):
+    def __init__(
+        self,
+        state_dim: int,
+        num_options: int,
+        num_actions_per_option: int | List[int],
+        num_neurons: int = 128,
+        num_layers: int = 1,
+        activation: nn.Module = nn.ReLU(),
+    ):
+        super(MultiBanditNet, self).__init__()
+        self.state_dim = state_dim
+        self.num_options = num_options
+        self.num_actions_per_option = num_actions_per_option
+        self.num_neurons = num_neurons
+        self.num_layers = num_layers
+        self.activation = activation
+
+        self.option_network = nn.Sequential(
+            nn.Linear(state_dim, self.num_neurons),
+            nn.ReLU(),
+            nn.Linear(
+                self.num_neurons, num_options
+            ),  # Output a probability distribution over options
+        )
+
+        # Low-level (action) Q-networks for each option with additional linear layers
+        self.action_networks = nn.ModuleList()
+        for i in range(num_options):
+            action_network_layers = [nn.Linear(state_dim, self.num_neurons), self.activation]
+            for _ in range(self.num_layers - 1):
+                action_network_layers.extend(
+                    [nn.Linear(self.num_neurons, self.num_neurons), self.activation]
+                )
+            num_actions = (
+                num_actions_per_option
+                if not isinstance(num_actions_per_option, list)
+                else num_actions_per_option[i]
+            )  # Output Q-values for each action in this option
+            action_network_layers.append(nn.Linear(self.num_neurons, num_actions))
+            self.action_networks.append(nn.Sequential(*action_network_layers))
+
+        # Option termination network
+        self.termination_network = nn.Sequential(
+            nn.Linear(state_dim, self.num_neurons),
+            nn.ReLU(),
+            nn.Linear(self.num_neurons, 1),  # Single output for termination probability (0-1)
+            nn.Sigmoid(),
+        )
+
+    def forward(self, state):
+        if state.dim() == 1:
+            state = state.unsqueeze(0)
+
+        batch_size = state.shape[0]
+        option_probs = torch.softmax(self.option_network(state), dim=-1)
+
+        action_probs = []
+        selected_actions = []
+
+        for i in range(batch_size):
+            selected_option = torch.multinomial(option_probs[i], 1).item()
+
+            # Get Q-values for this option
+            q_values = self.action_networks[selected_option](state[i].unsqueeze(0))
+            action_prob = torch.softmax(q_values, dim=-1)
+            action_probs.append(action_prob)
+            selected_action = torch.argmax(action_prob, dim=-1)
+            selected_actions.append(selected_action)
+
+        if len(action_probs) > 0:
+            action_probs = torch.cat(action_probs, dim=0).squeeze(1)
+            selected_actions = torch.stack(selected_actions, dim=0).squeeze(1)
+        else:
+            warnings.warn(
+                "The list of action probabilities is empty, initializing with default values.",
+                UserWarning,
+            )
+            action_probs = torch.empty((batch_size, 1))
+            selected_actions = torch.zeros(batch_size, dtype=torch.long)
+
+        termination_prob = self.termination_network(state)
+
+        return (
+            option_probs,
+            action_probs,
+            termination_prob,
+            torch.argmax(option_probs, dim=-1),  # selected_options
+            selected_actions,
+        )
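
For context, a minimal usage sketch of MultiBanditNet follows. It is not taken from the package documentation: the dimensions are arbitrary, and it assumes the class is imported directly from likelihood.models.deep.bandit with an integer num_actions_per_option (a per-option list with differing sizes would make the torch.cat in forward fail on mixed shapes).

import torch

from likelihood.models.deep.bandit import MultiBanditNet

# Hypothetical dimensions: 8-dimensional states, 3 options, 4 actions per option.
net = MultiBanditNet(state_dim=8, num_options=3, num_actions_per_option=4)
states = torch.randn(16, 8)  # a batch of 16 states

option_probs, action_probs, termination_prob, selected_options, selected_actions = net(states)
print(option_probs.shape)      # torch.Size([16, 3]): distribution over options per state
print(action_probs.shape)      # torch.Size([16, 4]): action distribution under the sampled option
print(termination_prob.shape)  # torch.Size([16, 1]): per-state termination probability
print(selected_actions.shape)  # torch.Size([16]):    greedy action per state

Note that forward samples an option per state via torch.multinomial but returns torch.argmax(option_probs) as selected_options, so the option that produced each action need not match the returned option index.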
--- /dev/null
+++ likelihood/models/deep/gan.py
@@ -0,0 +1,313 @@
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from IPython.display import clear_output
+from tensorflow.keras.regularizers import l2
+from tqdm import tqdm
+
+from likelihood.tools import get_metrics
+
+
+@tf.keras.utils.register_keras_serializable(package="Custom", name="GANRegressor")
+class GANRegressor(tf.keras.Model):
+    """
+    GANRegressor is a custom Keras model that combines a generator and a discriminator to perform regression.
+    """
+
+    def __init__(
+        self,
+        input_shape_parm,
+        output_shape_parm,
+        num_neurons=128,
+        activation="linear",
+        depth=5,
+        dropout=0.2,
+        l2_reg=0.0,
+        **kwargs,
+    ):
+        super(GANRegressor, self).__init__()
+        self.input_shape_parm = input_shape_parm
+        self.output_shape_parm = output_shape_parm
+        self.num_neurons = num_neurons
+        self.activation = activation
+        self.depth = depth
+        self.dropout = dropout
+        self.l2_reg = l2_reg
+        self.optimizer = kwargs.get("optimizer", "adam")
+
+        self.generator = self._build_generator()
+        self.discriminator = self._build_discriminator()
+        dummy_input = tf.convert_to_tensor(tf.random.normal([1, self.input_shape_parm]))
+        self.build(dummy_input.shape)
+
+    def build(self, input_shape):
+        self.gan = tf.keras.models.Sequential([self.generator, self.discriminator], name="gan")
+
+        self.generator.compile(
+            optimizer=self.optimizer,
+            loss=tf.keras.losses.MeanAbsolutePercentageError(),
+            metrics=[tf.keras.metrics.MeanAbsolutePercentageError()],
+        )
+
+        self.discriminator.compile(
+            optimizer=self.optimizer, loss="binary_crossentropy", metrics=["accuracy"]
+        )
+
+        self.gan.compile(optimizer=self.optimizer, loss="binary_crossentropy")
+        super(GANRegressor, self).build(input_shape)
+
+    def _build_generator(self):
+        generator = tf.keras.Sequential(name="generator")
+        generator.add(
+            tf.keras.layers.Dense(
+                self.num_neurons,
+                activation="selu",
+                input_shape=[self.input_shape_parm],
+                kernel_regularizer=l2(self.l2_reg),
+            )
+        )
+        generator.add(tf.keras.layers.Dropout(self.dropout))
+        for _ in range(self.depth - 1):
+            generator.add(
+                tf.keras.layers.Dense(
+                    self.num_neurons, activation="selu", kernel_regularizer=l2(self.l2_reg)
+                ),
+            )
+            generator.add(tf.keras.layers.Dropout(self.dropout))
+        generator.add(tf.keras.layers.Dense(2 * self.output_shape_parm, activation=self.activation))
+        return generator
+
+    def _build_discriminator(self):
+        discriminator = tf.keras.Sequential(name="discriminator")
+        for _ in range(self.depth):
+            discriminator.add(
+                tf.keras.layers.Dense(
+                    self.num_neurons, activation="selu", kernel_regularizer=l2(self.l2_reg)
+                ),
+            )
+            discriminator.add(tf.keras.layers.Dropout(self.dropout))
+        discriminator.add(tf.keras.layers.Dense(2, activation="softmax"))
+        return discriminator
+
+    def train_gan(
+        self,
+        X,
+        y,
+        batch_size,
+        n_epochs,
+        validation_split=0.2,
+        verbose=1,
+    ):
+        """
+        Train the GAN model.
+
+        Parameters
+        ----------
+        X : array-like
+            Input data.
+        y : array-like
+            Target data.
+        batch_size : int
+            Number of samples in each batch.
+        n_epochs : int
+            Number of training epochs.
+        validation_split : float, optional
+            Fraction of the data to be used for validation.
+        verbose : int, optional
+            Verbosity level. Default is 1.
+
+        Returns
+        -------
+        history : pd.DataFrame
+            Training history.
+        """
+        loss_history = []
+        for epoch in tqdm(range(n_epochs)):
+            batch_starts = np.arange(0, len(X), batch_size)
+            np.random.shuffle(batch_starts)  # shuffle batch order once per epoch, not mid-iteration
+            for start in batch_starts:
+                end = start + batch_size
+                X_batch = X[start:end]
+                y_batch = y[start:end].reshape(-1, self.output_shape_parm)
+                y_batch = np.concatenate((y_batch, y_batch**2), axis=1)
+                X_batch = tf.cast(X_batch, "float32")
+                noise = tf.random.normal(
+                    shape=X_batch.shape, stddev=tf.math.reduce_std(X_batch, keepdims=False)
+                )
+
+                # Phase 1 - training the generator
+                self.generator.train_on_batch(X_batch, y_batch)
+
+                # Phase 2 - training the discriminator
+                generated_y_fake = self.generator(noise)
+                generated_y_real = self.generator(X_batch)
+                fake_and_real = tf.concat([generated_y_fake, generated_y_real], axis=0)
+                X_fake_and_real = tf.concat([noise, X_batch], axis=0)
+                half_batch = int(fake_and_real.shape[0] / 2)  # local; avoids clobbering the batch_size argument
+                indices_ = tf.constant([[0.0]] * half_batch + [[1.0]] * half_batch)[:, 0]
+                indices_ = tf.cast(indices_, "int32")
+                y1 = tf.one_hot(indices_, 2)
+                self.gan.train_on_batch(X_fake_and_real, y1)
+
+                loss = self._cal_loss(generated_y_real, y_batch)
+                loss_history.append([epoch, loss])
+
+            if verbose:
+                X_batch, y_batch, X_batch_val, y_batch_val = self._train_and_val(
+                    X_batch, y_batch, validation_split=validation_split
+                )
+                generated_y = self.generator(X_batch)
+                generated_y_val = self.generator(X_batch_val)
+                y_pred = self.discriminator.predict(fake_and_real, verbose=0)
+                y_pred = list(np.argmax(y_pred, axis=1))
+
+                metrics = get_metrics(self._get_frame(indices_.numpy().tolist(), y_pred), "y", "y_pred")
+                loss = self._cal_loss(generated_y, y_batch)
+                loss_val = self._cal_loss(generated_y_val, y_batch_val)
+                clear_output(wait=True)
+                metrics_list = [
+                    ("Epoch", f"{epoch}"),
+                    ("Loss", f"{loss:.2f} / {loss_val:.2f}"),
+                    ("Accuracy", f"{metrics['accuracy']:.2f} / {metrics['accuracy']:.2f}"),
+                    ("Precision", f"{metrics['precision']:.2f} / {metrics['precision']:.2f}"),
+                    ("Recall", f"{metrics['recall']:.2f} / {metrics['recall']:.2f}"),
+                    ("F1 Score", f"{metrics['f1_score']:.2f} / {metrics['f1_score']:.2f}"),
+                    ("Kappa", f"{metrics['kappa']:.2f} / {metrics['kappa']:.2f}"),
+                ]
+
+                metric_width = 15
+                value_width = 30
+
+                header = f"| {'Metric':<{metric_width}} | {'Value':<{value_width}} |"
+                separator = "+" + "-" * (len(header) - 2) + "+"
+
+                print(separator)
+                print(header)
+                print(separator)
+
+                for metric_name, metric_values in metrics_list:
+                    data_row = f"| {metric_name:<{metric_width}} | {metric_values:<{value_width}} |"
+                    print(data_row)
+
+                print(separator)
+
+        return pd.DataFrame(loss_history, columns=["epoch", "loss"])
+
+    def _get_frame(self, y, y_pred):
+        df = pd.DataFrame()
+        df["y"] = y
+        df["y_pred"] = y_pred
+        return df
+
+    def _train_and_val(self, X, y, validation_split):
+        split = int((1 - validation_split) * len(X))
+
+        if len(X) > split and split > 0:
+            X_train = X[:split]
+            y_train = y[:split]
+            X_val = X[split:]
+            y_val = y[split:]
+        else:
+            X_train = X
+            y_train = y
+            X_val = X
+            y_val = y
+
+        X_train = tf.cast(X_train, "float32")
+        X_val = tf.cast(X_val, "float32")
+
+        return X_train, y_train, X_val, y_val
+
+    def _cal_loss(self, generated, y):
+        return tf.math.reduce_mean(100 * abs((y - generated) / y), keepdims=False).numpy()
+
+    def train_gen(
+        self,
+        X_train,
+        y_train,
+        batch_size,
+        n_epochs,
+        validation_split=0.2,
+        patience=3,
+    ):
+        """
+        Train the generator model.
+
+        Parameters
+        ----------
+        X_train : array-like
+            Training data.
+        y_train : array-like
+            Training target data.
+        batch_size : int
+            Batch size for training.
+        n_epochs : int
+            Number of epochs for training.
+        validation_split : float, optional
+            Fraction of data to use for validation. Default is 0.2.
+        patience : int, optional
+            Number of epochs to wait before early stopping. Default is 3.
+
+        Returns
+        -------
+        history : pd.DataFrame
+            Training history.
+        """
+        callback = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=patience)
+        # Prepare the target by extending it with its square
+        self.discriminator.trainable = False
+        y_train_extended = np.concatenate(
+            (
+                y_train.reshape(-1, self.output_shape_parm),
+                y_train.reshape(-1, self.output_shape_parm) ** 2,
+            ),
+            axis=1,
+        )
+
+        history = self.generator.fit(
+            X_train,
+            y_train_extended,
+            epochs=n_epochs,
+            batch_size=batch_size,
+            verbose=0,
+            validation_split=validation_split,
+            callbacks=[callback],
+        )
+
+        return pd.DataFrame(history.history)
+
+    def call(self, inputs):
+        return self.generator(inputs)[:, 0]
+
+    def get_config(self):
+        config = {
+            "input_shape_parm": self.input_shape_parm,
+            "output_shape_parm": self.output_shape_parm,
+            "num_neurons": self.num_neurons,
+            "activation": self.activation,
+            "depth": self.depth,
+            "dropout": self.dropout,
+            "generator": self.generator,
+            "discriminator": self.discriminator,
+            "gan": self.gan,
+            "l2_reg": self.l2_reg,
+            "optimizer": self.optimizer,
+        }
+        base_config = super(GANRegressor, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+    @classmethod
+    def from_config(cls, config):
+        return cls(
+            input_shape_parm=config["input_shape_parm"],
+            output_shape_parm=config["output_shape_parm"],
+            num_neurons=config["num_neurons"],
+            activation=config["activation"],
+            depth=config["depth"],
+            dropout=config["dropout"],
+            generator=config["generator"],
+            discriminator=config["discriminator"],
+            gan=config["gan"],
+            l2_reg=config["l2_reg"],
+            optimizer=config["optimizer"],
+        )
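
A minimal usage sketch for GANRegressor follows; the data, hyperparameters, and import path are illustrative assumptions based on the module layout shown in this diff, not an example from the package. With verbose=0, train_gan skips the metric table and only returns the per-batch loss history; the target is offset away from zero because the generator trains on a MAPE-style loss.

import numpy as np

from likelihood.models.deep.gan import GANRegressor

rng = np.random.default_rng(0)
X = rng.normal(size=(512, 10)).astype("float32")
# Toy target, offset so MAPE stays well-behaved (no near-zero denominators).
y = (X.sum(axis=1) + 20.0 + rng.normal(scale=0.1, size=512)).astype("float32")

model = GANRegressor(input_shape_parm=10, output_shape_parm=1)

# Adversarial phase: alternates generator and gan updates per batch.
loss_history = model.train_gan(X, y, batch_size=64, n_epochs=10, verbose=0)

# Generator-only fine-tuning with early stopping on validation loss.
gen_history = model.train_gen(X, y, batch_size=64, n_epochs=20, patience=3)

# call() returns the first generator output column, i.e. the point prediction
# (the second column is the generator's estimate of the squared target).
y_pred = model(X[:5])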