likelihood 1.2.15__tar.gz → 1.2.17__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {likelihood-1.2.15 → likelihood-1.2.17}/PKG-INFO +2 -1
- likelihood-1.2.17/likelihood/graph/nn.py +344 -0
- {likelihood-1.2.15 → likelihood-1.2.17}/likelihood/models/deep/autoencoders.py +50 -13
- {likelihood-1.2.15 → likelihood-1.2.17}/likelihood/tools/tools.py +8 -1
- {likelihood-1.2.15 → likelihood-1.2.17}/likelihood.egg-info/PKG-INFO +2 -1
- {likelihood-1.2.15 → likelihood-1.2.17}/likelihood.egg-info/SOURCES.txt +1 -0
- {likelihood-1.2.15 → likelihood-1.2.17}/likelihood.egg-info/requires.txt +1 -0
- {likelihood-1.2.15 → likelihood-1.2.17}/setup.py +1 -1
- {likelihood-1.2.15 → likelihood-1.2.17}/LICENSE +0 -0
- {likelihood-1.2.15 → likelihood-1.2.17}/README.md +0 -0
- {likelihood-1.2.15 → likelihood-1.2.17}/likelihood/__init__.py +0 -0
- {likelihood-1.2.15 → likelihood-1.2.17}/likelihood/graph/__init__.py +0 -0
- {likelihood-1.2.15 → likelihood-1.2.17}/likelihood/graph/graph.py +0 -0
- {likelihood-1.2.15 → likelihood-1.2.17}/likelihood/main.py +0 -0
- {likelihood-1.2.15 → likelihood-1.2.17}/likelihood/models/__init__.py +0 -0
- {likelihood-1.2.15 → likelihood-1.2.17}/likelihood/models/deep/__init__.py +0 -0
- {likelihood-1.2.15 → likelihood-1.2.17}/likelihood/models/regression.py +0 -0
- {likelihood-1.2.15 → likelihood-1.2.17}/likelihood/models/simulation.py +0 -0
- {likelihood-1.2.15 → likelihood-1.2.17}/likelihood/models/utils.py +0 -0
- {likelihood-1.2.15 → likelihood-1.2.17}/likelihood/tools/__init__.py +0 -0
- {likelihood-1.2.15 → likelihood-1.2.17}/likelihood/tools/numeric_tools.py +0 -0
- {likelihood-1.2.15 → likelihood-1.2.17}/likelihood.egg-info/dependency_links.txt +0 -0
- {likelihood-1.2.15 → likelihood-1.2.17}/likelihood.egg-info/top_level.txt +0 -0
- {likelihood-1.2.15 → likelihood-1.2.17}/setup.cfg +0 -0
{likelihood-1.2.15 → likelihood-1.2.17}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: likelihood
-Version: 1.2.15
+Version: 1.2.17
 Summary: A package that performs the maximum likelihood algorithm.
 Home-page: https://github.com/jzsmoreno/likelihood/
 Author: J. A. Moreno-Guerra

@@ -30,6 +30,7 @@ Requires-Dist: networkx; extra == "full"
 Requires-Dist: pyvis; extra == "full"
 Requires-Dist: tensorflow; extra == "full"
 Requires-Dist: keras-tuner; extra == "full"
+Requires-Dist: scikit-learn; extra == "full"
likelihood-1.2.17/likelihood/graph/nn.py (new file)

@@ -0,0 +1,344 @@
+import warnings
+from typing import List, Tuple
+
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from numpy import ndarray
+from pandas.core.frame import DataFrame
+from sklearn.metrics import f1_score
+from sklearn.model_selection import train_test_split
+
+from likelihood.tools import generate_feature_yaml
+
+
+def compare_similarity(arr1: ndarray, arr2: ndarray) -> int:
+    """Compares the similarity between two arrays of categories.
+
+    Parameters
+    ----------
+    arr1 : `ndarray`
+        The first array of categories.
+    arr2 : `ndarray`
+        The second array of categories.
+
+    Returns
+    -------
+    count: `int`
+        The number of categories that are the same in both arrays.
+    """
+
+    count = 0
+    for i in range(len(arr1)):
+        if arr1[i] == arr2[i]:
+            count += 1
+    return count
+
+
+def cal_adjency_matrix(
+    df: DataFrame, exclude_subset: List[str] = [], sparse: bool = True, **kwargs
+) -> Tuple[dict, ndarray]:
+    """Calculates the adjacency matrix for a given DataFrame.
+    The adjacency matrix is a matrix that represents the similarity between each pair of categories.
+    The similarity is calculated using the `compare_similarity` function.
+    The resulting matrix is a square matrix with the same number of rows and columns as the input DataFrame.
+
+    Parameters
+    ----------
+    df : `DataFrame`
+        The input DataFrame containing the categories.
+    exclude_subset : `List[str]`, optional
+        A list of categories to exclude from the calculation of the adjacency matrix.
+    sparse : `bool`, optional
+        Whether to return a sparse matrix or a dense matrix.
+    **kwargs : `dict`
+        Additional keyword arguments to pass to the `compare_similarity` function.
+
+    Keyword Arguments:
+    ----------
+    similarity: `int`
+        The minimum number of categories that must be the same in both arrays to be considered similar.
+
+    Returns
+    -------
+    adj_dict : `dict`
+        A dictionary containing the categories.
+    adjacency_matrix : `ndarray`
+        The adjacency matrix.
+    """
+
+    yaml_ = generate_feature_yaml(df)
+    categorical_columns = yaml_["categorical_features"]
+    if len(exclude_subset) > 0:
+        categorical_columns = [col for col in categorical_columns if col not in exclude_subset]
+
+    if len(categorical_columns) > 1:
+        df_categorical = df[categorical_columns].copy()
+    else:
+        categorical_columns = [
+            col
+            for col in df.columns
+            if (
+                col not in exclude_subset
+                and pd.api.types.is_integer_dtype(df[col])
+                and len(df[col].unique()) > 2
+            )
+        ]
+        df_categorical = df[categorical_columns].copy()
+
+    assert len(df_categorical) > 0
+
+    similarity = kwargs["similarity"] if "similarity" in kwargs else len(df_categorical.columns) - 1
+    assert similarity <= df_categorical.shape[1]
+
+    adj_dict = {}
+    for index, row in df_categorical.iterrows():
+        adj_dict[index] = row.to_list()
+
+    adjacency_matrix = np.zeros((len(df_categorical), len(df_categorical)))
+
+    for i in range(len(df_categorical)):
+        for j in range(len(df_categorical)):
+            if compare_similarity(adj_dict[i], adj_dict[j]) >= similarity:
+                adjacency_matrix[i][j] = 1
+
+    if sparse:
+        num_nodes = adjacency_matrix.shape[0]
+
+        indices = np.argwhere(adjacency_matrix != 0.0)
+        indices = tf.constant(indices, dtype=tf.int64)
+        values = tf.constant(adjacency_matrix[indices[:, 0], indices[:, 1]], dtype=tf.float32)
+        adjacency_matrix = tf.sparse.SparseTensor(
+            indices=indices, values=values, dense_shape=(num_nodes, num_nodes)
+        )
+
+        return adj_dict, adjacency_matrix
+    else:
+        return adj_dict, adjacency_matrix
+
+
+class Data:
+    def __init__(
+        self,
+        df: DataFrame,
+        target: str | None = None,
+        exclude_subset: List[str] = [],
+    ):
+        _, adjacency = cal_adjency_matrix(df, exclude_subset=exclude_subset, sparse=True)
+        if target is not None:
+            X = df.drop(columns=[target] + exclude_subset)
+        else:
+            X = df.drop(columns=exclude_subset)
+        self.columns = X.columns
+        X = X.to_numpy()
+        self.x = np.asarray(X).astype(np.float32)
+        self.adjacency = adjacency
+        if target is not None:
+            self.y = np.asarray(df[target].values).astype(np.int32)
+
+
+@tf.keras.utils.register_keras_serializable(package="Custom", name="VanillaGNNLayer")
+class VanillaGNNLayer(tf.keras.layers.Layer):
+    def __init__(self, dim_in, dim_out, kernel_initializer="glorot_uniform", **kwargs):
+        super(VanillaGNNLayer, self).__init__(**kwargs)
+        self.dim_out = dim_out
+        self.kernel_initializer = kernel_initializer
+        self.linear = None
+
+    def build(self, input_shape):
+        self.linear = tf.keras.layers.Dense(
+            self.dim_out, use_bias=False, kernel_initializer=self.kernel_initializer
+        )
+        super(VanillaGNNLayer, self).build(input_shape)
+
+    def call(self, x, adjacency):
+        x = self.linear(x)
+        x = tf.sparse.sparse_dense_matmul(adjacency, x)
+        return x
+
+    def get_config(self):
+        config = super(VanillaGNNLayer, self).get_config()
+        config.update(
+            {
+                "dim_out": self.dim_out,
+                "kernel_initializer": tf.keras.initializers.serialize(
+                    self.linear.kernel_initializer
+                ),
+            }
+        )
+        return config
+
+
+@tf.keras.utils.register_keras_serializable(package="Custom", name="VanillaGNN")
+class VanillaGNN(tf.keras.Model):
+    def __init__(self, dim_in, dim_h, dim_out, **kwargs):
+        super(VanillaGNN, self).__init__(**kwargs)
+        self.dim_in = dim_in
+        self.dim_h = dim_h
+        self.dim_out = dim_out
+        self.gnn1 = VanillaGNNLayer(self.dim_in, self.dim_h)
+        self.gnn2 = VanillaGNNLayer(self.dim_h, self.dim_h)
+        self.gnn3 = VanillaGNNLayer(self.dim_h, self.dim_out)
+
+    def build(self, input_shape):
+        super(VanillaGNN, self).build(input_shape)
+        dummy_input = tf.keras.Input(shape=input_shape[1:])
+        dummy_adjacency = tf.sparse.SparseTensor(
+            indices=[[0, 0]], values=[1.0], dense_shape=[input_shape[0], input_shape[0]]
+        )
+        _ = self(dummy_input, dummy_adjacency)
+
+    def call(self, x, adjacency):
+        h = self.gnn1(x, adjacency)
+        h = tf.nn.tanh(h)
+        h = self.gnn2(h, adjacency)
+        h = self.gnn3(h, adjacency)
+        return tf.nn.softmax(h, axis=1)
+
+    def f1_macro(self, y_true, y_pred):
+        return f1_score(y_true, y_pred, average="macro")
+
+    def compute_f1_score(self, logits, labels):
+        predictions = tf.argmax(logits, axis=1, output_type=tf.int32)
+        true_labels = tf.cast(labels, tf.int32)
+        return self.f1_macro(true_labels.numpy(), predictions.numpy())
+
+    def evaluate(self, x, adjacency, y):
+        y = tf.cast(y, tf.int32)
+        out = self(x, adjacency)
+        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=out)
+        loss = tf.reduce_mean(loss)
+        f1 = self.compute_f1_score(out, y)
+        return loss.numpy(), f1
+
+    def test(self, data):
+        out = self(data.x, data.adjacency)
+        test_f1 = self.compute_f1_score(out, data.y)
+        return test_f1
+
+    def predict(self, data):
+        out = self(data.x, data.adjacency)
+        return tf.argmax(out, axis=1, output_type=tf.int32).numpy()
+
+    def get_config(self):
+        config = {
+            "dim_in": self.dim_in,
+            "dim_h": self.dim_h,
+            "dim_out": self.dim_out,
+        }
+        base_config = super(VanillaGNN, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+    @classmethod
+    def from_config(cls, config):
+        return cls(
+            dim_in=config["dim_in"],
+            dim_h=config["dim_h"],
+            dim_out=config["dim_out"],
+        )
+
+    @tf.function
+    def train_step(self, batch_x, batch_adjacency, batch_y, optimizer):
+        with tf.GradientTape() as tape:
+            out = self(batch_x, batch_adjacency)
+            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=batch_y, logits=out)
+            loss = tf.reduce_mean(loss)
+        gradients = tape.gradient(loss, self.trainable_variables)
+        optimizer.apply_gradients(zip(gradients, self.trainable_variables))
+        return loss
+
+    def fit(self, data, epochs, batch_size, test_size=0.2, optimizer="adam"):
+        warnings.warn(
+            "It is normal for validation metrics to underperform. Use the test method to validate after training.",
+            UserWarning,
+        )
+        optimizers = {
+            "sgd": tf.keras.optimizers.SGD(),
+            "adam": tf.keras.optimizers.Adam(),
+            "adamw": tf.keras.optimizers.AdamW(),
+            "adadelta": tf.keras.optimizers.Adadelta(),
+            "rmsprop": tf.keras.optimizers.RMSprop(),
+        }
+        optimizer = optimizers[optimizer]
+        train_losses = []
+        train_f1_scores = []
+        val_losses = []
+        val_f1_scores = []
+
+        X_train, X_test, y_train, y_test = train_test_split(
+            data.x, data.y, test_size=test_size, shuffle=False
+        )
+        adjacency_train = tf.sparse.slice(data.adjacency, [0, 0], [len(X_train), len(X_train)])
+        adjacency_test = tf.sparse.slice(
+            data.adjacency, [len(X_train), 0], [len(X_test), len(X_test)]
+        )
+
+        batch_starts = np.arange(0, len(X_train), batch_size)
+        for epoch in range(epochs):
+            np.random.shuffle(batch_starts)
+            for start in batch_starts:
+                end = start + batch_size
+                batch_x = X_train[start:end, :]
+                batch_adjacency = tf.sparse.slice(
+                    adjacency_train, [start, start], [batch_size, batch_size]
+                )
+                batch_y = y_train[start:end]
+                train_loss = self.train_step(batch_x, batch_adjacency, batch_y, optimizer)
+
+            train_loss, train_f1 = self.evaluate(X_train, adjacency_train, y_train)
+            train_losses.append(train_loss)
+            train_f1_scores.append(train_f1)
+
+            if epoch % 2 == 0:
+                val_loss, val_f1 = self.evaluate(X_test, adjacency_test, y_test)
+                val_losses.append(val_loss)
+                val_f1_scores.append(val_f1)
+                print(
+                    f"Epoch {epoch:>3} | Train Loss: {train_loss:.3f} | Train F1: {train_f1:.3f} | Val Loss: {val_loss:.3f} | Val F1: {val_f1:.3f}"
+                )
+
+        return train_losses, train_f1_scores, val_losses, val_f1_scores
+
+
+if __name__ == "__main__":
+    # Example usage
+    import pandas as pd
+    from sklearn.datasets import load_iris
+
+    # Load the dataset
+    iris = load_iris()
+
+    # Convert to a DataFrame for easy exploration
+    iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
+    iris_df["species"] = iris.target
+
+    iris_df["sepal length (cm)"] = iris_df["sepal length (cm)"].astype("category")
+    iris_df["sepal width (cm)"] = iris_df["sepal width (cm)"].astype("category")
+    iris_df["petal length (cm)"] = iris_df["petal length (cm)"].astype("category")
+    iris_df["petal width (cm)"] = iris_df["petal width (cm)"].astype("category")
+
+    # Display the first few rows of the dataset
+    print(iris_df.head())
+
+    iris_df = iris_df.sample(frac=1, replace=False).reset_index(drop=True)
+
+    data = Data(iris_df, "species")
+
+    model = VanillaGNN(dim_in=data.x.shape[1], dim_h=8, dim_out=len(iris_df["species"].unique()))
+    print("Before training F1:", model.test(data))
+    model.fit(data, epochs=200, batch_size=32, test_size=0.5)
+    model.save("./best_model.keras")
+    print("After training F1:", model.test(data))
+    best_model = tf.keras.models.load_model("./best_model.keras")
+
+    print("After loading F1:", best_model.test(data))
+    df_results = pd.DataFrame()
+
+    # Suppose we have a new dataset without the target variable
+    iris_df = iris_df.drop(columns=["species"])
+    data_new = Data(iris_df)
+    print("Predictions:", best_model.predict(data_new))
+    df_results["predicted"] = list(model.predict(data))
+    df_results["actual"] = list(data.y)
+    # df_results.to_csv("results.csv", index=False)
+    breakpoint()
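The neighbourhood rule above is easiest to see on a toy input. The following sketch is not part of the package: it inlines the counting logic of `compare_similarity` and the thresholding loop of `cal_adjency_matrix` (the real function additionally derives the categorical columns via `generate_feature_yaml` and can return a `tf.sparse.SparseTensor` instead of a dense array):

import numpy as np

# Three rows of categorical values; two rows become neighbours when at least
# `similarity` positions agree.
rows = {
    0: ["red", "small", "round"],
    1: ["red", "small", "square"],
    2: ["blue", "large", "round"],
}

def compare_similarity(arr1, arr2):
    # same counting logic as nn.py: positions where both rows agree
    return sum(1 for a, b in zip(arr1, arr2) if a == b)

similarity = 2  # nn.py defaults this threshold to len(columns) - 1
n = len(rows)
adjacency = np.zeros((n, n))
for i in range(n):
    for j in range(n):
        if compare_similarity(rows[i], rows[j]) >= similarity:
            adjacency[i][j] = 1

print(adjacency)
# [[1. 1. 0.]
#  [1. 1. 0.]
#  [0. 0. 1.]]

Rows 0 and 1 share two of their three values, so they are linked; row 2 shares at most one value with the others and only connects to itself.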
{likelihood-1.2.15 → likelihood-1.2.17}/likelihood/models/deep/autoencoders.py

@@ -6,12 +6,11 @@ import numpy as np
 import pandas as pd
 import tensorflow as tf
 from pandas.core.frame import DataFrame
-from tensorflow.keras.models import Model

 from likelihood.tools import OneHotEncoder


-class AutoClassifier(Model):
+class AutoClassifier(tf.keras.Model):
     """
     An auto-classifier model that automatically determines the best classification strategy based on the input data.

@@ -23,6 +22,10 @@ class AutoClassifier(Model):

     Methods:
     __init__(self, input_shape, num_classes, units, activation): Initializes an AutoClassifier instance with the given parameters.
+    build(self, input_shape): Builds the model architecture based on input_shape.
+    call(self, x): Defines the forward pass of the model.
+    get_config(self): Returns the configuration of the model.
+    from_config(cls, config): Recreates an instance of AutoClassifier from its configuration.
     """

     def __init__(self, input_shape, num_classes, units, activation):

@@ -41,33 +44,59 @@ class AutoClassifier(Model):
         The type of activation function to use for the neural network layers.
         """
         super(AutoClassifier, self).__init__()
+        self.input_shape = input_shape
+        self.num_classes = num_classes
         self.units = units
-        self.
+        self.activation = activation

+        self.encoder = None
+        self.decoder = None
+        self.classifier = None
+
+    def build(self, input_shape):
         self.encoder = tf.keras.Sequential(
             [
-                tf.keras.layers.Dense(units=units, activation=activation),
-                tf.keras.layers.Dense(units=int(units / 2), activation=activation),
+                tf.keras.layers.Dense(units=self.units, activation=self.activation),
+                tf.keras.layers.Dense(units=int(self.units / 2), activation=self.activation),
             ]
         )

         self.decoder = tf.keras.Sequential(
             [
-                tf.keras.layers.Dense(units=units, activation=activation),
-                tf.keras.layers.Dense(units=input_shape, activation=activation),
+                tf.keras.layers.Dense(units=self.units, activation=self.activation),
+                tf.keras.layers.Dense(units=self.input_shape, activation=self.activation),
             ]
         )

         self.classifier = tf.keras.Sequential(
-            [tf.keras.layers.Dense(num_classes, activation="softmax")]
+            [tf.keras.layers.Dense(self.num_classes, activation="softmax")]
         )

     def call(self, x):
         encoded = self.encoder(x)
         decoded = self.decoder(encoded)
         combined = tf.concat([decoded, encoded], axis=1)
-
-        return
+        classification = self.classifier(combined)
+        return classification
+
+    def get_config(self):
+        config = {
+            "input_shape": self.input_shape,
+            "num_classes": self.num_classes,
+            "units": self.units,
+            "activation": self.activation,
+        }
+        base_config = super(AutoClassifier, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+    @classmethod
+    def from_config(cls, config):
+        return cls(
+            input_shape=config["input_shape"],
+            num_classes=config["num_classes"],
+            units=config["units"],
+            activation=config["activation"],
+        )


 def call_existing_code(

@@ -146,7 +175,13 @@ def build_model(hp, input_shape: None | int, num_classes: None | int) -> AutoClassifier:


 def setup_model(
-    data: DataFrame,
+    data: DataFrame,
+    target: str,
+    epochs: int,
+    train_size: float = 0.7,
+    seed=None,
+    filepath: str = "./my_dir/best_model.keras",
+    **kwargs
 ) -> AutoClassifier:
     """Setup model for training and tuning.

@@ -162,6 +197,8 @@ def setup_model(
         The proportion of the dataset to use for training.
     seed : `Any` | `int`
         The random seed to use for reproducibility.
+    filepath : `str`
+        The path to save the best model to.

     Keyword Arguments:
     ----------

@@ -228,13 +265,13 @@ def setup_model(
     best_model = models[0]

     # save model
-    best_model.save(
+    best_model.save(filepath)

     if verbose:
         tuner.results_summary()
     else:
         # Load the best model from the directory
-        best_model = tf.keras.models.load_model(
+        best_model = tf.keras.models.load_model(filepath)

     return best_model
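The `AutoClassifier` changes above follow the standard Keras recipe for serializable subclassed models: constructor arguments are stored on `self`, layers are created lazily in `build`, and `get_config`/`from_config` let a saved `.keras` file be rebuilt. A minimal sketch of the same recipe, using a hypothetical `TinyClassifier` stand-in rather than the package's own class:

import numpy as np
import tensorflow as tf

@tf.keras.utils.register_keras_serializable(package="Custom")
class TinyClassifier(tf.keras.Model):
    def __init__(self, num_classes, units, **kwargs):
        super().__init__(**kwargs)
        self.num_classes = num_classes
        self.units = units
        self.hidden = None
        self.out = None

    def build(self, input_shape):
        # layers created lazily, as in the diffed AutoClassifier.build
        self.hidden = tf.keras.layers.Dense(self.units, activation="relu")
        self.out = tf.keras.layers.Dense(self.num_classes, activation="softmax")
        super().build(input_shape)

    def call(self, x):
        return self.out(self.hidden(x))

    def get_config(self):
        # constructor args round-trip through the saved file
        return {**super().get_config(), "num_classes": self.num_classes, "units": self.units}

model = TinyClassifier(num_classes=3, units=8)
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
x = np.random.rand(16, 4).astype("float32")
y = np.random.randint(0, 3, size=16)
model.fit(x, y, epochs=1, verbose=0)

model.save("tiny.keras")                             # serialized via get_config()
restored = tf.keras.models.load_model("tiny.keras")  # rebuilt via from_config()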
{likelihood-1.2.15 → likelihood-1.2.17}/likelihood/tools/tools.py

@@ -815,7 +815,10 @@ class DataFrameEncoder:
            self._df[i] = self._df[i].apply(
                self._code_transformation_to, dictionary_list=encode_dict
            )
-
+            if len(self._df[i].unique()) > 1:
+                median_value = len(self._df[i].unique()) // 2
+            else:
+                median_value = 1.0
            if norm_method == "median":
                self._df[i] = self._df[i].astype("float64")
                self._df[i] = self._df[i] / median_value

@@ -842,6 +845,8 @@ class DataFrameEncoder:
        print("Configuration detected")
        if len(self.median_list) == len(self._encode_columns):
            median_mode = True
+        else:
+            median_mode = False
        for num, colname in enumerate(self._encode_columns):
            if self._df[colname].dtype == "object":
                encode_dict = self.encoding_list[num]

@@ -859,6 +864,8 @@ class DataFrameEncoder:
        df_decoded = self._df.copy()
        if len(self.median_list) == len(self._encode_columns):
            median_mode = True
+        else:
+            median_mode = False
        try:
            number_of_columns = len(self.decoding_list[j])
            for i in self._encode_columns:
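Both `median_mode` hunks in tools.py close the same gap: the flag was only bound when the two lengths matched, so any later read on the mismatched path raised `NameError`. A minimal sketch of the failure mode, with hypothetical stand-in names:

median_list, encode_columns = [], ["a", "b"]

if len(median_list) == len(encode_columns):
    median_mode = True
else:
    median_mode = False  # branch added in 1.2.17; without it the next line would raise NameError

print(median_mode)  # False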
{likelihood-1.2.15 → likelihood-1.2.17}/likelihood.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: likelihood
-Version: 1.2.15
+Version: 1.2.17
 Summary: A package that performs the maximum likelihood algorithm.
 Home-page: https://github.com/jzsmoreno/likelihood/
 Author: J. A. Moreno-Guerra

@@ -30,6 +30,7 @@ Requires-Dist: networkx; extra == "full"
 Requires-Dist: pyvis; extra == "full"
 Requires-Dist: tensorflow; extra == "full"
 Requires-Dist: keras-tuner; extra == "full"
+Requires-Dist: scikit-learn; extra == "full"
{likelihood-1.2.15 → likelihood-1.2.17}/setup.py

@@ -31,7 +31,7 @@ setuptools.setup(
     packages=setuptools.find_packages(),
     install_requires=install_requires,
     extras_require={
-        "full": ["networkx", "pyvis", "tensorflow", "keras-tuner"],
+        "full": ["networkx", "pyvis", "tensorflow", "keras-tuner", "scikit-learn"],
     },
     classifiers=[
         "Programming Language :: Python :: 3",
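The setup.py change mirrors the PKG-INFO and requires.txt hunks above: the new likelihood/graph/nn.py imports scikit-learn, so it now ships with the `full` extra (installed via `pip install "likelihood[full]"`). A small sketch for checking that the optional dependencies resolve; the strings below are import names, not PyPI distribution names:

import importlib.util

# sklearn is the import name of the scikit-learn distribution added in 1.2.17
for module in ("networkx", "tensorflow", "sklearn"):
    found = importlib.util.find_spec(module) is not None
    print(f"{module}: {'ok' if found else 'missing'}")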