likelihood 1.5.7.tar.gz → 1.5.8.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {likelihood-1.5.7 → likelihood-1.5.8}/PKG-INFO +1 -1
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/deep/autoencoders.py +50 -27
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/deep/gan.py +3 -3
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/deep/predictor.py +9 -8
- likelihood-1.5.8/likelihood/tools/cat_embed.py +213 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood.egg-info/PKG-INFO +1 -1
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood.egg-info/SOURCES.txt +1 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/LICENSE +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/README.md +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/__init__.py +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/graph/__init__.py +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/graph/graph.py +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/graph/nn.py +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/main.py +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/__init__.py +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/deep/__init__.py +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/hmm.py +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/regression.py +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/simulation.py +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/utils.py +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/tools/__init__.py +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/tools/figures.py +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/tools/impute.py +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/tools/models_tools.py +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/tools/numeric_tools.py +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood/tools/tools.py +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood.egg-info/dependency_links.txt +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood.egg-info/requires.txt +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/likelihood.egg-info/top_level.txt +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/setup.cfg +0 -0
- {likelihood-1.5.7 → likelihood-1.5.8}/setup.py +0 -0
{likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/deep/autoencoders.py

@@ -277,7 +277,8 @@ class AutoClassifier(tf.keras.Model):
                         activation=self.activation,
                         kernel_regularizer=l2(self.l2_reg),
                     ),
-                ]
+                ],
+                name="encoder",
             )
             if not self.encoder
             else self.encoder
@@ -296,7 +297,8 @@ class AutoClassifier(tf.keras.Model):
                         activation=self.activation,
                         kernel_regularizer=l2(self.l2_reg),
                     ),
-                ]
+                ],
+                name="decoder",
             )
             if not self.decoder
             else self.decoder
@@ -326,7 +328,7 @@ class AutoClassifier(tf.keras.Model):
         log_var = tf.keras.layers.Lambda(lambda x: x + 1e-7)(log_var)

         self.encoder = (
-            tf.keras.Model(inputs, [mean, log_var], name="
+            tf.keras.Model(inputs, [mean, log_var], name="vae_encoder")
             if not self.encoder
             else self.encoder
         )
@@ -345,7 +347,8 @@ class AutoClassifier(tf.keras.Model):
                     ),
                     tf.keras.layers.BatchNormalization(),
                     tf.keras.layers.Activation(self.activation),
-                ]
+                ],
+                name="vae_decoder",
             )
             if not self.decoder
             else self.decoder
@@ -366,13 +369,7 @@ class AutoClassifier(tf.keras.Model):
             )
             if self.dropout:
                 self.classifier.add(tf.keras.layers.Dropout(self.dropout))
-
-                tf.keras.layers.Dense(
-                    units=self.num_classes,
-                    activation=self.classifier_activation,
-                    kernel_regularizer=l2(self.l2_reg),
-                )
-            )
+
         elif self.lora_mode:
             for _ in range(self.num_layers - 1):
                 self.classifier.add(
@@ -381,21 +378,14 @@ class AutoClassifier(tf.keras.Model):
                 self.classifier.add(tf.keras.layers.Activation(self.activation))
                 if self.dropout:
                     self.classifier.add(tf.keras.layers.Dropout(self.dropout))
-
-
-
-
-
-                )
-            )
-        else:
-            self.classifier.add(
-                tf.keras.layers.Dense(
-                    units=self.num_classes,
-                    activation=self.classifier_activation,
-                    kernel_regularizer=l2(self.l2_reg),
-                )
+
+        self.classifier.add(
+            tf.keras.layers.Dense(
+                units=self.num_classes,
+                activation=self.classifier_activation,
+                kernel_regularizer=l2(self.l2_reg),
             )
+        )

     def train_encoder_decoder(
         self, data, epochs, batch_size, validation_split=0.2, patience=10, **kwargs
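The net effect of the two hunks above: the output classification layer is no longer duplicated inside the dropout and LoRA branches (where one copy had been left dangling), but appended once after the branching. A minimal sketch of the resulting control flow; attribute names come from the diff, the helper itself is illustrative:

    import tensorflow as tf
    from tensorflow.keras.regularizers import l2

    def finish_classifier_head(classifier, num_classes, classifier_activation, l2_reg):
        # Hidden layers for the dropout/LoRA variants are added by the branches
        # above; the output Dense layer is now added exactly once, for every mode.
        classifier.add(
            tf.keras.layers.Dense(
                units=num_classes,
                activation=classifier_activation,
                kernel_regularizer=l2(l2_reg),
            )
        )
        return classifier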
@@ -610,6 +600,13 @@ def call_existing_code(
     num_layers : `int`
         The number of hidden layers in the classifier. Default is 1.

+    Keyword Arguments:
+    ----------
+    vae_mode : `bool`
+        Whether to use variational autoencoder mode. Default is False.
+    vae_units : `int`
+        The number of units in the variational autoencoder. Default is 2.
+
     Returns
     -------
     `AutoClassifier`
@@ -617,6 +614,8 @@ def call_existing_code(
     """
     dropout = kwargs.get("dropout", None)
     l2_reg = kwargs.get("l2_reg", 0.0)
+    vae_mode = kwargs.get("vae_mode", False)
+    vae_units = kwargs.get("vae_units", 2)
     model = AutoClassifier(
         input_shape_parm=input_shape_parm,
         num_classes=num_classes,
@@ -625,6 +624,8 @@ def call_existing_code(
         num_layers=num_layers,
         dropout=dropout,
         l2_reg=l2_reg,
+        vae_mode=vae_mode,
+        vae_units=vae_units,
     )
     model.compile(
         optimizer=optimizer,
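For reference, a hypothetical call showing how the new keywords travel through **kwargs into the AutoClassifier constructor. Only parameter names visible in these hunks are used; the concrete values are invented:

    # Sketch only: argument names come from the hunks above, values are made up.
    model = call_existing_code(
        units=64,
        optimizer="adam",   # forwarded to model.compile(...)
        input_shape_parm=20,
        num_classes=3,
        num_layers=2,
        dropout=0.1,        # optional, read via kwargs.get
        l2_reg=0.01,        # optional, read via kwargs.get
        vae_mode=True,      # new in 1.5.8, default False
        vae_units=4,        # new in 1.5.8, default 2
    )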
@@ -731,6 +732,24 @@ def build_model(
             else hyperparameters["l2_reg"]
         )
     )
+    vae_mode = (
+        hp.Choice("vae_mode", [True, False])
+        if "vae_mode" not in hyperparameters_keys
+        else hyperparameters["vae_mode"]
+    )
+
+    try:
+        vae_units = (
+            hp.Int("vae_units", min_value=2, max_value=10, step=1)
+            if ("vae_units" not in hyperparameters_keys) and vae_mode
+            else (
+                hp.Choice("vae_units", hyperparameters["vae_units"])
+                if isinstance(hyperparameters["vae_units"], list)
+                else hyperparameters["vae_units"]
+            )
+        )
+    except KeyError:
+        vae_units = None

     model = call_existing_code(
         units=units,
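The `vae_units` resolution added above has three possible outcomes that are easy to miss in the nested conditional. A sketch of the same logic with each case annotated (`hp` is a keras-tuner-style HyperParameters object, `hyperparameters` the user override dict, both assumed from context):

    def resolve_vae_units(hp, hyperparameters):
        keys = hyperparameters.keys()
        vae_mode = (
            hp.Choice("vae_mode", [True, False])  # searched when not fixed by the user
            if "vae_mode" not in keys
            else hyperparameters["vae_mode"]
        )
        try:
            return (
                hp.Int("vae_units", min_value=2, max_value=10, step=1)  # searched over 2..10
                if ("vae_units" not in keys) and vae_mode
                else (
                    hp.Choice("vae_units", hyperparameters["vae_units"])  # user gave candidates
                    if isinstance(hyperparameters["vae_units"], list)
                    else hyperparameters["vae_units"]                     # user fixed one value
                )
            )
        except KeyError:
            # "vae_units" absent while vae_mode resolved False: fall back to None
            return None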
@@ -742,6 +761,8 @@ def build_model(
         num_layers=num_layers,
         dropout=dropout,
         l2_reg=l2_reg,
+        vae_mode=vae_mode,
+        vae_units=vae_units,
     )
     return model

|
|
|
876
897
|
tuner.results_summary()
|
|
877
898
|
else:
|
|
878
899
|
best_model = tf.keras.models.load_model(filepath)
|
|
879
|
-
|
|
880
900
|
best_hps = tuner.get_best_hyperparameters(1)[0].values
|
|
881
|
-
|
|
901
|
+
vae_mode = best_hps.get("vae_mode", hyperparameters.get("vae_mode", False))
|
|
902
|
+
best_hps["vae_units"] = None if not vae_mode else best_hps["vae_units"]
|
|
903
|
+
|
|
904
|
+
return best_model, pd.DataFrame(best_hps, index=["Value"]).dropna(axis=1)
|
|
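Setting `vae_units` to None when `vae_mode` is off lets `dropna(axis=1)` silently drop the irrelevant column from the returned one-row summary. A tiny pandas sketch of that behavior:

    import pandas as pd

    best_hps = {"units": 64, "vae_mode": False, "vae_units": None}
    # The None becomes a missing value, so dropna(axis=1) removes the column.
    print(pd.DataFrame(best_hps, index=["Value"]).dropna(axis=1))
    #        units  vae_mode
    # Value     64     False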
{likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/deep/gan.py

@@ -41,7 +41,7 @@ class GANRegressor(tf.keras.Model):
         self.build(dummy_input.shape)

     def build(self, input_shape):
-        self.gan = tf.keras.models.Sequential([self.generator, self.discriminator])
+        self.gan = tf.keras.models.Sequential([self.generator, self.discriminator], name="gan")

         self.generator.compile(
             optimizer=self.optimizer,
@@ -57,7 +57,7 @@ class GANRegressor(tf.keras.Model):
         super(GANRegressor, self).build(input_shape)

     def _build_generator(self):
-        generator = tf.keras.Sequential()
+        generator = tf.keras.Sequential(name="generator")
         generator.add(
             tf.keras.layers.Dense(
                 self.num_neurons,
@@ -78,7 +78,7 @@ class GANRegressor(tf.keras.Model):
         return generator

     def _build_discriminator(self):
-        discriminator = tf.keras.Sequential()
+        discriminator = tf.keras.Sequential(name="discriminator")
         for _ in range(self.depth):
             discriminator.add(
                 tf.keras.layers.Dense(
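All three gan.py changes simply attach explicit names to the Sequential sub-models. A small self-contained sketch of what that buys, namely stable identifiers in summaries and name-based lookup:

    import tensorflow as tf

    generator = tf.keras.Sequential(name="generator")
    generator.add(tf.keras.layers.Dense(8, input_shape=(4,)))
    discriminator = tf.keras.Sequential(name="discriminator")
    discriminator.add(tf.keras.layers.Dense(1))

    gan = tf.keras.models.Sequential([generator, discriminator], name="gan")
    print([layer.name for layer in gan.layers])  # ['generator', 'discriminator']
    gan.get_layer("discriminator")               # lookup by the explicit name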
{likelihood-1.5.7 → likelihood-1.5.8}/likelihood/models/deep/predictor.py

@@ -109,15 +109,16 @@ class GetInsights:
                 "in the model's transformation.</p>"
             )
         )
-        self.
-
-
-
-
-
-
+        if not self.model.encoder.name.startswith("vae"):
+            self.viz_encoder_decoder_graphs(threshold_factor=threshold_factor, top_k=top_k)
+
+        display(HTML("<h2 style='margin-top:30px;'>🧠 Classifier Layer Graphs</h2>"))
+        display(
+            HTML(
+                "<p>This visualization shows how features propagate through each dense layer in the classifier. "
+                "Only the strongest weighted connections are shown to highlight influential paths through the network.</p>"
+            )
         )
-        )
         self.viz_classifier_graphs(threshold_factor=threshold_factor, top_k=top_k)

         display(HTML("<h2 style='margin-top:30px;'>📈 Statistical Summary</h2>"))
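This hunk is why the encoder models in autoencoders.py received explicit names: GetInsights now skips the encoder/decoder weight graphs whenever the encoder name carries the vae prefix. A sketch of that dispatch:

    import tensorflow as tf

    for name in ("encoder", "vae_encoder"):
        enc = tf.keras.Sequential(name=name)
        action = "draw encoder/decoder graphs" if not enc.name.startswith("vae") else "skip them"
        print(f"{name}: {action}")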
likelihood-1.5.8/likelihood/tools/cat_embed.py (new file)

@@ -0,0 +1,213 @@
+import logging
+import os
+from typing import List
+
+import numpy as np
+import pandas as pd
+
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+logging.getLogger("tensorflow").setLevel(logging.ERROR)
+import tensorflow as tf
+from pandas.core.frame import DataFrame
+from sklearn.preprocessing import LabelEncoder
+
+tf.get_logger().setLevel("ERROR")
+
+
+class CategoricalEmbedder:
+    def __init__(self, embedding_dim=32):
+        self.embedding_dim = embedding_dim
+        self.label_encoders = {}
+        self.embeddings = {}
+
+    def fit(self, df: DataFrame, categorical_cols: List):
+        """
+        Fit the embeddings on the given data.
+
+        Parameters
+        ----------
+        df : `DataFrame`
+            Pandas DataFrame containing the tabular data.
+        categorical_cols : `List`
+            List of column names representing categorical features.
+
+        Returns
+        -------
+        `None`
+        """
+        df_processed = df.copy()
+        for col in categorical_cols:
+            if col not in df_processed.columns:
+                raise ValueError(f"Column {col} not found in DataFrame")
+
+        for col in categorical_cols:
+            mode_val = df_processed[col].mode()
+            if not mode_val.empty:
+                df_processed[col] = df_processed[col].fillna(mode_val[0])
+
+        for col in categorical_cols:
+            le = LabelEncoder()
+            df_processed[col] = le.fit_transform(df_processed[col])
+            self.label_encoders[col] = le
+
+            vocab_size = len(le.classes_)
+            embedding_matrix = np.random.rand(vocab_size, self.embedding_dim)
+            self.embeddings[col] = tf.Variable(embedding_matrix, dtype=tf.float32)
+
+    def transform(self, df: DataFrame, categorical_cols: List[str]):
+        """
+        Transform the data using the fitted embeddings.
+
+        Parameters
+        ----------
+        df : `DataFrame`
+            Pandas DataFrame containing the tabular data.
+        categorical_cols : `List[str]`
+            List of column names representing categorical features.
+
+        Returns
+        -------
+        Transformed Pandas DataFrame with original columns except `categorical_cols` replaced by their embedding representations.
+        """
+
+        df_processed = df.copy()
+
+        for col in categorical_cols:
+            if col not in self.label_encoders:
+                raise ValueError(
+                    f"Column {col} has not been fitted. Please call fit() on this column first."
+                )
+            mode_val = df_processed[col].mode()
+            if not mode_val.empty:
+                df_processed[col] = df_processed[col].fillna(mode_val[0])
+            le = self.label_encoders[col]
+            df_processed[col] = le.transform(df_processed[col])
+
+        for col in categorical_cols:
+            indices_tensor = tf.constant(df_processed[col], dtype=tf.int32)
+            embedding_layer = tf.nn.embedding_lookup(
+                params=self.embeddings[col], ids=indices_tensor
+            )
+            if len(embedding_layer.shape) == 1:
+                embedding_layer = tf.expand_dims(embedding_layer, axis=0)
+
+            for i in range(self.embedding_dim):
+                df_processed[f"{col}_embed_{i}"] = embedding_layer[:, i]
+            df_processed.drop(columns=[col], inplace=True)
+
+        return df_processed
+
+    def inverse_transform(self, df: pd.DataFrame, categorical_cols: List[str]):
+        """
+        Inverse transform the data using the fitted embeddings.
+
+        Parameters
+        ----------
+        df : `DataFrame`
+            Pandas DataFrame containing the tabular data with embedded representations.
+        categorical_cols : `List[str]`
+            List of column names representing categorical features.
+
+        Returns
+        -------
+        Transformed Pandas DataFrame with original columns replaced by their categorical labels.
+        """
+
+        df_processed = df.copy()
+
+        for col in categorical_cols:
+            if col not in self.label_encoders:
+                raise ValueError(
+                    f"Column {col} has not been fitted. Please call fit() on this column first."
+                )
+
+            embedding_matrix = self.embeddings[col].numpy()
+            label_encoder = self.label_encoders[col]
+
+            embedded_columns = [f"{col}_embed_{i}" for i in range(self.embedding_dim)]
+            embeddings = df_processed[embedded_columns].values
+
+            distances = np.linalg.norm(embedding_matrix - embeddings[:, np.newaxis], axis=2)
+            original_indices = np.argmin(distances, axis=1)
+            original_labels = label_encoder.inverse_transform(original_indices)
+
+            df_processed[col] = original_labels
+            df_processed.drop(columns=embedded_columns, inplace=True)
+
+        return df_processed
+
+    def save_embeddings(self, path: str):
+        """
+        Save the embeddings to a directory.
+
+        Parameters
+        ----------
+        path : `str`
+            Path to the directory where embeddings will be saved.
+        """
+
+        os.makedirs(path, exist_ok=True)
+        for col, embedding in self.embeddings.items():
+            np.save(os.path.join(path, f"{col}_embedding.npy"), embedding.numpy())
+
+    def load_embeddings(self, path: str):
+        """
+        Load the embeddings from a directory.
+
+        Parameters
+        ----------
+        path : `str`
+            Path to the directory where embeddings are saved.
+        """
+
+        for col in self.label_encoders.keys():
+            embedding_path = os.path.join(path, f"{col}_embedding.npy")
+            if not os.path.exists(embedding_path):
+                raise FileNotFoundError(f"Embedding file {embedding_path} not found.")
+            embedding_matrix = np.load(embedding_path)
+            self.embeddings[col] = tf.Variable(embedding_matrix, dtype=tf.float32)
+
+
+if __name__ == "__main__":
+    data = {
+        "color": ["red", "blue", None, "green", "blue"],
+        "size": ["S", "M", "XL", "XS", None],
+        "price": [10.99, 25.50, 30.00, 8.75, 12.25],
+    }
+    df = pd.DataFrame(data)
+
+    # Initialize the embedder
+    embedder = CategoricalEmbedder(embedding_dim=3)
+
+    # Fit the embeddings on the data
+    embedder.fit(df, categorical_cols=["color", "size"])
+
+    # Transform the data using the fitted embeddings
+    processed_df = embedder.transform(df, categorical_cols=["color", "size"])
+
+    print("Processed DataFrame:")
+    print(processed_df.head())
+
+    # Save the embeddings to disk
+    embedder.save_embeddings("./embeddings")
+
+    # Load the embeddings from disk
+    new_embedder = CategoricalEmbedder(embedding_dim=3)
+    new_embedder.label_encoders = (
+        embedder.label_encoders
+    )  # Assuming label encodings are consistent across runs
+    new_embedder.load_embeddings("./embeddings")
+
+    # Transform the data using the loaded embeddings
+    processed_df_loaded = new_embedder.transform(df, categorical_cols=["color", "size"])
+    print("\nProcessed DataFrame with Loaded Embeddings:")
+    print(processed_df_loaded.head())
+
+    # Inverse transform the data
+    df_loaded = new_embedder.inverse_transform(
+        processed_df_loaded, categorical_cols=["color", "size"]
+    )
+    print("\nOriginal DataFrame:")
+    print(df.head())
+    print("\nProcessed DataFrame with Inverse Transform:")
+    print(df_loaded.head())
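Note that the embedding matrices in this new module are randomly initialized (np.random.rand) and are not trained anywhere in the file, so transform acts as a fixed random projection of the label codes unless the tf.Variables are optimized elsewhere. The interesting piece is inverse_transform, which recovers categories by nearest-neighbor search in embedding space; a small numpy sketch of that broadcasting:

    import numpy as np

    vocab = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 0.0]])  # (vocab_size, dim)
    rows = np.array([[0.9, 1.1], [1.9, 0.1]])               # (n_rows, dim)

    # (vocab_size, dim) - (n_rows, 1, dim) broadcasts to (n_rows, vocab_size, dim);
    # the norm over the last axis gives one distance per (row, vocabulary entry).
    distances = np.linalg.norm(vocab - rows[:, np.newaxis], axis=2)
    print(np.argmin(distances, axis=1))  # [1 2] -> nearest vocabulary indices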
{likelihood-1.5.7 → likelihood-1.5.8}/likelihood.egg-info/SOURCES.txt

@@ -21,6 +21,7 @@ likelihood/models/deep/autoencoders.py
 likelihood/models/deep/gan.py
 likelihood/models/deep/predictor.py
 likelihood/tools/__init__.py
+likelihood/tools/cat_embed.py
 likelihood/tools/figures.py
 likelihood/tools/impute.py
 likelihood/tools/models_tools.py