pg-sui 1.0.2.1__py3-none-any.whl → 1.6.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pg-sui might be problematic. Click here for more details.
- {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info}/METADATA +51 -70
- pg_sui-1.6.8.dist-info/RECORD +78 -0
- {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info}/WHEEL +1 -1
- pg_sui-1.6.8.dist-info/entry_points.txt +4 -0
- pg_sui-1.6.8.dist-info/top_level.txt +1 -0
- pgsui/__init__.py +35 -54
- pgsui/_version.py +34 -0
- pgsui/cli.py +635 -0
- pgsui/data_processing/config.py +576 -0
- pgsui/data_processing/containers.py +1782 -0
- pgsui/data_processing/transformers.py +121 -1103
- pgsui/electron/app/__main__.py +5 -0
- pgsui/electron/app/icons/icons/1024x1024.png +0 -0
- pgsui/electron/app/icons/icons/128x128.png +0 -0
- pgsui/electron/app/icons/icons/16x16.png +0 -0
- pgsui/electron/app/icons/icons/24x24.png +0 -0
- pgsui/electron/app/icons/icons/256x256.png +0 -0
- pgsui/electron/app/icons/icons/32x32.png +0 -0
- pgsui/electron/app/icons/icons/48x48.png +0 -0
- pgsui/electron/app/icons/icons/512x512.png +0 -0
- pgsui/electron/app/icons/icons/64x64.png +0 -0
- pgsui/electron/app/icons/icons/icon.icns +0 -0
- pgsui/electron/app/icons/icons/icon.ico +0 -0
- pgsui/electron/app/main.js +189 -0
- pgsui/electron/app/package-lock.json +6893 -0
- pgsui/electron/app/package.json +50 -0
- pgsui/electron/app/preload.js +15 -0
- pgsui/electron/app/server.py +146 -0
- pgsui/electron/app/ui/logo.png +0 -0
- pgsui/electron/app/ui/renderer.js +130 -0
- pgsui/electron/app/ui/styles.css +59 -0
- pgsui/electron/app/ui/ui_shim.js +72 -0
- pgsui/electron/bootstrap.py +43 -0
- pgsui/electron/launch.py +59 -0
- pgsui/electron/package.json +14 -0
- pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
- pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
- pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
- pgsui/impute/deterministic/imputers/allele_freq.py +691 -0
- pgsui/impute/deterministic/imputers/mode.py +679 -0
- pgsui/impute/deterministic/imputers/nmf.py +221 -0
- pgsui/impute/deterministic/imputers/phylo.py +971 -0
- pgsui/impute/deterministic/imputers/ref_allele.py +530 -0
- pgsui/impute/supervised/base.py +339 -0
- pgsui/impute/supervised/imputers/hist_gradient_boosting.py +293 -0
- pgsui/impute/supervised/imputers/random_forest.py +287 -0
- pgsui/impute/unsupervised/base.py +924 -0
- pgsui/impute/unsupervised/callbacks.py +89 -263
- pgsui/impute/unsupervised/imputers/autoencoder.py +972 -0
- pgsui/impute/unsupervised/imputers/nlpca.py +1264 -0
- pgsui/impute/unsupervised/imputers/ubp.py +1288 -0
- pgsui/impute/unsupervised/imputers/vae.py +957 -0
- pgsui/impute/unsupervised/loss_functions.py +158 -0
- pgsui/impute/unsupervised/models/autoencoder_model.py +208 -558
- pgsui/impute/unsupervised/models/nlpca_model.py +149 -468
- pgsui/impute/unsupervised/models/ubp_model.py +198 -1317
- pgsui/impute/unsupervised/models/vae_model.py +259 -618
- pgsui/impute/unsupervised/nn_scorers.py +215 -0
- pgsui/utils/classification_viz.py +591 -0
- pgsui/utils/misc.py +35 -480
- pgsui/utils/plotting.py +514 -824
- pgsui/utils/scorers.py +212 -438
- pg_sui-1.0.2.1.dist-info/RECORD +0 -75
- pg_sui-1.0.2.1.dist-info/top_level.txt +0 -3
- pgsui/example_data/phylip_files/test_n10.phy +0 -118
- pgsui/example_data/phylip_files/test_n100.phy +0 -118
- pgsui/example_data/phylip_files/test_n2.phy +0 -118
- pgsui/example_data/phylip_files/test_n500.phy +0 -118
- pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
- pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
- pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
- pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
- pgsui/example_data/trees/test.iqtree +0 -376
- pgsui/example_data/trees/test.qmat +0 -5
- pgsui/example_data/trees/test.rate +0 -2033
- pgsui/example_data/trees/test.tre +0 -1
- pgsui/example_data/trees/test_n10.rate +0 -19
- pgsui/example_data/trees/test_n100.rate +0 -109
- pgsui/example_data/trees/test_n500.rate +0 -509
- pgsui/example_data/trees/test_siterates.txt +0 -2024
- pgsui/example_data/trees/test_siterates_n10.txt +0 -10
- pgsui/example_data/trees/test_siterates_n100.txt +0 -100
- pgsui/example_data/trees/test_siterates_n500.txt +0 -500
- pgsui/example_data/vcf_files/test.vcf +0 -244
- pgsui/example_data/vcf_files/test.vcf.gz +0 -0
- pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
- pgsui/impute/estimators.py +0 -735
- pgsui/impute/impute.py +0 -1486
- pgsui/impute/simple_imputers.py +0 -1439
- pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -785
- pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1027
- pgsui/impute/unsupervised/keras_classifiers.py +0 -702
- pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
- pgsui/impute/unsupervised/neural_network_imputers.py +0 -1424
- pgsui/impute/unsupervised/neural_network_methods.py +0 -1549
- pgsui/pg_sui.py +0 -261
- pgsui/utils/sequence_tools.py +0 -407
- simulation/sim_benchmarks.py +0 -333
- simulation/sim_treeparams.py +0 -475
- test/__init__.py +0 -0
- test/pg_sui_simtest.py +0 -215
- test/pg_sui_testing.py +0 -523
- test/test.py +0 -297
- test/test_pgsui.py +0 -374
- test/test_tkc.py +0 -214
- {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info/licenses}/LICENSE +0 -0
- /pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
- /pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
- {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
|
@@ -1,702 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
import tensorflow as tf
|
|
3
|
-
|
|
4
|
-
from scikeras.wrappers import KerasClassifier
|
|
5
|
-
|
|
6
|
-
try:
|
|
7
|
-
from ...utils.scorers import Scorers
|
|
8
|
-
from .models.autoencoder_model import AutoEncoderModel
|
|
9
|
-
from .models.nlpca_model import NLPCAModel
|
|
10
|
-
from .models.ubp_model import UBPPhase1, UBPPhase2, UBPPhase3
|
|
11
|
-
from .models.vae_model import VAEModel
|
|
12
|
-
from ...data_processing.transformers import (
|
|
13
|
-
MLPTargetTransformer,
|
|
14
|
-
UBPInputTransformer,
|
|
15
|
-
AutoEncoderFeatureTransformer,
|
|
16
|
-
)
|
|
17
|
-
except (ModuleNotFoundError, ValueError, ImportError):
|
|
18
|
-
from utils.scorers import Scorers
|
|
19
|
-
from impute.unsupervised.neural_network_methods import NeuralNetworkMethods
|
|
20
|
-
from impute.unsupervised.models.vae_model import (
|
|
21
|
-
VAEModel,
|
|
22
|
-
)
|
|
23
|
-
from impute.unsupervised.models.autoencoder_model import AutoEncoderModel
|
|
24
|
-
from impute.unsupervised.models.nlpca_model import NLPCAModel
|
|
25
|
-
from impute.unsupervised.models.ubp_model import (
|
|
26
|
-
UBPPhase1,
|
|
27
|
-
UBPPhase2,
|
|
28
|
-
UBPPhase3,
|
|
29
|
-
)
|
|
30
|
-
from data_processing.transformers import (
|
|
31
|
-
MLPTargetTransformer,
|
|
32
|
-
UBPInputTransformer,
|
|
33
|
-
AutoEncoderFeatureTransformer,
|
|
34
|
-
)
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class SAEClassifier(KerasClassifier):
    """Scikit-learn compatible estimator that builds and wraps an ``AutoEncoderModel``.

    Args:
        y (Any): Passed as the first positional argument to ``AutoEncoderModel``. Presumably the target data; confirm against the model. Defaults to None.

        output_shape (int): Number of units in model output layer. Defaults to None.

        weights_initializer (str): Kernel initializer to use for model weights. Defaults to "glorot_normal".

        hidden_layer_sizes (List[int]): Output unit size for each hidden layer. Should be list of length num_hidden_layers. Defaults to None.

        num_hidden_layers (int): Number of hidden layers to use. Defaults to 1.

        hidden_activation (str): Hidden activation function to use. Defaults to "elu".

        l1_penalty (float): L1 regularization penalty to use to reduce overfitting. Defaults to 0.01.

        l2_penalty (float): L2 regularization penalty to use to reduce overfitting. Defaults to 0.01.

        dropout_rate (float): Dropout rate for each hidden layer to reduce overfitting. Defaults to 0.2.

        n_components (int): Number of components to use for input V. Defaults to 3.

        sample_weight (Any): Forwarded unchanged to ``AutoEncoderModel``. Defaults to None.

        missing_mask (Any): Forwarded unchanged to ``AutoEncoderModel``. Defaults to None.

        num_classes (int, optional): Number of classes in y_train. 012-encoded data should have 3 classes. Defaults to 4.

        activate (str or None, optional): Forwarded to the target encoder (``AutoEncoderFeatureTransformer``). Defaults to "sigmoid".

        kwargs (Any): Other keyword arguments to route to fit, compile, callbacks, etc. Should have the routing prefix (e.g., optimizer__learning_rate=0.01).
    """

    def __init__(
        self,
        y=None,
        output_shape=None,
        weights_initializer="glorot_normal",
        hidden_layer_sizes=None,
        num_hidden_layers=1,
        hidden_activation="elu",
        l1_penalty=0.01,
        l2_penalty=0.01,
        dropout_rate=0.2,
        n_components=3,
        sample_weight=None,
        missing_mask=None,
        num_classes=4,
        activate="sigmoid",
        **kwargs,
    ):
        super().__init__(**kwargs)

        self.y = y
        self.output_shape = output_shape
        self.weights_initializer = weights_initializer
        self.hidden_layer_sizes = hidden_layer_sizes
        self.num_hidden_layers = num_hidden_layers
        self.hidden_activation = hidden_activation
        self.l1_penalty = l1_penalty
        self.l2_penalty = l2_penalty
        self.dropout_rate = dropout_rate
        self.n_components = n_components
        self.sample_weight = sample_weight
        self.missing_mask = missing_mask
        self.num_classes = num_classes
        self.activate = activate

        # Class metadata is derived from num_classes rather than from data.
        self.classes_ = np.arange(self.num_classes)
        self.n_classes_ = self.num_classes

    def _keras_build_fn(self, compile_kwargs):
        """Build and compile the ``AutoEncoderModel`` from the constructor parameters.

        Args:
            compile_kwargs (Dict[str, Any]): Dictionary with parameters: values. The parameters should be passed to the class constructor, but should be captured as kwargs. They should also have the routing prefix (e.g., optimizer__learning_rate=0.01). compile_kwargs will automatically be parsed from **kwargs by KerasClassifier and sent here. The keys "optimizer", "loss" and "metrics" are read.

        Returns:
            tf.keras.Model: Compiled ``AutoEncoderModel`` instance.
        """

        ######### REMOVING THIS LINE WILL BREAK THE MODEL!!!!! ########
        self.classes_ = np.arange(self.num_classes)

        model = AutoEncoderModel(
            self.y,
            output_shape=self.output_shape,
            n_components=self.n_components,
            weights_initializer=self.weights_initializer,
            hidden_layer_sizes=self.hidden_layer_sizes,
            num_hidden_layers=self.num_hidden_layers,
            hidden_activation=self.hidden_activation,
            l1_penalty=self.l1_penalty,
            l2_penalty=self.l2_penalty,
            dropout_rate=self.dropout_rate,
            sample_weight=self.sample_weight,
            missing_mask=self.missing_mask,
            num_classes=self.num_classes,
        )

        # run_eagerly is fixed to False here; it is not taken from
        # compile_kwargs.
        model.compile(
            optimizer=compile_kwargs["optimizer"],
            loss=compile_kwargs["loss"],
            metrics=compile_kwargs["metrics"],
            run_eagerly=False,
        )

        return model

    @staticmethod
    def scorer(y_true, y_pred, **kwargs):
        """Scorer for grid search that masks missing data.

        To use this, do not specify a scoring metric when initializing the grid search object. By default if the scoring_metric option is left as None, then it uses the estimator's scoring metric (this one).

        Args:
            y_true (numpy.ndarray): True target values input to fit().

            y_pred (numpy.ndarray): Predicted target values from estimator. The predictions are modified by self.target_encoder().inverse_transform() before being sent here.

            kwargs (Any): Options forwarded to ``Scorers.scorer``. The keys read here are missing_mask (defaults to None), num_classes (defaults to 3) and testing (defaults to False).

        Returns:
            float: Calculated score.
        """
        # NOTE(review): the num_classes fallback here is 3 while the
        # constructor default is 4; callers should pass num_classes explicitly.
        return Scorers().scorer(
            y_true,
            y_pred,
            missing_mask=kwargs.get("missing_mask"),
            num_classes=kwargs.get("num_classes", 3),
            testing=kwargs.get("testing", False),
        )

    @property
    def feature_encoder(self):
        """Handles feature input, X, before training.

        Returns:
            AutoEncoderFeatureTransformer: New transformer constructed with this estimator's num_classes.
        """
        return AutoEncoderFeatureTransformer(num_classes=self.num_classes)

    @property
    def target_encoder(self):
        """Handles target input and output, y_true and y_pred, both before and after training.

        Returns:
            AutoEncoderFeatureTransformer: New transformer constructed with this estimator's num_classes and activate settings.
        """
        return AutoEncoderFeatureTransformer(
            num_classes=self.num_classes, activate=self.activate
        )

    def predict(self, X, **kwargs):
        r"""Returns predictions for the given test data.

        Args:
            X (Union[array-like, sparse matrix, dataframe] of shape (n_samples, n_features)): Samples to predict, where n_samples is the number of samples and n_features is the number of features.
            kwargs (Dict[str, Any]): Accepted for compatibility with ``KerasClassifier.predict``; not used by this override.

        Warnings:
            Passing estimator parameters as keyword arguments (aka as ``**kwargs``) to ``predict`` is not supported by the Scikit-Learn API, and will be removed in a future version of SciKeras. These parameters can also be specified by prefixing ``predict__`` to a parameter at initialization (``BaseWrapper(..., fit__batch_size=32, predict__batch_size=1000)``) or by using ``set_params`` (``est.set_params(fit__batch_size=32, predict__batch_size=1000)``\).

        Returns:
            array-like: Predictions, of shape (n_samples,) or (n_samples, n_outputs).

        Notes:
            predict() is overridden so that the fitted model is invoked directly via __call__ (with training=False) on the encoded input, and the output is passed through the fitted target encoder's inverse_transform().
        """
        # The fitted *target* encoder is used to encode X as well as to decode
        # the model output.
        X_encoded = self.target_encoder_.transform(X)
        y_pred = self.model_(X_encoded, training=False)
        return self.target_encoder_.inverse_transform(y_pred)

    def get_metadata(self):
        """Returns a dictionary of meta-parameters generated when this transformer was fitted.

        Used by SciKeras to bind these parameters to the SciKeras estimator itself and make them available as inputs to the Keras model.

        Returns:
            Dict[str, Any]: Dictionary of meta-parameters generated when this transformer was fitted.
        """
        # n_outputs_ and n_outputs_expected_ are not assigned in this class;
        # presumably they are set by KerasClassifier during fit -- TODO confirm.
        return {
            "classes_": self.classes_,
            "n_classes_": self.n_classes_,
            "n_outputs_": self.n_outputs_,
            "n_outputs_expected_": self.n_outputs_expected_,
        }
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
class VAEClassifier(KerasClassifier):
    """Scikit-learn compatible estimator that builds and wraps a ``VAEModel``.

    Args:
        output_shape (int): Number of units in model output layer. Defaults to None.

        weights_initializer (str, optional): Kernel initializer to use for model weights. Defaults to "glorot_normal".

        hidden_layer_sizes (List[int]): Output unit size for each hidden layer. Should be list of length num_hidden_layers. Defaults to None.

        num_hidden_layers (int, optional): Number of hidden layers to use. Defaults to 1.

        hidden_activation (str, optional): Hidden activation function to use. Defaults to "elu".

        l1_penalty (float, optional): L1 regularization penalty to use to reduce overfitting. Defaults to 0.01.

        l2_penalty (float, optional): L2 regularization penalty to use to reduce overfitting. Defaults to 0.01.

        dropout_rate (float, optional): Dropout rate for each hidden layer to reduce overfitting. Defaults to 0.2.

        kl_beta (float, optional): Kullback-Leibler divergence weight (beta) to apply to KL loss. 1.0 means unweighted, 0.0 means KL loss is not applied at all. Defaults to 1.0.

        n_components (int, optional): Number of components to use for input V. Defaults to 3.

        num_classes (int, optional): Number of classes in y_train. [A,G,C,T...IUPAC codes]-encoded data should have 10 classes. Defaults to 4.

        sample_weight (Any, optional): Forwarded unchanged to ``VAEModel``. Defaults to None.

        activate (str or None, optional): If not None, then does the appropriate activation. Multilabel learning uses sigmoid activation, and multiclass uses softmax. If set to None, then the function assumes that the input has already been activated. Possible values include: {None, 'sigmoid', 'softmax'}. Also forwarded to ``VAEModel`` as ``final_activation``. Defaults to None.

        y (Any, optional): Forwarded unchanged to ``VAEModel``. Presumably the target data; confirm against the model. Defaults to None.

        missing_mask (Any, optional): Forwarded unchanged to ``VAEModel``. Defaults to None.

        batch_size (int or None, optional): Forwarded unchanged to ``VAEModel``. Defaults to None.

        kwargs (Any): Other keyword arguments to route to fit, compile, callbacks, etc. Should have the routing prefix (e.g., optimizer__learning_rate=0.01).
    """

    def __init__(
        self,
        output_shape=None,
        weights_initializer="glorot_normal",
        hidden_layer_sizes=None,
        num_hidden_layers=1,
        hidden_activation="elu",
        l1_penalty=0.01,
        l2_penalty=0.01,
        dropout_rate=0.2,
        kl_beta=1.0,
        n_components=3,
        num_classes=4,
        sample_weight=None,
        activate=None,
        y=None,
        missing_mask=None,
        batch_size=None,
        **kwargs,
    ):
        super().__init__(**kwargs)

        self.output_shape = output_shape
        self.weights_initializer = weights_initializer
        self.hidden_layer_sizes = hidden_layer_sizes
        self.num_hidden_layers = num_hidden_layers
        self.hidden_activation = hidden_activation
        self.l1_penalty = l1_penalty
        self.l2_penalty = l2_penalty
        self.dropout_rate = dropout_rate
        self.kl_beta = kl_beta
        self.n_components = n_components
        self.num_classes = num_classes
        self.sample_weight = sample_weight
        self.activate = activate
        self.y = y
        self.missing_mask = missing_mask
        self.batch_size = batch_size

    def _keras_build_fn(self, compile_kwargs):
        """Build and compile the ``VAEModel`` from the constructor parameters.

        Args:
            compile_kwargs (Dict[str, Any]): Dictionary with parameters: values. The parameters should be passed to the class constructor, but should be captured as kwargs. They should also have the routing prefix (e.g., optimizer__learning_rate=0.01). compile_kwargs will automatically be parsed from **kwargs by KerasClassifier and sent here. The keys "optimizer", "loss", "metrics" and "run_eagerly" are read.

        Returns:
            tf.keras.Model: Compiled ``VAEModel`` instance.
        """

        ######### REMOVING THIS LINE WILL BREAK THE MODEL!!!!! ########
        self.classes_ = np.arange(self.num_classes)

        model = VAEModel(
            output_shape=self.output_shape,
            n_components=self.n_components,
            weights_initializer=self.weights_initializer,
            hidden_layer_sizes=self.hidden_layer_sizes,
            num_hidden_layers=self.num_hidden_layers,
            hidden_activation=self.hidden_activation,
            l1_penalty=self.l1_penalty,
            l2_penalty=self.l2_penalty,
            dropout_rate=self.dropout_rate,
            kl_beta=self.kl_beta,
            num_classes=self.num_classes,
            sample_weight=self.sample_weight,
            missing_mask=self.missing_mask,
            batch_size=self.batch_size,
            y=self.y,
            final_activation=self.activate,
        )

        model.compile(
            optimizer=compile_kwargs["optimizer"],
            loss=compile_kwargs["loss"],
            metrics=compile_kwargs["metrics"],
            run_eagerly=compile_kwargs["run_eagerly"],
        )

        return model

    @property
    def feature_encoder(self):
        """Handles feature input, X, before training.

        Returns:
            AutoEncoderFeatureTransformer: New transformer constructed with this estimator's num_classes.
        """
        return AutoEncoderFeatureTransformer(num_classes=self.num_classes)

    @property
    def target_encoder(self):
        """Handles target input and output, y_true and y_pred, both before and after training.

        Returns:
            AutoEncoderFeatureTransformer: New transformer constructed with this estimator's num_classes and activate settings.
        """
        return AutoEncoderFeatureTransformer(
            num_classes=self.num_classes,
            activate=self.activate,
        )

    def predict(self, X, **kwargs):
        r"""Returns predictions for the given test data.

        Args:
            X (Union[array-like, sparse matrix, dataframe] of shape (n_samples, n_features)): Samples to predict, where n_samples is the number of samples and n_features is the number of features.
            kwargs (Dict[str, Any]): Accepted for compatibility with ``KerasClassifier.predict``; not used by this override.

        Warnings:
            Passing estimator parameters as keyword arguments (aka as ``**kwargs``) to ``predict`` is not supported by the Scikit-Learn API, and will be removed in a future version of SciKeras. These parameters can also be specified by prefixing ``predict__`` to a parameter at initialization (``BaseWrapper(..., fit__batch_size=32, predict__batch_size=1000)``) or by using ``set_params`` (``est.set_params(fit__batch_size=32, predict__batch_size=1000)``\).

        Returns:
            array-like: Predictions, of shape (n_samples,) or (n_samples, n_outputs).

        Notes:
            predict() is overridden so that the fitted model is invoked directly via __call__ (with training=False) on the encoded input, and the output is passed through the fitted target encoder's inverse_transform().
        """
        # The fitted *target* encoder is used to encode X as well as to decode
        # the model output.
        X_encoded = self.target_encoder_.transform(X)
        y_pred = self.model_(X_encoded, training=False)
        return self.target_encoder_.inverse_transform(y_pred)

    def get_metadata(self):
        """Returns a dictionary of meta-parameters generated when this transformer was fitted.

        Used by SciKeras to bind these parameters to the SciKeras estimator itself and make them available as inputs to the Keras model.

        Returns:
            Dict[str, Any]: Dictionary of meta-parameters generated when this transformer was fitted.
        """
        # NOTE(review): n_classes_, n_outputs_ and n_outputs_expected_ are not
        # assigned anywhere in this class; presumably KerasClassifier sets them
        # during fit -- confirm before calling this on an unfitted estimator.
        return {
            "classes_": self.classes_,
            "n_classes_": self.n_classes_,
            "n_outputs_": self.n_outputs_,
            "n_outputs_expected_": self.n_outputs_expected_,
        }

    @staticmethod
    def scorer(y_true, y_pred, **kwargs):
        """Scorer for grid search that masks missing data.

        To use this, do not specify a scoring metric when initializing the grid search object. By default if the scoring_metric option is left as None, then it uses the estimator's scoring metric (this one).

        Args:
            y_true (numpy.ndarray): True target values input to fit().

            y_pred (numpy.ndarray): Predicted target values from estimator. The predictions are modified by self.target_encoder().inverse_transform() before being sent here.

            kwargs (Any): Options forwarded to ``Scorers.scorer``. The keys read here are missing_mask (defaults to None), num_classes (defaults to 3) and testing (defaults to False).

        Returns:
            float: Calculated score.
        """
        # NOTE(review): the num_classes fallback here is 3 while the
        # constructor default is 4; callers should pass num_classes explicitly.
        num_classes = kwargs.get("num_classes", 3)

        # Regroup the trailing axes so that the last axis has length
        # num_classes before scoring.
        y_pred = y_pred.reshape(y_pred.shape[0], -1, num_classes)

        return Scorers().scorer(
            y_true,
            y_pred,
            missing_mask=kwargs.get("missing_mask"),
            num_classes=num_classes,
            testing=kwargs.get("testing", False),
        )
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
class MLPClassifier(KerasClassifier):
|
|
433
|
-
"""Estimator to be used with the scikit-learn API.
|
|
434
|
-
|
|
435
|
-
Args:
|
|
436
|
-
V (numpy.ndarray or Dict[str, Any]): Input X values of shape (n_samples, n_components). If a dictionary is passed, each key: value pair should have randomly initialized values for n_components: V. self.feature_encoder() will parse it and select the key: value pair with the current n_components. This allows n_components to be grid searched using GridSearchCV. Otherwise, it throws an error that the dimensions are off. Defaults to None.
|
|
437
|
-
|
|
438
|
-
y_train (numpy.ndarray): One-hot encoded target data. Defaults to None.
|
|
439
|
-
|
|
440
|
-
ubp_weights (tensorflow.Tensor): Weights from UBP model. Fetched by doing model.get_weights() on phase 2 model. Only used if phase 3. Defaults to None.
|
|
441
|
-
|
|
442
|
-
batch_size (int): Batch size to train with. Defaults to 32.
|
|
443
|
-
|
|
444
|
-
missing_mask (np.ndarray): Missing mask with missing values set to False (0) and observed values as True (1). Defaults to None. Defaults to None.
|
|
445
|
-
|
|
446
|
-
output_shape (int): Number of units in model output layer. Defaults to None.
|
|
447
|
-
|
|
448
|
-
weights_initializer (str): Kernel initializer to use for model weights. Defaults to "glorot_normal".
|
|
449
|
-
|
|
450
|
-
hidden_layer_sizes (List[int]): Output unit size for each hidden layer. Should be list of length num_hidden_layers. Defaults to None.
|
|
451
|
-
|
|
452
|
-
num_hidden_layers (int): Number of hidden layers to use. Defaults to 1.
|
|
453
|
-
|
|
454
|
-
hidden_activation (str): Hidden activation function to use. Defaults to "elu".
|
|
455
|
-
|
|
456
|
-
l1_penalty (float): L1 regularization penalty to use to reduce overfitting. Defautls to 0.01.
|
|
457
|
-
|
|
458
|
-
l2_penalty (float): L2 regularization penalty to use to reduce overfitting. Defaults to 0.01.
|
|
459
|
-
|
|
460
|
-
dropout_rate (float): Dropout rate for each hidden layer to reduce overfitting. Defaults to 0.2.
|
|
461
|
-
|
|
462
|
-
num_classes (int): Number of classes in output predictions. Defaults to 3.
|
|
463
|
-
|
|
464
|
-
phase (int or None): Current phase (if doing UBP), or None if doing NLPCA. Defults to None.
|
|
465
|
-
|
|
466
|
-
sample_weight (numpy.ndarray): Sample weight matrix for reducing the impact of class imbalance. Should be of shape (n_samples, n_features).
|
|
467
|
-
|
|
468
|
-
n_components (int): Number of components to use for input V. Defaults to 3.
|
|
469
|
-
|
|
470
|
-
kwargs (Any): Other keyword arguments to route to fit, compile, callbacks, etc. Should have the routing prefix (e.g., optimizer__learning_rate=0.01).
|
|
471
|
-
"""
|
|
472
|
-
|
|
473
|
-
def __init__(
|
|
474
|
-
self,
|
|
475
|
-
V,
|
|
476
|
-
y_train,
|
|
477
|
-
ubp_weights=None,
|
|
478
|
-
batch_size=32,
|
|
479
|
-
missing_mask=None,
|
|
480
|
-
output_shape=None,
|
|
481
|
-
weights_initializer="glorot_normal",
|
|
482
|
-
hidden_layer_sizes=None,
|
|
483
|
-
num_hidden_layers=1,
|
|
484
|
-
hidden_activation="elu",
|
|
485
|
-
l1_penalty=0.01,
|
|
486
|
-
l2_penalty=0.01,
|
|
487
|
-
dropout_rate=0.2,
|
|
488
|
-
num_classes=3,
|
|
489
|
-
phase=None,
|
|
490
|
-
sample_weight=None,
|
|
491
|
-
n_components=3,
|
|
492
|
-
activate=None,
|
|
493
|
-
**kwargs,
|
|
494
|
-
):
|
|
495
|
-
super().__init__(**kwargs)
|
|
496
|
-
self.V = V
|
|
497
|
-
self.y_train = y_train
|
|
498
|
-
self.ubp_weights = ubp_weights
|
|
499
|
-
self.batch_size = batch_size
|
|
500
|
-
self.missing_mask = missing_mask
|
|
501
|
-
self.output_shape = output_shape
|
|
502
|
-
self.weights_initializer = weights_initializer
|
|
503
|
-
self.hidden_layer_sizes = hidden_layer_sizes
|
|
504
|
-
self.num_hidden_layers = num_hidden_layers
|
|
505
|
-
self.hidden_activation = hidden_activation
|
|
506
|
-
self.l1_penalty = l1_penalty
|
|
507
|
-
self.l2_penalty = l2_penalty
|
|
508
|
-
self.dropout_rate = dropout_rate
|
|
509
|
-
self.num_classes = num_classes
|
|
510
|
-
self.phase = phase
|
|
511
|
-
self.sample_weight = sample_weight
|
|
512
|
-
self.n_components = n_components
|
|
513
|
-
self.activate = activate
|
|
514
|
-
|
|
515
|
-
def _keras_build_fn(self, compile_kwargs):
|
|
516
|
-
"""Build model with custom parameters.
|
|
517
|
-
|
|
518
|
-
Args:
|
|
519
|
-
compile_kwargs (Dict[str, Any]): Dictionary with parameters: values. The parameters should be passed to the class constructor, but should be captured as kwargs. They should also have the routing prefix (e.g., optimizer__learning_rate=0.01). compile_kwargs will automatically be parsed from **kwargs by KerasClassifier and sent here.
|
|
520
|
-
|
|
521
|
-
Returns:
|
|
522
|
-
tf.keras.Model: Model instance. The chosen model depends on which phase is passed to the class constructor.
|
|
523
|
-
"""
|
|
524
|
-
######### REMOVING THIS LINE WILL BREAK THE MODEL!!!!! ########
|
|
525
|
-
self.classes_ = np.arange(self.num_classes)
|
|
526
|
-
|
|
527
|
-
if self.phase is None:
|
|
528
|
-
model = NLPCAModel(
|
|
529
|
-
V=self.V,
|
|
530
|
-
y=self.y_train,
|
|
531
|
-
batch_size=self.batch_size,
|
|
532
|
-
missing_mask=self.missing_mask,
|
|
533
|
-
output_shape=self.output_shape,
|
|
534
|
-
n_components=self.n_components,
|
|
535
|
-
weights_initializer=self.weights_initializer,
|
|
536
|
-
hidden_layer_sizes=self.hidden_layer_sizes,
|
|
537
|
-
num_hidden_layers=self.num_hidden_layers,
|
|
538
|
-
hidden_activation=self.hidden_activation,
|
|
539
|
-
l1_penalty=self.l1_penalty,
|
|
540
|
-
l2_penalty=self.l2_penalty,
|
|
541
|
-
dropout_rate=self.dropout_rate,
|
|
542
|
-
num_classes=self.num_classes,
|
|
543
|
-
phase=self.phase,
|
|
544
|
-
sample_weight=self.sample_weight,
|
|
545
|
-
)
|
|
546
|
-
|
|
547
|
-
elif self.phase == 1:
|
|
548
|
-
model = UBPPhase1(
|
|
549
|
-
V=self.V,
|
|
550
|
-
y=self.y_train,
|
|
551
|
-
batch_size=self.batch_size,
|
|
552
|
-
missing_mask=self.missing_mask,
|
|
553
|
-
output_shape=self.output_shape,
|
|
554
|
-
n_components=self.n_components,
|
|
555
|
-
weights_initializer=self.weights_initializer,
|
|
556
|
-
hidden_layer_sizes=self.hidden_layer_sizes,
|
|
557
|
-
num_hidden_layers=self.num_hidden_layers,
|
|
558
|
-
l1_penalty=self.l1_penalty,
|
|
559
|
-
l2_penalty=self.l2_penalty,
|
|
560
|
-
dropout_rate=self.dropout_rate,
|
|
561
|
-
num_classes=self.num_classes,
|
|
562
|
-
phase=self.phase,
|
|
563
|
-
)
|
|
564
|
-
|
|
565
|
-
elif self.phase == 2:
|
|
566
|
-
model = UBPPhase2(
|
|
567
|
-
V=self.V,
|
|
568
|
-
y=self.y_train,
|
|
569
|
-
batch_size=self.batch_size,
|
|
570
|
-
missing_mask=self.missing_mask,
|
|
571
|
-
output_shape=self.output_shape,
|
|
572
|
-
n_components=self.n_components,
|
|
573
|
-
weights_initializer=self.weights_initializer,
|
|
574
|
-
hidden_layer_sizes=self.hidden_layer_sizes,
|
|
575
|
-
num_hidden_layers=self.num_hidden_layers,
|
|
576
|
-
hidden_activation=self.hidden_activation,
|
|
577
|
-
l1_penalty=self.l1_penalty,
|
|
578
|
-
l2_penalty=self.l2_penalty,
|
|
579
|
-
dropout_rate=self.dropout_rate,
|
|
580
|
-
num_classes=self.num_classes,
|
|
581
|
-
phase=self.phase,
|
|
582
|
-
)
|
|
583
|
-
|
|
584
|
-
elif self.phase == 3:
|
|
585
|
-
model = UBPPhase3(
|
|
586
|
-
V=self.V,
|
|
587
|
-
y=self.y_train,
|
|
588
|
-
batch_size=self.batch_size,
|
|
589
|
-
missing_mask=self.missing_mask,
|
|
590
|
-
output_shape=self.output_shape,
|
|
591
|
-
n_components=self.n_components,
|
|
592
|
-
weights_initializer=self.weights_initializer,
|
|
593
|
-
hidden_layer_sizes=self.hidden_layer_sizes,
|
|
594
|
-
num_hidden_layers=self.num_hidden_layers,
|
|
595
|
-
hidden_activation=self.hidden_activation,
|
|
596
|
-
dropout_rate=self.dropout_rate,
|
|
597
|
-
num_classes=self.num_classes,
|
|
598
|
-
phase=self.phase,
|
|
599
|
-
)
|
|
600
|
-
|
|
601
|
-
model.build((None, self.n_components))
|
|
602
|
-
|
|
603
|
-
model.compile(
|
|
604
|
-
optimizer=compile_kwargs["optimizer"],
|
|
605
|
-
loss=compile_kwargs["loss"],
|
|
606
|
-
metrics=compile_kwargs["metrics"],
|
|
607
|
-
run_eagerly=True,
|
|
608
|
-
)
|
|
609
|
-
|
|
610
|
-
model.set_model_outputs()
|
|
611
|
-
|
|
612
|
-
if self.phase == 3:
|
|
613
|
-
model.set_weights(self.ubp_weights)
|
|
614
|
-
|
|
615
|
-
return model
|
|
616
|
-
|
|
617
|
-
@staticmethod
def scorer(y_true, y_pred, **kwargs):
    """Score predictions for grid search via ``Scorers.scorer``.

    To use this, do not specify a scoring metric when initializing the grid search object. By default if the scoring_metric option is left as None, then it uses the estimator's scoring metric (this one).

    Args:
        y_true (numpy.ndarray): True target values input to fit().

        y_pred (numpy.ndarray): Predicted target values from estimator.

        kwargs (Any): Optional settings read here: ``missing_mask`` (defaults to an all-True boolean array shaped like ``y_true``), ``num_classes`` (defaults to 3), and ``testing`` (defaults to False). Any other keys are ignored.

    Returns:
        float: Calculated score.
    """
    # The fallback mask is always built first, just as a ``dict.get``
    # default argument would be.
    all_true_mask = np.ones(y_true.shape, dtype=bool)

    options = {
        "missing_mask": kwargs.get("missing_mask", all_true_mask),
        "num_classes": kwargs.get("num_classes", 3),
        "testing": kwargs.get("testing", False),
    }

    return Scorers().scorer(y_true, y_pred, **options)
|
|
648
|
-
|
|
649
|
-
@property
def feature_encoder(self):
    """Create the transformer applied to the feature input, X, before training.

    Returns:
        UBPInputTransformer: A new transformer constructed from ``self.n_components`` and ``self.V``.
    """
    encoder = UBPInputTransformer(self.n_components, self.V)
    return encoder
|
|
657
|
-
|
|
658
|
-
@property
def target_encoder(self):
    """Create the transformer used for the targets, y_true and y_pred.

    Returns:
        AutoEncoderFeatureTransformer: A new transformer constructed with ``self.num_classes`` and ``activate=None``.
    """
    # NOTE(review): ``activate`` is fixed to None here, so the ``activate``
    # value given to the constructor is not used -- confirm this is intended.
    encoder_options = dict(num_classes=self.num_classes, activate=None)
    return AutoEncoderFeatureTransformer(**encoder_options)
|
|
669
|
-
|
|
670
|
-
def predict(self, X, **kwargs):
    """Returns predictions computed from the fitted model's refined input.

    Args:
        X (Union[array-like, sparse matrix, dataframe] of shape (n_samples, n_features)): Accepted for API compatibility only; it is not read. The model is called on ``self.model_.V_latent`` instead.
        kwargs (Dict[str, Any]): Accepted for API compatibility only; not forwarded anywhere.

    Warnings:
        Passing estimator parameters as keyword arguments (aka as ``**kwargs``) to ``predict`` is not supported by the Scikit-Learn API, and will be removed in a future version of SciKeras. These parameters can also be specified by prefixing ``predict__`` to a parameter at initialization (``BaseWrapper(..., fit__batch_size=32, predict__batch_size=1000)``) or by using ``set_params`` (``est.set_params(fit__batch_size=32, predict__batch_size=1000)``).

    Returns:
        array-like: Predictions, of shape (n_samples,) or (n_samples, n_outputs).

    Notes:
        Had to override predict() here in order to do the __call__ with the refined input, V_latent.
    """
    # Call the model directly (inference mode) on the refined input.
    y_pred_proba = self.model_(self.model_.V_latent, training=False)

    # Decode the raw model output with the fitted target encoder.
    return self.target_encoder_.inverse_transform(y_pred_proba)
|
|
688
|
-
|
|
689
|
-
def get_metadata(self):
    """Collect the fitted meta-parameters of this object into a dictionary.

    Used by SciKeras to bind these parameters to the SciKeras estimator itself and make them available as inputs to the Keras model.

    Returns:
        Dict[str, Any]: Dictionary of meta-parameters generated when this transformer was fitted, keyed by attribute name.
    """
    attribute_names = (
        "classes_",
        "n_classes_",
        "n_outputs_",
        "n_outputs_expected_",
    )
    # Each key maps to the attribute of the same name on this object.
    return {name: getattr(self, name) for name in attribute_names}
|