pg-sui 0.2.3__py3-none-any.whl → 1.6.16a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pg_sui-1.6.16a3.dist-info/METADATA +292 -0
- pg_sui-1.6.16a3.dist-info/RECORD +81 -0
- {pg_sui-0.2.3.dist-info → pg_sui-1.6.16a3.dist-info}/WHEEL +1 -1
- pg_sui-1.6.16a3.dist-info/entry_points.txt +4 -0
- {pg_sui-0.2.3.dist-info → pg_sui-1.6.16a3.dist-info/licenses}/LICENSE +0 -0
- pg_sui-1.6.16a3.dist-info/top_level.txt +1 -0
- pgsui/__init__.py +35 -54
- pgsui/_version.py +34 -0
- pgsui/cli.py +922 -0
- pgsui/data_processing/__init__.py +0 -0
- pgsui/data_processing/config.py +565 -0
- pgsui/data_processing/containers.py +1436 -0
- pgsui/data_processing/transformers.py +557 -907
- pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
- pgsui/electron/app/__main__.py +5 -0
- pgsui/electron/app/extra-resources/.gitkeep +1 -0
- pgsui/electron/app/icons/icons/1024x1024.png +0 -0
- pgsui/electron/app/icons/icons/128x128.png +0 -0
- pgsui/electron/app/icons/icons/16x16.png +0 -0
- pgsui/electron/app/icons/icons/24x24.png +0 -0
- pgsui/electron/app/icons/icons/256x256.png +0 -0
- pgsui/electron/app/icons/icons/32x32.png +0 -0
- pgsui/electron/app/icons/icons/48x48.png +0 -0
- pgsui/electron/app/icons/icons/512x512.png +0 -0
- pgsui/electron/app/icons/icons/64x64.png +0 -0
- pgsui/electron/app/icons/icons/icon.icns +0 -0
- pgsui/electron/app/icons/icons/icon.ico +0 -0
- pgsui/electron/app/main.js +227 -0
- pgsui/electron/app/package-lock.json +6894 -0
- pgsui/electron/app/package.json +51 -0
- pgsui/electron/app/preload.js +15 -0
- pgsui/electron/app/server.py +157 -0
- pgsui/electron/app/ui/logo.png +0 -0
- pgsui/electron/app/ui/renderer.js +131 -0
- pgsui/electron/app/ui/styles.css +59 -0
- pgsui/electron/app/ui/ui_shim.js +72 -0
- pgsui/electron/bootstrap.py +43 -0
- pgsui/electron/launch.py +57 -0
- pgsui/electron/package.json +14 -0
- pgsui/example_data/__init__.py +0 -0
- pgsui/example_data/phylip_files/__init__.py +0 -0
- pgsui/example_data/phylip_files/test.phy +0 -0
- pgsui/example_data/popmaps/__init__.py +0 -0
- pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
- pgsui/example_data/structure_files/__init__.py +0 -0
- pgsui/example_data/structure_files/test.pops.2row.allsites.str +0 -0
- pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
- pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
- pgsui/impute/__init__.py +0 -0
- pgsui/impute/deterministic/imputers/allele_freq.py +725 -0
- pgsui/impute/deterministic/imputers/mode.py +844 -0
- pgsui/impute/deterministic/imputers/nmf.py +221 -0
- pgsui/impute/deterministic/imputers/phylo.py +973 -0
- pgsui/impute/deterministic/imputers/ref_allele.py +669 -0
- pgsui/impute/supervised/__init__.py +0 -0
- pgsui/impute/supervised/base.py +343 -0
- pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
- pgsui/impute/supervised/imputers/hist_gradient_boosting.py +317 -0
- pgsui/impute/supervised/imputers/random_forest.py +291 -0
- pgsui/impute/unsupervised/__init__.py +0 -0
- pgsui/impute/unsupervised/base.py +1121 -0
- pgsui/impute/unsupervised/callbacks.py +92 -262
- {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
- pgsui/impute/unsupervised/imputers/autoencoder.py +1361 -0
- pgsui/impute/unsupervised/imputers/nlpca.py +1666 -0
- pgsui/impute/unsupervised/imputers/ubp.py +1660 -0
- pgsui/impute/unsupervised/imputers/vae.py +1316 -0
- pgsui/impute/unsupervised/loss_functions.py +261 -0
- pgsui/impute/unsupervised/models/__init__.py +0 -0
- pgsui/impute/unsupervised/models/autoencoder_model.py +215 -567
- pgsui/impute/unsupervised/models/nlpca_model.py +155 -394
- pgsui/impute/unsupervised/models/ubp_model.py +180 -1106
- pgsui/impute/unsupervised/models/vae_model.py +269 -630
- pgsui/impute/unsupervised/nn_scorers.py +255 -0
- pgsui/utils/__init__.py +0 -0
- pgsui/utils/classification_viz.py +608 -0
- pgsui/utils/logging_utils.py +22 -0
- pgsui/utils/misc.py +35 -480
- pgsui/utils/plotting.py +996 -829
- pgsui/utils/pretty_metrics.py +290 -0
- pgsui/utils/scorers.py +213 -666
- pg_sui-0.2.3.dist-info/METADATA +0 -322
- pg_sui-0.2.3.dist-info/RECORD +0 -75
- pg_sui-0.2.3.dist-info/top_level.txt +0 -3
- pgsui/example_data/phylip_files/test_n10.phy +0 -118
- pgsui/example_data/phylip_files/test_n100.phy +0 -118
- pgsui/example_data/phylip_files/test_n2.phy +0 -118
- pgsui/example_data/phylip_files/test_n500.phy +0 -118
- pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
- pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
- pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
- pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
- pgsui/example_data/trees/test.iqtree +0 -376
- pgsui/example_data/trees/test.qmat +0 -5
- pgsui/example_data/trees/test.rate +0 -2033
- pgsui/example_data/trees/test.tre +0 -1
- pgsui/example_data/trees/test_n10.rate +0 -19
- pgsui/example_data/trees/test_n100.rate +0 -109
- pgsui/example_data/trees/test_n500.rate +0 -509
- pgsui/example_data/trees/test_siterates.txt +0 -2024
- pgsui/example_data/trees/test_siterates_n10.txt +0 -10
- pgsui/example_data/trees/test_siterates_n100.txt +0 -100
- pgsui/example_data/trees/test_siterates_n500.txt +0 -500
- pgsui/example_data/vcf_files/test.vcf +0 -244
- pgsui/example_data/vcf_files/test.vcf.gz +0 -0
- pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
- pgsui/impute/estimators.py +0 -1268
- pgsui/impute/impute.py +0 -1463
- pgsui/impute/simple_imputers.py +0 -1431
- pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -782
- pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1024
- pgsui/impute/unsupervised/keras_classifiers.py +0 -697
- pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
- pgsui/impute/unsupervised/neural_network_imputers.py +0 -1440
- pgsui/impute/unsupervised/neural_network_methods.py +0 -1395
- pgsui/pg_sui.py +0 -261
- pgsui/utils/sequence_tools.py +0 -407
- simulation/sim_benchmarks.py +0 -333
- simulation/sim_treeparams.py +0 -475
- test/__init__.py +0 -0
- test/pg_sui_simtest.py +0 -215
- test/pg_sui_testing.py +0 -523
- test/test.py +0 -151
- test/test_pgsui.py +0 -374
- test/test_tkc.py +0 -185
@@ -1,1126 +1,200 @@
-import
-import os
-import sys
-import warnings
+from typing import Callable, List, Literal

-
-
+import numpy as np
+import torch
+import torch.nn as nn
+from snpio.utils.logging import LoggerManager

-
-
-logging.getLogger("tensorflow").disabled = True
-warnings.filterwarnings("ignore", category=UserWarning)
+from pgsui.impute.unsupervised.loss_functions import MaskedFocalLoss
+from pgsui.utils.logging_utils import configure_logger

-# noinspection PyPackageRequirements
-import tensorflow as tf

-
-
+class UBPModel(nn.Module):
+    """An Unsupervised Backpropagation (UBP) decoder for genotype logits.

-from
+    The model reconstructs locus-level genotype probabilities (two states for haploid data or three for diploid data) from a latent vector. It exposes two decoding branches so the training schedule can follow the UBP recipe:

-
-
-tf.get_logger().setLevel(logging.ERROR)
+    1. **Phase 1 decoder** - a shallow linear layer that co-trains with latent codes.
+    2. **Phase 2/3 decoder** - a deeper MLP with batch normalization and dropout that is first trained in isolation and later fine-tuned jointly with the latents.

-
-# Monkey patching deprecation utils to supress warnings.
-# noinspection PyUnusedLocal
-def deprecated(
-    date, instructions, warn_once=True
-):  # pylint: disable=unused-argument
-    def deprecated_wrapper(func):
-        return func
-
-    return deprecated_wrapper
-
-
-deprecation.deprecated = deprecated
-
-from tensorflow.keras.layers import (
-    Dropout,
-    Dense,
-    Reshape,
-    LeakyReLU,
-    PReLU,
-)
-
-from tensorflow.keras.regularizers import l1_l2
-
-# Custom Modules
-try:
-    from ..neural_network_methods import NeuralNetworkMethods
-except (ModuleNotFoundError, ValueError, ImportError):
-    from impute.unsupervised.neural_network_methods import NeuralNetworkMethods
-
-
-class UBPPhase1(tf.keras.Model):
-    """UBP Phase 1 single layer perceptron model to train predict imputations.
-
-    This model is subclassed from the tensorflow/ Keras framework.
-
-    UBPPhase1 subclasses the tf.keras.Model and overrides the train_step function, which does training and evalutation for each batch in each epoch.
-
-    UBPPhase1 is a single-layer perceptron model used to initially refine V. After Phase 1 the Phase 1 weights are discarded.
-
… (41 removed lines: the UBPPhase1 docstring's Args, Example, and Raises sections, documenting the legacy keyword arguments V, y, batch_size, missing_mask, output_shape, n_components, weights_initializer, hidden_layer_sizes, num_hidden_layers, hidden_activation, l1_penalty, l2_penalty, dropout_rate, num_classes, phase, and sample_weight) …
+    Both paths ultimately reshape their logits to ``(batch_size, n_features, num_classes)`` and training uses ``MaskedFocalLoss`` to focus on hard examples while masking missing entries.
     """

     def __init__(
         self,
… (12 removed lines: the legacy keyword arguments of UBPPhase1.__init__, not recoverable from this rendering) …
-        num_classes=3,
-        phase=1,
-        sample_weight=None,
+        n_features: int,
+        prefix: str,
+        *,
+        num_classes: int = 3,
+        hidden_layer_sizes: List[int] | np.ndarray = [128, 64],
+        latent_dim: int = 2,
+        dropout_rate: float = 0.2,
+        activation: Literal["relu", "elu", "selu", "leaky_relu"] = "relu",
+        gamma: float = 2.0,
+        device: Literal["cpu", "gpu", "mps"] = "cpu",
+        verbose: bool = False,
+        debug: bool = False,
     ):
… (201 removed lines: the legacy UBPPhase1.__init__ body (NeuralNetworkMethods helpers, random initialization and copying of the latent matrix V, hidden-layer validation, optional l1_l2 kernel regularization, and a single Dense + Reshape output head), the call()/model()/set_model_outputs() helpers, a custom train_step() that masked missing genotypes and applied separate GradientTape gradients to the network weights and to V, and read-only properties for V_latent, batch_size, batch_idx, y, missing_mask, and sample_weight) …
+        """Initializes the UBPModel.

-
-
-
-
+        Args:
+            n_features (int): The number of features (SNPs) in the input data.
+            prefix (str): A prefix used for logging.
+            num_classes (int): Number of genotype states per locus (typically 2 or 3). Defaults to 3.
+            hidden_layer_sizes (list[int] | np.ndarray): A list of integers specifying the size of each hidden layer in the deep (Phase 2/3) decoder. Defaults to [128, 64].
+            latent_dim (int): The dimensionality of the input latent space. Defaults to 2.
+            dropout_rate (float): The dropout rate for regularization in the deep decoder. Defaults to 0.2.
+            activation (str): The non-linear activation function to use in the deep decoder's hidden layers. Defaults to 'relu'.
+            gamma (float): The focusing parameter for the focal loss function. Defaults to 2.0.
+            device (Literal["cpu", "gpu", "mps"]): The PyTorch device to run the model on. Defaults to 'cpu'.
+            verbose (bool): If True, enables detailed logging. Defaults to False.
+            debug (bool): If True, enables debug mode. Defaults to False.
         """
-        self.
+        super(UBPModel, self).__init__()
+
+        logman = LoggerManager(
+            name=__name__, prefix=prefix, verbose=verbose, debug=debug
+        )
+        self.logger = configure_logger(
+            logman.get_logger(), verbose=verbose, debug=debug
+        )
+
+        self.n_features = n_features
+        self.num_classes = num_classes
+        self.latent_dim = latent_dim
+        self.gamma = gamma
+        self.device = device
+
+        if isinstance(hidden_layer_sizes, np.ndarray):
+            hidden_layer_sizes = hidden_layer_sizes.tolist()
+
+        # Final layer output size is now n_features * num_classes
+        final_output_size = n_features * num_classes
+
+        # Phase 1 decoder: Simple linear model
+        self.phase1_decoder = nn.Sequential(
+            nn.Linear(latent_dim, final_output_size, device=device),
+        )
+
+        # Phase 2 & 3 uses the Convolutional Decoder
+        act_factory = self._resolve_activation_factory(activation)
+
+        if hidden_layer_sizes[0] > hidden_layer_sizes[-1]:
+            hidden_layer_sizes = list(reversed(hidden_layer_sizes))
+
+        # Phase 2 & 3: Flexible deeper network
+        layers = []
+        input_dim = latent_dim
+        for size in hidden_layer_sizes:
+            layers.append(nn.Linear(input_dim, size))
+            layers.append(nn.BatchNorm1d(size))
+            layers.append(nn.Dropout(dropout_rate))
+            layers.append(act_factory())
+            input_dim = size
+
+        layers.append(nn.Linear(hidden_layer_sizes[-1], final_output_size))
+
+        self.phase23_decoder = nn.Sequential(*layers)
+        self.reshape = (self.n_features, self.num_classes)
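A minimal construction sketch (sizes are illustrative) showing the two decoder branches the constructor builds; note that a descending hidden_layer_sizes list such as the [128, 64] default is reversed to ascending order before the deep decoder is assembled.

from pgsui.impute.unsupervised.models.ubp_model import UBPModel

model = UBPModel(
    n_features=50,                  # 50 loci -> final layer outputs 50 * 3 logits
    prefix="pgsui",
    num_classes=3,
    hidden_layer_sizes=[128, 64],   # reversed internally to [64, 128]
    latent_dim=2,
    dropout_rate=0.2,
    activation="elu",
)
print(model.phase1_decoder)   # Sequential(Linear(2 -> 150))
print(model.phase23_decoder)  # Linear/BatchNorm1d/Dropout/ELU blocks, ending in Linear(128 -> 150)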
+
+    def _resolve_activation_factory(
+        self, activation: Literal["relu", "elu", "selu", "leaky_relu"]
+    ) -> Callable[[], nn.Module]:
+        """Resolves an activation function factory from a string name.
+
+        This method acts as a factory, returning a callable (lambda function) that produces the desired PyTorch activation function module when called.
+
+        Args:
+            activation (Literal["relu", "elu", "selu", "leaky_relu"]): The name of the activation function.
+
+        Returns:
+            Callable[[], nn.Module]: A factory function that, when called, returns an instance of the specified activation layer.
+
+        Raises:
+            ValueError: If the provided activation name is not supported.
+        """
+        a = activation.lower()
+        if a == "relu":
+            return lambda: nn.ReLU()
+        if a == "elu":
+            return lambda: nn.ELU()
+        if a == "leaky_relu":
+            return lambda: nn.LeakyReLU()
+        if a == "selu":
+            return lambda: nn.SELU()
+
+        msg = f"Activation function {activation} not supported."
+        self.logger.error(msg)
+        raise ValueError(msg)
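The factory indirection matters because each hidden layer needs its own activation module instance rather than a shared one. A standalone sketch of the same pattern, written as a free function for illustration (make_activation is not part of the package):

import torch.nn as nn

def make_activation(name: str):
    """Return a zero-argument callable that builds a fresh activation module."""
    table = {"relu": nn.ReLU, "elu": nn.ELU, "selu": nn.SELU, "leaky_relu": nn.LeakyReLU}
    if name.lower() not in table:
        raise ValueError(f"Activation function {name} not supported.")
    cls = table[name.lower()]
    return lambda: cls()

act_factory = make_activation("leaky_relu")
layer_a, layer_b = act_factory(), act_factory()  # two independent LeakyReLU instances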
+
+    def forward(self, x: torch.Tensor, phase: int = 1) -> torch.Tensor:
+        """Performs the forward pass through the UBP model.
+
+        This method routes the input tensor through the appropriate decoder based on
+        the specified training ``phase`` and reshapes the logits to the
+        `(batch_size, n_features, num_classes)` grid expected by the loss.
+
+        Args:
+            x (torch.Tensor): The input latent tensor of shape `(batch_size, latent_dim)`.
+            phase (int): The training phase (1, 2, or 3), which determines which decoder path to use.
+
+        Returns:
+            torch.Tensor: Logits shaped as `(batch_size, n_features, num_classes)`.

-
-
-        """Set batch_size parameter.
-        :noindex:
+        Raises:
+            ValueError: If an invalid phase is provided.
         """
… (160 removed lines: the remaining legacy UBPPhase1 batch_idx/y/missing_mask/sample_weight setters, plus the entire legacy UBPPhase2 class docstring and constructor (a tf.keras.Model subclass mirroring UBPPhase1 that refines only the network weights, with LeakyReLU/PReLU/selu activation handling and l1_l2 regularization)) …
+        if phase == 1:
+            # Linear decoder for phase 1
+            x = self.phase1_decoder(x)
+            return x.view(-1, *self.reshape)
+        elif phase in {2, 3}:
+            x = self.phase23_decoder(x)
+            return x.view(-1, *self.reshape)
         else:
-
+            msg = f"Invalid phase: {phase}. Expected 1, 2, or 3."
+            self.logger.error(msg)
+            raise ValueError(msg)
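A quick shape check for the two routing paths (sizes are illustrative; the model is put in eval mode so the BatchNorm layers use their running statistics):

import torch
from pgsui.impute.unsupervised.models.ubp_model import UBPModel

model = UBPModel(n_features=50, prefix="pgsui", latent_dim=2).eval()
z = torch.randn(8, 2)             # a batch of latent codes
out_phase1 = model(z, phase=1)    # shallow linear branch
out_phase3 = model(z, phase=3)    # deep MLP branch
assert out_phase1.shape == out_phase3.shape == (8, 50, 3)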

… (298 removed lines: the rest of the legacy UBPPhase2 class (maximum-of-5-hidden-layers check, dynamically constructed Dense layers with Dropout, the output Dense + Reshape head, call()/model()/set_model_outputs(), a custom train_step(), and the property accessors and setters), followed by the legacy UBPPhase3 class docstring, which described a phase that refines both the network weights and the latent matrix V) …
-    def __init__(
+    def compute_loss(
         self,
… (91 removed lines: the legacy UBPPhase3 constructor arguments and body (latent matrix handling, no regularization in phase 3, LeakyReLU/PReLU/selu activation handling, the max-hidden-layers check, and the start of its Dense layer stack)) …
+        y: torch.Tensor,
+        outputs: torch.Tensor,
+        mask: torch.Tensor | None = None,
+        class_weights: torch.Tensor | None = None,
+        gamma: float = 2.0,
+    ) -> torch.Tensor:
+        """Computes the masked focal loss between model outputs and ground truth.
+
+        This method calculates the loss value, handling class imbalance with weights and ignoring masked (missing) values in the ground truth tensor.
+
+        Args:
+            y (torch.Tensor): Integer ground-truth genotypes of shape `(batch_size, n_features)`.
+            outputs (torch.Tensor): Logits of shape `(batch_size, n_features, num_classes)`.
+            mask (torch.Tensor | None): An optional boolean mask indicating which elements should be included in the loss calculation.
+            class_weights (torch.Tensor | None): An optional tensor of weights for each class to address imbalance.
+            gamma (float): The focusing parameter for the focal loss.
+
+        Returns:
+            torch.Tensor: The computed scalar loss value.
+        """
+        if class_weights is None:
+            class_weights = torch.ones(self.num_classes, device=outputs.device)
+
+        if mask is None:
+            mask = torch.ones_like(y, dtype=torch.bool)
+
+        # Explicitly flatten all tensors to the (N, C) and (N,) format.
+        # This creates a clear contract with the new MaskedFocalLoss function.
+        n_classes = outputs.shape[-1]
+        logits_flat = outputs.reshape(-1, n_classes)
+        targets_flat = y.reshape(-1)
+        mask_flat = mask.reshape(-1)
+
+        criterion = MaskedFocalLoss(gamma=gamma, alpha=class_weights)
+
+        return criterion(
+            logits_flat.to(self.device),
+            targets_flat.to(self.device),
+            valid_mask=mask_flat.to(self.device),
         )
… (231 removed lines: the remainder of the legacy UBPPhase3 class (hidden Dense layers and output head, call()/model()/set_model_outputs(), a custom train_step() that applied separate gradients to the network weights and to the latent matrix V, and the V_latent/batch_size/batch_idx/y/missing_mask/sample_weight properties and setters)) …