pg-sui 0.2.3__py3-none-any.whl → 1.6.16a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. pg_sui-1.6.16a3.dist-info/METADATA +292 -0
  2. pg_sui-1.6.16a3.dist-info/RECORD +81 -0
  3. {pg_sui-0.2.3.dist-info → pg_sui-1.6.16a3.dist-info}/WHEEL +1 -1
  4. pg_sui-1.6.16a3.dist-info/entry_points.txt +4 -0
  5. {pg_sui-0.2.3.dist-info → pg_sui-1.6.16a3.dist-info/licenses}/LICENSE +0 -0
  6. pg_sui-1.6.16a3.dist-info/top_level.txt +1 -0
  7. pgsui/__init__.py +35 -54
  8. pgsui/_version.py +34 -0
  9. pgsui/cli.py +922 -0
  10. pgsui/data_processing/__init__.py +0 -0
  11. pgsui/data_processing/config.py +565 -0
  12. pgsui/data_processing/containers.py +1436 -0
  13. pgsui/data_processing/transformers.py +557 -907
  14. pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
  15. pgsui/electron/app/__main__.py +5 -0
  16. pgsui/electron/app/extra-resources/.gitkeep +1 -0
  17. pgsui/electron/app/icons/icons/1024x1024.png +0 -0
  18. pgsui/electron/app/icons/icons/128x128.png +0 -0
  19. pgsui/electron/app/icons/icons/16x16.png +0 -0
  20. pgsui/electron/app/icons/icons/24x24.png +0 -0
  21. pgsui/electron/app/icons/icons/256x256.png +0 -0
  22. pgsui/electron/app/icons/icons/32x32.png +0 -0
  23. pgsui/electron/app/icons/icons/48x48.png +0 -0
  24. pgsui/electron/app/icons/icons/512x512.png +0 -0
  25. pgsui/electron/app/icons/icons/64x64.png +0 -0
  26. pgsui/electron/app/icons/icons/icon.icns +0 -0
  27. pgsui/electron/app/icons/icons/icon.ico +0 -0
  28. pgsui/electron/app/main.js +227 -0
  29. pgsui/electron/app/package-lock.json +6894 -0
  30. pgsui/electron/app/package.json +51 -0
  31. pgsui/electron/app/preload.js +15 -0
  32. pgsui/electron/app/server.py +157 -0
  33. pgsui/electron/app/ui/logo.png +0 -0
  34. pgsui/electron/app/ui/renderer.js +131 -0
  35. pgsui/electron/app/ui/styles.css +59 -0
  36. pgsui/electron/app/ui/ui_shim.js +72 -0
  37. pgsui/electron/bootstrap.py +43 -0
  38. pgsui/electron/launch.py +57 -0
  39. pgsui/electron/package.json +14 -0
  40. pgsui/example_data/__init__.py +0 -0
  41. pgsui/example_data/phylip_files/__init__.py +0 -0
  42. pgsui/example_data/phylip_files/test.phy +0 -0
  43. pgsui/example_data/popmaps/__init__.py +0 -0
  44. pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
  45. pgsui/example_data/structure_files/__init__.py +0 -0
  46. pgsui/example_data/structure_files/test.pops.2row.allsites.str +0 -0
  47. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
  48. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
  49. pgsui/impute/__init__.py +0 -0
  50. pgsui/impute/deterministic/imputers/allele_freq.py +725 -0
  51. pgsui/impute/deterministic/imputers/mode.py +844 -0
  52. pgsui/impute/deterministic/imputers/nmf.py +221 -0
  53. pgsui/impute/deterministic/imputers/phylo.py +973 -0
  54. pgsui/impute/deterministic/imputers/ref_allele.py +669 -0
  55. pgsui/impute/supervised/__init__.py +0 -0
  56. pgsui/impute/supervised/base.py +343 -0
  57. pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
  58. pgsui/impute/supervised/imputers/hist_gradient_boosting.py +317 -0
  59. pgsui/impute/supervised/imputers/random_forest.py +291 -0
  60. pgsui/impute/unsupervised/__init__.py +0 -0
  61. pgsui/impute/unsupervised/base.py +1121 -0
  62. pgsui/impute/unsupervised/callbacks.py +92 -262
  63. {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
  64. pgsui/impute/unsupervised/imputers/autoencoder.py +1361 -0
  65. pgsui/impute/unsupervised/imputers/nlpca.py +1666 -0
  66. pgsui/impute/unsupervised/imputers/ubp.py +1660 -0
  67. pgsui/impute/unsupervised/imputers/vae.py +1316 -0
  68. pgsui/impute/unsupervised/loss_functions.py +261 -0
  69. pgsui/impute/unsupervised/models/__init__.py +0 -0
  70. pgsui/impute/unsupervised/models/autoencoder_model.py +215 -567
  71. pgsui/impute/unsupervised/models/nlpca_model.py +155 -394
  72. pgsui/impute/unsupervised/models/ubp_model.py +180 -1106
  73. pgsui/impute/unsupervised/models/vae_model.py +269 -630
  74. pgsui/impute/unsupervised/nn_scorers.py +255 -0
  75. pgsui/utils/__init__.py +0 -0
  76. pgsui/utils/classification_viz.py +608 -0
  77. pgsui/utils/logging_utils.py +22 -0
  78. pgsui/utils/misc.py +35 -480
  79. pgsui/utils/plotting.py +996 -829
  80. pgsui/utils/pretty_metrics.py +290 -0
  81. pgsui/utils/scorers.py +213 -666
  82. pg_sui-0.2.3.dist-info/METADATA +0 -322
  83. pg_sui-0.2.3.dist-info/RECORD +0 -75
  84. pg_sui-0.2.3.dist-info/top_level.txt +0 -3
  85. pgsui/example_data/phylip_files/test_n10.phy +0 -118
  86. pgsui/example_data/phylip_files/test_n100.phy +0 -118
  87. pgsui/example_data/phylip_files/test_n2.phy +0 -118
  88. pgsui/example_data/phylip_files/test_n500.phy +0 -118
  89. pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
  90. pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
  91. pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
  92. pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
  93. pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
  94. pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
  95. pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
  96. pgsui/example_data/trees/test.iqtree +0 -376
  97. pgsui/example_data/trees/test.qmat +0 -5
  98. pgsui/example_data/trees/test.rate +0 -2033
  99. pgsui/example_data/trees/test.tre +0 -1
  100. pgsui/example_data/trees/test_n10.rate +0 -19
  101. pgsui/example_data/trees/test_n100.rate +0 -109
  102. pgsui/example_data/trees/test_n500.rate +0 -509
  103. pgsui/example_data/trees/test_siterates.txt +0 -2024
  104. pgsui/example_data/trees/test_siterates_n10.txt +0 -10
  105. pgsui/example_data/trees/test_siterates_n100.txt +0 -100
  106. pgsui/example_data/trees/test_siterates_n500.txt +0 -500
  107. pgsui/example_data/vcf_files/test.vcf +0 -244
  108. pgsui/example_data/vcf_files/test.vcf.gz +0 -0
  109. pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
  110. pgsui/impute/estimators.py +0 -1268
  111. pgsui/impute/impute.py +0 -1463
  112. pgsui/impute/simple_imputers.py +0 -1431
  113. pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -782
  114. pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1024
  115. pgsui/impute/unsupervised/keras_classifiers.py +0 -697
  116. pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
  117. pgsui/impute/unsupervised/neural_network_imputers.py +0 -1440
  118. pgsui/impute/unsupervised/neural_network_methods.py +0 -1395
  119. pgsui/pg_sui.py +0 -261
  120. pgsui/utils/sequence_tools.py +0 -407
  121. simulation/sim_benchmarks.py +0 -333
  122. simulation/sim_treeparams.py +0 -475
  123. test/__init__.py +0 -0
  124. test/pg_sui_simtest.py +0 -215
  125. test/pg_sui_testing.py +0 -523
  126. test/test.py +0 -151
  127. test/test_pgsui.py +0 -374
  128. test/test_tkc.py +0 -185
@@ -1,697 +0,0 @@
1
- import numpy as np
2
- import tensorflow as tf
3
-
4
- from scikeras.wrappers import KerasClassifier
5
-
6
- try:
7
- from ...utils.scorers import Scorers
8
- from .models.autoencoder_model import AutoEncoderModel
9
- from .models.nlpca_model import NLPCAModel
10
- from .models.ubp_model import UBPPhase1, UBPPhase2, UBPPhase3
11
- from .models.vae_model import VAEModel
12
- from ...data_processing.transformers import (
13
- MLPTargetTransformer,
14
- UBPInputTransformer,
15
- AutoEncoderFeatureTransformer,
16
- )
17
- except (ModuleNotFoundError, ValueError, ImportError):
18
- from utils.scorers import Scorers
19
- from impute.unsupervised.neural_network_methods import NeuralNetworkMethods
20
- from impute.unsupervised.models.vae_model import (
21
- VAEModel,
22
- )
23
- from impute.unsupervised.models.autoencoder_model import AutoEncoderModel
24
- from impute.unsupervised.models.nlpca_model import NLPCAModel
25
- from impute.unsupervised.models.ubp_model import (
26
- UBPPhase1,
27
- UBPPhase2,
28
- UBPPhase3,
29
- )
30
- from data_processing.transformers import (
31
- MLPTargetTransformer,
32
- UBPInputTransformer,
33
- AutoEncoderFeatureTransformer,
34
- )
35
-
36
-
37
class SAEClassifier(KerasClassifier):
    """SciKeras estimator that builds and wraps an ``AutoEncoderModel``.

    Args:
        y (numpy.ndarray, optional): Target data passed as the first positional argument to ``AutoEncoderModel``. Defaults to None.

        output_shape (int): Number of units in model output layer. Defaults to None.

        weights_initializer (str): Kernel initializer to use for model weights. Defaults to "glorot_normal".

        hidden_layer_sizes (List[int]): Output unit size for each hidden layer. Should be list of length num_hidden_layers. Defaults to None.

        num_hidden_layers (int): Number of hidden layers to use. Defaults to 1.

        hidden_activation (str): Hidden activation function to use. Defaults to "elu".

        l1_penalty (float): L1 regularization penalty to use to reduce overfitting. Defaults to 0.01.

        l2_penalty (float): L2 regularization penalty to use to reduce overfitting. Defaults to 0.01.

        dropout_rate (float): Dropout rate for each hidden layer to reduce overfitting. Defaults to 0.2.

        n_components (int): Number of components to use for input V. Defaults to 3.

        sample_weight (numpy.ndarray, optional): Sample weights forwarded to ``AutoEncoderModel``. Defaults to None.

        missing_mask (numpy.ndarray, optional): Missing-data mask forwarded to ``AutoEncoderModel``. Defaults to None.

        num_classes (int, optional): Number of classes in y_train. 012-encoded data should have 3 classes. Defaults to 3.

        activate (str, optional): Activation name forwarded to the target encoder (``AutoEncoderFeatureTransformer``). Defaults to "softmax".

        kwargs (Any): Other keyword arguments to route to fit, compile, callbacks, etc. Should have the routing prefix (e.g., optimizer__learning_rate=0.01).
    """

    def __init__(
        self,
        y=None,
        output_shape=None,
        weights_initializer="glorot_normal",
        hidden_layer_sizes=None,
        num_hidden_layers=1,
        hidden_activation="elu",
        l1_penalty=0.01,
        l2_penalty=0.01,
        dropout_rate=0.2,
        n_components=3,
        sample_weight=None,
        missing_mask=None,
        num_classes=3,
        activate="softmax",
        **kwargs,
    ):
        # The parent constructor consumes the routed SciKeras parameters
        # (optimizer, loss, callbacks, ...) captured in **kwargs.
        super().__init__(**kwargs)

        self.y = y
        self.output_shape = output_shape
        self.weights_initializer = weights_initializer
        self.hidden_layer_sizes = hidden_layer_sizes
        self.num_hidden_layers = num_hidden_layers
        self.hidden_activation = hidden_activation
        self.l1_penalty = l1_penalty
        self.l2_penalty = l2_penalty
        self.dropout_rate = dropout_rate
        self.n_components = n_components
        self.sample_weight = sample_weight
        self.missing_mask = missing_mask
        self.num_classes = num_classes
        self.activate = activate

        # Class metadata is derived directly from num_classes rather than
        # being inferred from the target data.
        self.classes_ = np.arange(self.num_classes)
        self.n_classes_ = self.num_classes

    def _keras_build_fn(self, compile_kwargs):
        """Build and compile the ``AutoEncoderModel`` from the stored parameters.

        Args:
            compile_kwargs (Dict[str, Any]): Dictionary with parameters: values. The parameters should be passed to the class constructor, but should be captured as kwargs. They should also have the routing prefix (e.g., optimizer__learning_rate=0.01). compile_kwargs will automatically be parsed from **kwargs by KerasClassifier and sent here. The "optimizer", "loss", and "metrics" keys are read.

        Returns:
            tf.keras.Model: Compiled ``AutoEncoderModel`` instance.
        """

        ######### REMOVING THIS LINE WILL BREAK THE MODEL!!!!! ########
        # Re-assert classes_ at build time (kept from the original author's
        # warning; presumably SciKeras would otherwise overwrite it during fit).
        self.classes_ = np.arange(self.num_classes)

        model = AutoEncoderModel(
            self.y,
            output_shape=self.output_shape,
            n_components=self.n_components,
            weights_initializer=self.weights_initializer,
            hidden_layer_sizes=self.hidden_layer_sizes,
            num_hidden_layers=self.num_hidden_layers,
            hidden_activation=self.hidden_activation,
            l1_penalty=self.l1_penalty,
            l2_penalty=self.l2_penalty,
            dropout_rate=self.dropout_rate,
            sample_weight=self.sample_weight,
            missing_mask=self.missing_mask,
            num_classes=self.num_classes,
        )

        # This estimator always compiles in graph mode, regardless of any
        # run_eagerly value present in compile_kwargs.
        model.compile(
            optimizer=compile_kwargs["optimizer"],
            loss=compile_kwargs["loss"],
            metrics=compile_kwargs["metrics"],
            run_eagerly=False,
        )

        return model

    @staticmethod
    def scorer(y_true, y_pred, **kwargs):
        """Scorer for grid search that masks missing data.

        To use this, do not specify a scoring metric when initializing the grid search object. By default if the scoring_metric option is left as None, then it uses the estimator's scoring metric (this one).

        Args:
            y_true (numpy.ndarray): True target values input to fit().
            y_pred (numpy.ndarray): Predicted target values from estimator. The predictions are modified by self.target_encoder().inverse_transform() before being sent here.
            kwargs (Any): Options forwarded to ``Scorers.scorer``. The keys read here are "missing_mask" (default None), "num_classes" (default 3), and "testing" (default False).

        Returns:
            float: Calculated score.
        """
        return Scorers().scorer(
            y_true,
            y_pred,
            missing_mask=kwargs.get("missing_mask"),
            num_classes=kwargs.get("num_classes", 3),
            testing=kwargs.get("testing", False),
        )

    @property
    def feature_encoder(self):
        """Handles feature input, X, before training.

        Returns:
            AutoEncoderFeatureTransformer: New transformer configured with this estimator's ``num_classes``.
        """
        return AutoEncoderFeatureTransformer(num_classes=self.num_classes)

    @property
    def target_encoder(self):
        """Handles target input and output, y_true and y_pred, both before and after training.

        Returns:
            AutoEncoderFeatureTransformer: New transformer configured with this estimator's ``num_classes`` and ``activate`` settings.
        """
        return AutoEncoderFeatureTransformer(
            num_classes=self.num_classes, activate=self.activate
        )

    def predict(self, X, **kwargs):
        """Returns predictions for the given test data.

        The input is encoded with the fitted target encoder, passed through the fitted model by calling it directly with ``training=False``, and decoded with the target encoder's ``inverse_transform``.

        Args:
            X (Union[array-like, sparse matrix, dataframe] of shape (n_samples, n_features)): Samples to predict, where n_samples is the number of samples and n_features is the number of features.
            kwargs (Dict[str, Any]): Accepted for compatibility with the SciKeras ``predict`` signature; not used by this override.

        Returns:
            array-like: Decoded predictions as returned by ``target_encoder_.inverse_transform``.
        """
        encoded = self.target_encoder_.transform(X)
        y_pred = self.model_(encoded, training=False)
        return self.target_encoder_.inverse_transform(y_pred)

    def get_metadata(self):
        """Returns a dictionary of meta-parameters stored on this estimator.

        Used by SciKeras to bind these parameters to the SciKeras estimator itself and make them available as inputs to the Keras model.

        Returns:
            Dict[str, Any]: The ``classes_``, ``n_classes_``, ``n_outputs_``, and ``n_outputs_expected_`` attributes.
        """
        # n_outputs_ and n_outputs_expected_ are not assigned in this class;
        # presumably SciKeras sets them during fit -- TODO confirm.
        return {
            "classes_": self.classes_,
            "n_classes_": self.n_classes_,
            "n_outputs_": self.n_outputs_,
            "n_outputs_expected_": self.n_outputs_expected_,
        }
225
-
226
-
227
class VAEClassifier(KerasClassifier):
    """SciKeras estimator that builds and wraps a ``VAEModel``.

    Args:
        output_shape (int): Number of units in model output layer. Defaults to None.

        weights_initializer (str, optional): Kernel initializer to use for model weights. Defaults to "glorot_normal".

        hidden_layer_sizes (List[int]): Output unit size for each hidden layer. Should be list of length num_hidden_layers. Defaults to None.

        num_hidden_layers (int, optional): Number of hidden layers to use. Defaults to 1.

        hidden_activation (str, optional): Hidden activation function to use. Defaults to "elu".

        l1_penalty (float, optional): L1 regularization penalty to use to reduce overfitting. Defaults to 0.01.

        l2_penalty (float, optional): L2 regularization penalty to use to reduce overfitting. Defaults to 0.01.

        dropout_rate (float, optional): Dropout rate for each hidden layer to reduce overfitting. Defaults to 0.2.

        kl_beta (float, optional): Kullback-Leibler divergence weight (beta) to apply to KL loss. 1.0 means unweighted, 0.0 means KL loss is not applied at all. Defaults to 1.0.

        n_components (int, optional): Number of components to use for input V. Defaults to 3.

        num_classes (int, optional): Number of classes in y_train. [A,G,C,T...IUPAC codes]-encoded data should have 10 classes. Defaults to 4.

        sample_weight (numpy.ndarray, optional): Sample weights forwarded to ``VAEModel``. Defaults to None.

        activate (str or None, optional): If not None, then does the appropriate activation. Multilabel learning uses sigmoid activation, and multiclass uses softmax. If set to None, then the function assumes that the input has already been activated. Possible values include: {None, 'sigmoid', 'softmax'}. Also forwarded to ``VAEModel`` as ``final_activation``. Defaults to None.

        y (numpy.ndarray, optional): Target data forwarded to ``VAEModel``. Defaults to None.

        missing_mask (numpy.ndarray, optional): Missing-data mask forwarded to ``VAEModel``. Defaults to None.

        batch_size (int, optional): Batch size forwarded to ``VAEModel``. Defaults to None.

        kwargs (Any): Other keyword arguments to route to fit, compile, callbacks, etc. Should have the routing prefix (e.g., optimizer__learning_rate=0.01).
    """

    def __init__(
        self,
        output_shape=None,
        weights_initializer="glorot_normal",
        hidden_layer_sizes=None,
        num_hidden_layers=1,
        hidden_activation="elu",
        l1_penalty=0.01,
        l2_penalty=0.01,
        dropout_rate=0.2,
        kl_beta=1.0,
        n_components=3,
        num_classes=4,
        sample_weight=None,
        activate=None,
        y=None,
        missing_mask=None,
        batch_size=None,
        **kwargs,
    ):
        # The parent constructor consumes the routed SciKeras parameters
        # (optimizer, loss, callbacks, ...) captured in **kwargs.
        super().__init__(**kwargs)

        self.output_shape = output_shape
        self.weights_initializer = weights_initializer
        self.hidden_layer_sizes = hidden_layer_sizes
        self.num_hidden_layers = num_hidden_layers
        self.hidden_activation = hidden_activation
        self.l1_penalty = l1_penalty
        self.l2_penalty = l2_penalty
        self.dropout_rate = dropout_rate
        self.kl_beta = kl_beta
        self.n_components = n_components
        self.num_classes = num_classes
        self.sample_weight = sample_weight
        self.activate = activate
        self.y = y
        self.missing_mask = missing_mask
        # Assigned after super().__init__ so the explicit argument wins over
        # whatever the parent stored for batch_size.
        self.batch_size = batch_size

    def _keras_build_fn(self, compile_kwargs):
        """Build and compile the ``VAEModel`` from the stored parameters.

        Args:
            compile_kwargs (Dict[str, Any]): Dictionary with parameters: values. The parameters should be passed to the class constructor, but should be captured as kwargs. They should also have the routing prefix (e.g., optimizer__learning_rate=0.01). compile_kwargs will automatically be parsed from **kwargs by KerasClassifier and sent here. The "optimizer", "loss", "metrics", and "run_eagerly" keys are read.

        Returns:
            tf.keras.Model: Compiled ``VAEModel`` instance.
        """

        ######### REMOVING THIS LINE WILL BREAK THE MODEL!!!!! ########
        # Re-assert classes_ at build time (kept from the original author's
        # warning; presumably SciKeras would otherwise overwrite it during fit).
        self.classes_ = np.arange(self.num_classes)

        model = VAEModel(
            output_shape=self.output_shape,
            n_components=self.n_components,
            weights_initializer=self.weights_initializer,
            hidden_layer_sizes=self.hidden_layer_sizes,
            num_hidden_layers=self.num_hidden_layers,
            hidden_activation=self.hidden_activation,
            l1_penalty=self.l1_penalty,
            l2_penalty=self.l2_penalty,
            dropout_rate=self.dropout_rate,
            kl_beta=self.kl_beta,
            num_classes=self.num_classes,
            sample_weight=self.sample_weight,
            missing_mask=self.missing_mask,
            batch_size=self.batch_size,
            y=self.y,
            final_activation=self.activate,
        )

        model.compile(
            optimizer=compile_kwargs["optimizer"],
            loss=compile_kwargs["loss"],
            metrics=compile_kwargs["metrics"],
            run_eagerly=compile_kwargs["run_eagerly"],
        )

        return model

    @property
    def feature_encoder(self):
        """Handles feature input, X, before training.

        Returns:
            AutoEncoderFeatureTransformer: New transformer configured with this estimator's ``num_classes``.
        """
        return AutoEncoderFeatureTransformer(num_classes=self.num_classes)

    @property
    def target_encoder(self):
        """Handles target input and output, y_true and y_pred, both before and after training.

        Returns:
            AutoEncoderFeatureTransformer: New transformer configured with this estimator's ``num_classes`` and ``activate`` settings.
        """
        return AutoEncoderFeatureTransformer(
            num_classes=self.num_classes,
            activate=self.activate,
        )

    def predict(self, X, **kwargs):
        """Returns predictions for the given test data.

        The input is encoded with the fitted target encoder, passed through the fitted model by calling it directly with ``training=False``, and decoded with the target encoder's ``inverse_transform``.

        Args:
            X (Union[array-like, sparse matrix, dataframe] of shape (n_samples, n_features)): Samples to predict, where n_samples is the number of samples and n_features is the number of features.
            kwargs (Dict[str, Any]): Accepted for compatibility with the SciKeras ``predict`` signature; not used by this override.

        Returns:
            array-like: Decoded predictions as returned by ``target_encoder_.inverse_transform``.
        """
        encoded = self.target_encoder_.transform(X)
        y_pred = self.model_(encoded, training=False)
        return self.target_encoder_.inverse_transform(y_pred)

    def get_metadata(self):
        """Returns a dictionary of meta-parameters stored on this estimator.

        Used by SciKeras to bind these parameters to the SciKeras estimator itself and make them available as inputs to the Keras model.

        Returns:
            Dict[str, Any]: The ``classes_``, ``n_classes_``, ``n_outputs_``, and ``n_outputs_expected_`` attributes.
        """
        # Only classes_ is assigned in this class (in _keras_build_fn); the
        # other attributes are presumably set by SciKeras during fit --
        # TODO confirm.
        return {
            "classes_": self.classes_,
            "n_classes_": self.n_classes_,
            "n_outputs_": self.n_outputs_,
            "n_outputs_expected_": self.n_outputs_expected_,
        }

    @staticmethod
    def scorer(y_true, y_pred, **kwargs):
        """Scorer for grid search that masks missing data.

        To use this, do not specify a scoring metric when initializing the grid search object. By default if the scoring_metric option is left as None, then it uses the estimator's scoring metric (this one).

        Args:
            y_true (numpy.ndarray): True target values input to fit().

            y_pred (numpy.ndarray): Predicted target values from estimator. The predictions are modified by self.target_encoder().inverse_transform() before being sent here. Reshaped here to (n_samples, -1, num_classes) before scoring.

            kwargs (Any): Options forwarded to ``Scorers.scorer``. The keys read here are "missing_mask" (default None), "num_classes" (default 3), and "testing" (default False).

        Returns:
            float: Calculated score.
        """
        # NOTE(review): the fallback of 3 differs from the constructor's
        # num_classes default of 4; callers should pass num_classes
        # explicitly -- confirm whether the mismatch is intentional.
        num_classes = kwargs.get("num_classes", 3)

        # Restore the per-site class axis before scoring.
        y_pred = y_pred.reshape(y_pred.shape[0], -1, num_classes)

        return Scorers().scorer(
            y_true,
            y_pred,
            missing_mask=kwargs.get("missing_mask"),
            num_classes=num_classes,
            testing=kwargs.get("testing", False),
        )
430
-
431
-
432
class MLPClassifier(KerasClassifier):
    """SciKeras estimator that builds an NLPCA model or one of the UBP phase models.

    The model class is selected by ``phase``: ``None`` builds ``NLPCAModel``; 1, 2, and 3 build ``UBPPhase1``, ``UBPPhase2``, and ``UBPPhase3`` respectively.

    Args:
        V (numpy.ndarray or Dict[str, Any]): Input X values of shape (n_samples, n_components). If a dictionary is passed, each key: value pair should have randomly initialized values for n_components: V. self.feature_encoder() will parse it and select the key: value pair with the current n_components. This allows n_components to be grid searched using GridSearchCV. Otherwise, it throws an error that the dimensions are off. Required.

        y_train (numpy.ndarray): One-hot encoded target data. Required.

        ubp_weights (tensorflow.Tensor): Weights from UBP model. Fetched by doing model.get_weights() on phase 2 model. Only used if phase 3. Defaults to None.

        batch_size (int): Batch size to train with. Defaults to 32.

        missing_mask (np.ndarray): Missing mask with missing values set to False (0) and observed values as True (1). Defaults to None.

        output_shape (int): Number of units in model output layer. Defaults to None.

        weights_initializer (str): Kernel initializer to use for model weights. Defaults to "glorot_normal".

        hidden_layer_sizes (List[int]): Output unit size for each hidden layer. Should be list of length num_hidden_layers. Defaults to None.

        num_hidden_layers (int): Number of hidden layers to use. Defaults to 1.

        hidden_activation (str): Hidden activation function to use. Not passed to the phase 1 model. Defaults to "elu".

        l1_penalty (float): L1 regularization penalty to use to reduce overfitting. Not passed to the phase 3 model. Defaults to 0.01.

        l2_penalty (float): L2 regularization penalty to use to reduce overfitting. Not passed to the phase 3 model. Defaults to 0.01.

        dropout_rate (float): Dropout rate for each hidden layer to reduce overfitting. Defaults to 0.2.

        num_classes (int): Number of classes in output predictions. Defaults to 3.

        phase (int or None): Current phase (if doing UBP), or None if doing NLPCA. Must be one of {None, 1, 2, 3}. Defaults to None.

        sample_weight (numpy.ndarray): Sample weight matrix for reducing the impact of class imbalance. Should be of shape (n_samples, n_features). Only passed to the NLPCA model. Defaults to None.

        n_components (int): Number of components to use for input V. Defaults to 3.

        kwargs (Any): Other keyword arguments to route to fit, compile, callbacks, etc. Should have the routing prefix (e.g., optimizer__learning_rate=0.01).
    """

    def __init__(
        self,
        V,
        y_train,
        ubp_weights=None,
        batch_size=32,
        missing_mask=None,
        output_shape=None,
        weights_initializer="glorot_normal",
        hidden_layer_sizes=None,
        num_hidden_layers=1,
        hidden_activation="elu",
        l1_penalty=0.01,
        l2_penalty=0.01,
        dropout_rate=0.2,
        num_classes=3,
        phase=None,
        sample_weight=None,
        n_components=3,
        **kwargs,
    ):
        # The parent constructor consumes the routed SciKeras parameters
        # (optimizer, loss, callbacks, ...) captured in **kwargs.
        super().__init__(**kwargs)
        self.V = V
        self.y_train = y_train
        self.ubp_weights = ubp_weights
        # Assigned after super().__init__ so the explicit argument wins over
        # whatever the parent stored for batch_size.
        self.batch_size = batch_size
        self.missing_mask = missing_mask
        self.output_shape = output_shape
        self.weights_initializer = weights_initializer
        self.hidden_layer_sizes = hidden_layer_sizes
        self.num_hidden_layers = num_hidden_layers
        self.hidden_activation = hidden_activation
        self.l1_penalty = l1_penalty
        self.l2_penalty = l2_penalty
        self.dropout_rate = dropout_rate
        self.num_classes = num_classes
        self.phase = phase
        self.sample_weight = sample_weight
        self.n_components = n_components

    def _keras_build_fn(self, compile_kwargs):
        """Build and compile the model selected by ``self.phase``.

        Args:
            compile_kwargs (Dict[str, Any]): Dictionary with parameters: values. The parameters should be passed to the class constructor, but should be captured as kwargs. They should also have the routing prefix (e.g., optimizer__learning_rate=0.01). compile_kwargs will automatically be parsed from **kwargs by KerasClassifier and sent here. The "optimizer", "loss", and "metrics" keys are read.

        Returns:
            tf.keras.Model: Model instance. The chosen model depends on which phase is passed to the class constructor.

        Raises:
            ValueError: If ``self.phase`` is not one of None, 1, 2, or 3.
        """
        ######### REMOVING THIS LINE WILL BREAK THE MODEL!!!!! ########
        # Re-assert classes_ at build time (kept from the original author's
        # warning; presumably SciKeras would otherwise overwrite it during fit).
        self.classes_ = np.arange(self.num_classes)

        # Arguments accepted by every model variant.
        shared = dict(
            V=self.V,
            y=self.y_train,
            batch_size=self.batch_size,
            missing_mask=self.missing_mask,
            output_shape=self.output_shape,
            n_components=self.n_components,
            weights_initializer=self.weights_initializer,
            hidden_layer_sizes=self.hidden_layer_sizes,
            num_hidden_layers=self.num_hidden_layers,
            dropout_rate=self.dropout_rate,
            num_classes=self.num_classes,
            phase=self.phase,
        )
        # Regularization penalties are passed to every variant except phase 3.
        penalties = dict(
            l1_penalty=self.l1_penalty,
            l2_penalty=self.l2_penalty,
        )

        if self.phase is None:
            # NLPCA is the only variant that receives sample_weight.
            model = NLPCAModel(
                hidden_activation=self.hidden_activation,
                sample_weight=self.sample_weight,
                **shared,
                **penalties,
            )
        elif self.phase == 1:
            # Phase 1 is built without a hidden activation argument.
            model = UBPPhase1(**shared, **penalties)
        elif self.phase == 2:
            model = UBPPhase2(
                hidden_activation=self.hidden_activation,
                **shared,
                **penalties,
            )
        elif self.phase == 3:
            model = UBPPhase3(
                hidden_activation=self.hidden_activation,
                **shared,
            )
        else:
            # Fail with a clear message instead of an UnboundLocalError on
            # ``model`` further down.
            raise ValueError(
                f"phase must be one of None, 1, 2, or 3, but got: {self.phase!r}"
            )

        model.build((None, self.n_components))

        # These models are always compiled to run eagerly.
        model.compile(
            optimizer=compile_kwargs["optimizer"],
            loss=compile_kwargs["loss"],
            metrics=compile_kwargs["metrics"],
            run_eagerly=True,
        )

        model.set_model_outputs()

        # Phase 3 starts from the weights learned in an earlier phase.
        if self.phase == 3:
            model.set_weights(self.ubp_weights)

        return model

    @staticmethod
    def scorer(y_true, y_pred, **kwargs):
        """Scorer for grid search that masks missing data.

        To use this, do not specify a scoring metric when initializing the grid search object. By default if the scoring_metric option is left as None, then it uses the estimator's scoring metric (this one).

        Args:
            y_true (numpy.ndarray): True target values input to fit().

            y_pred (numpy.ndarray): Predicted target values from estimator. The predictions are modified by self.target_encoder().inverse_transform() before being sent here.

            kwargs (Any): Options forwarded to ``Scorers.scorer``. The keys read here are "missing_mask" (default: all-True boolean array shaped like y_true), "num_classes" (default 3), and "testing" (default False).

        Returns:
            float: Calculated score.
        """
        # With no mask supplied, every position is marked True.
        missing_mask = kwargs.get(
            "missing_mask", np.ones(y_true.shape, dtype=bool)
        )

        return Scorers().scorer(
            y_true,
            y_pred,
            missing_mask=missing_mask,
            num_classes=kwargs.get("num_classes", 3),
            testing=kwargs.get("testing", False),
        )

    @property
    def feature_encoder(self):
        """Handles feature input, X, before training.

        Returns:
            UBPInputTransformer: New transformer constructed from ``n_components`` and ``V``.
        """
        return UBPInputTransformer(self.n_components, self.V)

    @property
    def target_encoder(self):
        """Handles target input and output, y_true and y_pred, both before and after training.

        Returns:
            MLPTargetTransformer: New transformer constructed with default arguments.
        """
        return MLPTargetTransformer()

    def predict(self, X, **kwargs):
        """Returns predictions computed from the model's refined latent input.

        Args:
            X (Union[array-like, sparse matrix, dataframe] of shape (n_samples, n_features)): Accepted for compatibility with the SciKeras ``predict`` signature; not used. The model is called on its own ``V_latent`` attribute instead.
            kwargs (Dict[str, Any]): Accepted for compatibility with the SciKeras ``predict`` signature; not used by this override.

        Returns:
            array-like: Decoded predictions as returned by ``target_encoder_.inverse_transform``.

        Notes:
            Had to override predict() here in order to do the __call__ with the refined input, V_latent.
        """
        y_pred_proba = self.model_(self.model_.V_latent, training=False)
        return self.target_encoder_.inverse_transform(y_pred_proba)

    def get_metadata(self):
        """Returns a dictionary of meta-parameters stored on this estimator.

        Used by SciKeras to bind these parameters to the SciKeras estimator itself and make them available as inputs to the Keras model.

        Returns:
            Dict[str, Any]: The ``classes_``, ``n_classes_``, ``n_outputs_``, and ``n_outputs_expected_`` attributes.
        """
        # Only classes_ is assigned in this class (in _keras_build_fn); the
        # other attributes are presumably set by SciKeras during fit --
        # TODO confirm.
        return {
            "classes_": self.classes_,
            "n_classes_": self.n_classes_,
            "n_outputs_": self.n_outputs_,
            "n_outputs_expected_": self.n_outputs_expected_,
        }