pg-sui 1.0.2.1__py3-none-any.whl → 1.6.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (112)
  1. {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info}/METADATA +51 -70
  2. pg_sui-1.6.8.dist-info/RECORD +78 -0
  3. {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info}/WHEEL +1 -1
  4. pg_sui-1.6.8.dist-info/entry_points.txt +4 -0
  5. pg_sui-1.6.8.dist-info/top_level.txt +1 -0
  6. pgsui/__init__.py +35 -54
  7. pgsui/_version.py +34 -0
  8. pgsui/cli.py +635 -0
  9. pgsui/data_processing/config.py +576 -0
  10. pgsui/data_processing/containers.py +1782 -0
  11. pgsui/data_processing/transformers.py +121 -1103
  12. pgsui/electron/app/__main__.py +5 -0
  13. pgsui/electron/app/icons/icons/1024x1024.png +0 -0
  14. pgsui/electron/app/icons/icons/128x128.png +0 -0
  15. pgsui/electron/app/icons/icons/16x16.png +0 -0
  16. pgsui/electron/app/icons/icons/24x24.png +0 -0
  17. pgsui/electron/app/icons/icons/256x256.png +0 -0
  18. pgsui/electron/app/icons/icons/32x32.png +0 -0
  19. pgsui/electron/app/icons/icons/48x48.png +0 -0
  20. pgsui/electron/app/icons/icons/512x512.png +0 -0
  21. pgsui/electron/app/icons/icons/64x64.png +0 -0
  22. pgsui/electron/app/icons/icons/icon.icns +0 -0
  23. pgsui/electron/app/icons/icons/icon.ico +0 -0
  24. pgsui/electron/app/main.js +189 -0
  25. pgsui/electron/app/package-lock.json +6893 -0
  26. pgsui/electron/app/package.json +50 -0
  27. pgsui/electron/app/preload.js +15 -0
  28. pgsui/electron/app/server.py +146 -0
  29. pgsui/electron/app/ui/logo.png +0 -0
  30. pgsui/electron/app/ui/renderer.js +130 -0
  31. pgsui/electron/app/ui/styles.css +59 -0
  32. pgsui/electron/app/ui/ui_shim.js +72 -0
  33. pgsui/electron/bootstrap.py +43 -0
  34. pgsui/electron/launch.py +59 -0
  35. pgsui/electron/package.json +14 -0
  36. pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
  37. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
  38. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
  39. pgsui/impute/deterministic/imputers/allele_freq.py +691 -0
  40. pgsui/impute/deterministic/imputers/mode.py +679 -0
  41. pgsui/impute/deterministic/imputers/nmf.py +221 -0
  42. pgsui/impute/deterministic/imputers/phylo.py +971 -0
  43. pgsui/impute/deterministic/imputers/ref_allele.py +530 -0
  44. pgsui/impute/supervised/base.py +339 -0
  45. pgsui/impute/supervised/imputers/hist_gradient_boosting.py +293 -0
  46. pgsui/impute/supervised/imputers/random_forest.py +287 -0
  47. pgsui/impute/unsupervised/base.py +924 -0
  48. pgsui/impute/unsupervised/callbacks.py +89 -263
  49. pgsui/impute/unsupervised/imputers/autoencoder.py +972 -0
  50. pgsui/impute/unsupervised/imputers/nlpca.py +1264 -0
  51. pgsui/impute/unsupervised/imputers/ubp.py +1288 -0
  52. pgsui/impute/unsupervised/imputers/vae.py +957 -0
  53. pgsui/impute/unsupervised/loss_functions.py +158 -0
  54. pgsui/impute/unsupervised/models/autoencoder_model.py +208 -558
  55. pgsui/impute/unsupervised/models/nlpca_model.py +149 -468
  56. pgsui/impute/unsupervised/models/ubp_model.py +198 -1317
  57. pgsui/impute/unsupervised/models/vae_model.py +259 -618
  58. pgsui/impute/unsupervised/nn_scorers.py +215 -0
  59. pgsui/utils/classification_viz.py +591 -0
  60. pgsui/utils/misc.py +35 -480
  61. pgsui/utils/plotting.py +514 -824
  62. pgsui/utils/scorers.py +212 -438
  63. pg_sui-1.0.2.1.dist-info/RECORD +0 -75
  64. pg_sui-1.0.2.1.dist-info/top_level.txt +0 -3
  65. pgsui/example_data/phylip_files/test_n10.phy +0 -118
  66. pgsui/example_data/phylip_files/test_n100.phy +0 -118
  67. pgsui/example_data/phylip_files/test_n2.phy +0 -118
  68. pgsui/example_data/phylip_files/test_n500.phy +0 -118
  69. pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
  70. pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
  71. pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
  72. pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
  73. pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
  74. pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
  75. pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
  76. pgsui/example_data/trees/test.iqtree +0 -376
  77. pgsui/example_data/trees/test.qmat +0 -5
  78. pgsui/example_data/trees/test.rate +0 -2033
  79. pgsui/example_data/trees/test.tre +0 -1
  80. pgsui/example_data/trees/test_n10.rate +0 -19
  81. pgsui/example_data/trees/test_n100.rate +0 -109
  82. pgsui/example_data/trees/test_n500.rate +0 -509
  83. pgsui/example_data/trees/test_siterates.txt +0 -2024
  84. pgsui/example_data/trees/test_siterates_n10.txt +0 -10
  85. pgsui/example_data/trees/test_siterates_n100.txt +0 -100
  86. pgsui/example_data/trees/test_siterates_n500.txt +0 -500
  87. pgsui/example_data/vcf_files/test.vcf +0 -244
  88. pgsui/example_data/vcf_files/test.vcf.gz +0 -0
  89. pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
  90. pgsui/impute/estimators.py +0 -735
  91. pgsui/impute/impute.py +0 -1486
  92. pgsui/impute/simple_imputers.py +0 -1439
  93. pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -785
  94. pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1027
  95. pgsui/impute/unsupervised/keras_classifiers.py +0 -702
  96. pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
  97. pgsui/impute/unsupervised/neural_network_imputers.py +0 -1424
  98. pgsui/impute/unsupervised/neural_network_methods.py +0 -1549
  99. pgsui/pg_sui.py +0 -261
  100. pgsui/utils/sequence_tools.py +0 -407
  101. simulation/sim_benchmarks.py +0 -333
  102. simulation/sim_treeparams.py +0 -475
  103. test/__init__.py +0 -0
  104. test/pg_sui_simtest.py +0 -215
  105. test/pg_sui_testing.py +0 -523
  106. test/test.py +0 -297
  107. test/test_pgsui.py +0 -374
  108. test/test_tkc.py +0 -214
  109. {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info/licenses}/LICENSE +0 -0
  110. /pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
  111. /pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
  112. {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
pgsui/impute/unsupervised/keras_classifiers.py
@@ -1,702 +0,0 @@
- import numpy as np
- import tensorflow as tf
-
- from scikeras.wrappers import KerasClassifier
-
- try:
-     from ...utils.scorers import Scorers
-     from .models.autoencoder_model import AutoEncoderModel
-     from .models.nlpca_model import NLPCAModel
-     from .models.ubp_model import UBPPhase1, UBPPhase2, UBPPhase3
-     from .models.vae_model import VAEModel
-     from ...data_processing.transformers import (
-         MLPTargetTransformer,
-         UBPInputTransformer,
-         AutoEncoderFeatureTransformer,
-     )
- except (ModuleNotFoundError, ValueError, ImportError):
-     from utils.scorers import Scorers
-     from impute.unsupervised.neural_network_methods import NeuralNetworkMethods
-     from impute.unsupervised.models.vae_model import (
-         VAEModel,
-     )
-     from impute.unsupervised.models.autoencoder_model import AutoEncoderModel
-     from impute.unsupervised.models.nlpca_model import NLPCAModel
-     from impute.unsupervised.models.ubp_model import (
-         UBPPhase1,
-         UBPPhase2,
-         UBPPhase3,
-     )
-     from data_processing.transformers import (
-         MLPTargetTransformer,
-         UBPInputTransformer,
-         AutoEncoderFeatureTransformer,
-     )
-
-
- class SAEClassifier(KerasClassifier):
-     """Estimator to be used with the scikit-learn API.
-
-     Args:
-         output_shape (int): Number of units in model output layer. Defaults to None.
-
-         weights_initializer (str): Kernel initializer to use for model weights. Defaults to "glorot_normal".
-
-         hidden_layer_sizes (List[int]): Output unit size for each hidden layer. Should be a list of length num_hidden_layers. Defaults to None.
-
-         num_hidden_layers (int): Number of hidden layers to use. Defaults to 1.
-
-         hidden_activation (str): Hidden activation function to use. Defaults to "elu".
-
-         l1_penalty (float): L1 regularization penalty to use to reduce overfitting. Defaults to 0.01.
-
-         l2_penalty (float): L2 regularization penalty to use to reduce overfitting. Defaults to 0.01.
-
-         dropout_rate (float): Dropout rate for each hidden layer to reduce overfitting. Defaults to 0.2.
-
-         n_components (int): Number of components to use for input V. Defaults to 3.
-
-         num_classes (int, optional): Number of classes in y_train. 012-encoded data should have 3 classes. Defaults to 3.
-
-         kwargs (Any): Other keyword arguments to route to fit, compile, callbacks, etc. Should have the routing prefix (e.g., optimizer__learning_rate=0.01).
-     """
-
-     def __init__(
-         self,
-         y=None,
-         output_shape=None,
-         weights_initializer="glorot_normal",
-         hidden_layer_sizes=None,
-         num_hidden_layers=1,
-         hidden_activation="elu",
-         l1_penalty=0.01,
-         l2_penalty=0.01,
-         dropout_rate=0.2,
-         n_components=3,
-         sample_weight=None,
-         missing_mask=None,
-         num_classes=4,
-         activate="sigmoid",
-         **kwargs,
-     ):
-         super().__init__(**kwargs)
-
-         self.y = y
-         self.output_shape = output_shape
-         self.weights_initializer = weights_initializer
-         self.hidden_layer_sizes = hidden_layer_sizes
-         self.num_hidden_layers = num_hidden_layers
-         self.hidden_activation = hidden_activation
-         self.l1_penalty = l1_penalty
-         self.l2_penalty = l2_penalty
-         self.dropout_rate = dropout_rate
-         self.n_components = n_components
-         self.sample_weight = sample_weight
-         self.missing_mask = missing_mask
-         self.num_classes = num_classes
-         self.activate = activate
-
-         self.classes_ = np.arange(self.num_classes)
-         self.n_classes_ = self.num_classes
-
-     def _keras_build_fn(self, compile_kwargs):
-         """Build model with custom parameters.
-
-         Args:
-             compile_kwargs (Dict[str, Any]): Dictionary of parameter: value pairs. The parameters should be passed to the class constructor, but captured as kwargs. They should also have the routing prefix (e.g., optimizer__learning_rate=0.01). compile_kwargs is automatically parsed from **kwargs by KerasClassifier and sent here.
-
-         Returns:
-             tf.keras.Model: Model instance. The chosen model depends on which phase is passed to the class constructor.
-         """
-
-         ######### REMOVING THIS LINE WILL BREAK THE MODEL!!!!! ########
-         self.classes_ = np.arange(self.num_classes)
-
-         model = AutoEncoderModel(
-             self.y,
-             output_shape=self.output_shape,
-             n_components=self.n_components,
-             weights_initializer=self.weights_initializer,
-             hidden_layer_sizes=self.hidden_layer_sizes,
-             num_hidden_layers=self.num_hidden_layers,
-             hidden_activation=self.hidden_activation,
-             l1_penalty=self.l1_penalty,
-             l2_penalty=self.l2_penalty,
-             dropout_rate=self.dropout_rate,
-             sample_weight=self.sample_weight,
-             missing_mask=self.missing_mask,
-             num_classes=self.num_classes,
-         )
-
-         model.compile(
-             optimizer=compile_kwargs["optimizer"],
-             loss=compile_kwargs["loss"],
-             metrics=compile_kwargs["metrics"],
-             run_eagerly=False,
-         )
-
-         return model
-
-     @staticmethod
-     def scorer(y_true, y_pred, **kwargs):
-         """Scorer for grid search that masks missing data.
-
-         To use this, do not specify a scoring metric when initializing the grid search object. By default, if the scoring_metric option is left as None, it uses the estimator's scoring metric (this one).
-
-         Args:
-             y_true (numpy.ndarray): True target values input to fit().
-             y_pred (numpy.ndarray): Predicted target values from the estimator. The predictions are modified by self.target_encoder().inverse_transform() before being sent here.
-             kwargs (Any): Other parameters sent to the sklearn scoring metric. Supported options include missing_mask, scoring_metric, and testing.
-
-         Returns:
-             float: Calculated score.
-         """
-         n_classes_ = kwargs.get("num_classes", 3)
-         classes_ = np.arange(n_classes_)
-         missing_mask = kwargs.get("missing_mask")
-
-         num_classes = kwargs.get("num_classes", 3)
-         testing = kwargs.get("testing", False)
-
-         scorers = Scorers()
-
-         return scorers.scorer(
-             y_true,
-             y_pred,
-             missing_mask=missing_mask,
-             num_classes=num_classes,
-             testing=testing,
-         )
-
-     @property
-     def feature_encoder(self):
-         """Handles feature input, X, before training.
-
-         Returns:
-             MLPTargetTransformer: InputTransformer object that includes fit() and transform() methods to transform input before estimator fitting.
-         """
-         return AutoEncoderFeatureTransformer(num_classes=self.num_classes)
-
-     @property
-     def target_encoder(self):
-         """Handles target input and output, y_true and y_pred, both before and after training.
-
-         Returns:
-             NNOutputTransformer: NNOutputTransformer object that includes fit(), transform(), and inverse_transform() methods.
-         """
-         return AutoEncoderFeatureTransformer(
-             num_classes=self.num_classes, activate=self.activate
-         )
-
-     def predict(self, X, **kwargs):
-         """Returns predictions for the given test data.
-
-         Args:
-             X (Union[array-like, sparse matrix, dataframe] of shape (n_samples, n_features)): Training samples where n_samples is the number of samples and n_features is the number of features.
-             kwargs (Dict[str, Any]): Extra arguments to route to ``Model.predict``\.
-
-         Warnings:
-             Passing estimator parameters as keyword arguments (aka as ``**kwargs``) to ``predict`` is not supported by the Scikit-Learn API, and will be removed in a future version of SciKeras. These parameters can also be specified by prefixing ``predict__`` to a parameter at initialization (``BaseWrapper(..., fit__batch_size=32, predict__batch_size=1000)``) or by using ``set_params`` (``est.set_params(fit__batch_size=32, predict__batch_size=1000)``\).
-
-         Returns:
-             array-like: Predictions, of shape (n_samples,) or (n_samples, n_outputs).
-
-         Notes:
-             Had to override predict() here in order to do the __call__ with the refined input, V_latent.
-         """
-         X_train = self.target_encoder_.transform(X)
-         y_pred = self.model_(X_train, training=False)
-         return self.target_encoder_.inverse_transform(y_pred)
-
-     def get_metadata(self):
-         """Returns a dictionary of meta-parameters generated when this transformer was fitted.
-
-         Used by SciKeras to bind these parameters to the SciKeras estimator itself and make them available as inputs to the Keras model.
-
-         Returns:
-             Dict[str, Any]: Dictionary of meta-parameters generated when this transformer was fitted.
-         """
-         return {
-             "classes_": self.classes_,
-             "n_classes_": self.n_classes_,
-             "n_outputs_": self.n_outputs_,
-             "n_outputs_expected_": self.n_outputs_expected_,
-         }
-
-
- class VAEClassifier(KerasClassifier):
-     """Estimator to be used with the scikit-learn API and a keras model.
-
-     Args:
-         output_shape (int): Number of units in model output layer. Defaults to None.
-
-         weights_initializer (str, optional): Kernel initializer to use for model weights. Defaults to "glorot_normal".
-
-         hidden_layer_sizes (List[int]): Output unit size for each hidden layer. Should be a list of length num_hidden_layers. Defaults to None.
-
-         num_hidden_layers (int, optional): Number of hidden layers to use. Defaults to 1.
-
-         hidden_activation (str, optional): Hidden activation function to use. Defaults to "elu".
-
-         l1_penalty (float, optional): L1 regularization penalty to use to reduce overfitting. Defaults to 0.01.
-
-         l2_penalty (float, optional): L2 regularization penalty to use to reduce overfitting. Defaults to 0.01.
-
-         dropout_rate (float, optional): Dropout rate for each hidden layer to reduce overfitting. Defaults to 0.2.
-
-         kl_beta (float, optional): Kullback-Leibler divergence weight (beta) to apply to KL loss. 1.0 means unweighted, 0.0 means KL loss is not applied at all. Defaults to 1.0.
-
-         n_components (int, optional): Number of components to use for input V. Defaults to 3.
-
-         num_classes (int, optional): Number of classes in y_train. [A,G,C,T...IUPAC codes]-encoded data should have 10 classes. Defaults to 4.
-
-         activate (str or None, optional): If not None, applies the appropriate activation. Multilabel learning uses sigmoid activation, and multiclass uses softmax. If set to None, the function assumes that the input has already been activated. Possible values include: {None, 'sigmoid', 'softmax'}. Defaults to None.
-
-         kwargs (Any): Other keyword arguments to route to fit, compile, callbacks, etc. Should have the routing prefix (e.g., optimizer__learning_rate=0.01).
-     """
-
-     def __init__(
-         self,
-         output_shape=None,
-         weights_initializer="glorot_normal",
-         hidden_layer_sizes=None,
-         num_hidden_layers=1,
-         hidden_activation="elu",
-         l1_penalty=0.01,
-         l2_penalty=0.01,
-         dropout_rate=0.2,
-         kl_beta=1.0,
-         n_components=3,
-         num_classes=4,
-         sample_weight=None,
-         activate=None,
-         y=None,
-         missing_mask=None,
-         batch_size=None,
-         **kwargs,
-     ):
-         super().__init__(**kwargs)
-
-         self.output_shape = output_shape
-         self.weights_initializer = weights_initializer
-         self.hidden_layer_sizes = hidden_layer_sizes
-         self.num_hidden_layers = num_hidden_layers
-         self.hidden_activation = hidden_activation
-         self.l1_penalty = l1_penalty
-         self.l2_penalty = l2_penalty
-         self.dropout_rate = dropout_rate
-         self.kl_beta = kl_beta
-         self.n_components = n_components
-         self.num_classes = num_classes
-         self.sample_weight = sample_weight
-         self.activate = activate
-         self.y = y
-         self.missing_mask = missing_mask
-         self.batch_size = batch_size
-
-     def _keras_build_fn(self, compile_kwargs):
-         """Build model with custom parameters.
-
-         Args:
-             compile_kwargs (Dict[str, Any]): Dictionary of parameter: value pairs. The parameters should be passed to the class constructor, but captured as kwargs. They should also have the routing prefix (e.g., optimizer__learning_rate=0.01). compile_kwargs is automatically parsed from **kwargs by KerasClassifier and sent here.
-
-         Returns:
-             tf.keras.Model: Model instance. The chosen model depends on which phase is passed to the class constructor.
-         """
-
-         ######### REMOVING THIS LINE WILL BREAK THE MODEL!!!!! ########
-         self.classes_ = np.arange(self.num_classes)
-
-         model = VAEModel(
-             output_shape=self.output_shape,
-             n_components=self.n_components,
-             weights_initializer=self.weights_initializer,
-             hidden_layer_sizes=self.hidden_layer_sizes,
-             num_hidden_layers=self.num_hidden_layers,
-             hidden_activation=self.hidden_activation,
-             l1_penalty=self.l1_penalty,
-             l2_penalty=self.l2_penalty,
-             dropout_rate=self.dropout_rate,
-             kl_beta=self.kl_beta,
-             num_classes=self.num_classes,
-             sample_weight=self.sample_weight,
-             missing_mask=self.missing_mask,
-             batch_size=self.batch_size,
-             y=self.y,
-             final_activation=self.activate,
-         )
-
-         model.compile(
-             optimizer=compile_kwargs["optimizer"],
-             loss=compile_kwargs["loss"],
-             metrics=compile_kwargs["metrics"],
-             run_eagerly=compile_kwargs["run_eagerly"],
-             # sample_weight_mode="temporal",
-         )
-
-         return model
-
-     @property
-     def feature_encoder(self):
-         """Handles feature input, X, before training.
-
-         Returns:
-             MLPTargetTransformer: InputTransformer object that includes fit() and transform() methods to transform input before estimator fitting.
-         """
-         return AutoEncoderFeatureTransformer(num_classes=self.num_classes)
-
-     @property
-     def target_encoder(self):
-         """Handles target input and output, y_true and y_pred, both before and after training.
-
-         Returns:
-             NNOutputTransformer: NNOutputTransformer object that includes fit(), transform(), and inverse_transform() methods.
-         """
-         return AutoEncoderFeatureTransformer(
-             num_classes=self.num_classes,
-             activate=self.activate,
-         )
-
-     def predict(self, X, **kwargs):
-         """Returns predictions for the given test data.
-
-         Args:
-             X (Union[array-like, sparse matrix, dataframe] of shape (n_samples, n_features)): Training samples where n_samples is the number of samples and n_features is the number of features.
-             kwargs (Dict[str, Any]): Extra arguments to route to ``Model.predict``\.
-
-         Warnings:
-             Passing estimator parameters as keyword arguments (aka as ``**kwargs``) to ``predict`` is not supported by the Scikit-Learn API, and will be removed in a future version of SciKeras. These parameters can also be specified by prefixing ``predict__`` to a parameter at initialization (``BaseWrapper(..., fit__batch_size=32, predict__batch_size=1000)``) or by using ``set_params`` (``est.set_params(fit__batch_size=32, predict__batch_size=1000)``\).
-
-         Returns:
-             array-like: Predictions, of shape (n_samples,) or (n_samples, n_outputs).
-
-         Notes:
-             Had to override predict() here in order to do the __call__ with the refined input, V_latent.
-         """
-         X_train = self.target_encoder_.transform(X)
-         y_pred = self.model_(X_train, training=False)
-         return self.target_encoder_.inverse_transform(y_pred)
-
-     def get_metadata(self):
-         """Returns a dictionary of meta-parameters generated when this transformer was fitted.
-
-         Used by SciKeras to bind these parameters to the SciKeras estimator itself and make them available as inputs to the Keras model.
-
-         Returns:
-             Dict[str, Any]: Dictionary of meta-parameters generated when this transformer was fitted.
-         """
-         return {
-             "classes_": self.classes_,
-             "n_classes_": self.n_classes_,
-             "n_outputs_": self.n_outputs_,
-             "n_outputs_expected_": self.n_outputs_expected_,
-         }
-
-     @staticmethod
-     def scorer(y_true, y_pred, **kwargs):
-         """Scorer for grid search that masks missing data.
-
-         To use this, do not specify a scoring metric when initializing the grid search object. By default, if the scoring_metric option is left as None, it uses the estimator's scoring metric (this one).
-
-         Args:
-             y_true (numpy.ndarray): True target values input to fit().
-
-             y_pred (numpy.ndarray): Predicted target values from the estimator. The predictions are modified by self.target_encoder().inverse_transform() before being sent here.
-
-             kwargs (Any): Other parameters sent to the sklearn scoring metric. Supported options include missing_mask, scoring_metric, and testing.
-
-         Returns:
-             float: Calculated score.
-         """
-
-         n_classes_ = kwargs.get("num_classes", 3)
-         classes_ = np.arange(n_classes_)
-         missing_mask = kwargs.get("missing_mask")
-
-         num_classes = kwargs.get("num_classes", 3)
-         testing = kwargs.get("testing", False)
-
-         y_pred = y_pred.reshape(y_pred.shape[0], -1, num_classes)
-
-         scorers = Scorers()
-
-         return scorers.scorer(
-             y_true,
-             y_pred,
-             missing_mask=missing_mask,
-             num_classes=num_classes,
-             testing=testing,
-         )
-
-
- class MLPClassifier(KerasClassifier):
-     """Estimator to be used with the scikit-learn API.
-
-     Args:
-         V (numpy.ndarray or Dict[str, Any]): Input X values of shape (n_samples, n_components). If a dictionary is passed, each key: value pair should have randomly initialized values for n_components: V. self.feature_encoder() will parse it and select the key: value pair with the current n_components. This allows n_components to be grid searched using GridSearchCV. Otherwise, it throws an error that the dimensions are off. Defaults to None.
-
-         y_train (numpy.ndarray): One-hot encoded target data. Defaults to None.
-
-         ubp_weights (tensorflow.Tensor): Weights from UBP model. Fetched by calling model.get_weights() on the phase 2 model. Only used in phase 3. Defaults to None.
-
-         batch_size (int): Batch size to train with. Defaults to 32.
-
-         missing_mask (np.ndarray): Missing mask with missing values set to False (0) and observed values as True (1). Defaults to None.
-
-         output_shape (int): Number of units in model output layer. Defaults to None.
-
-         weights_initializer (str): Kernel initializer to use for model weights. Defaults to "glorot_normal".
-
-         hidden_layer_sizes (List[int]): Output unit size for each hidden layer. Should be a list of length num_hidden_layers. Defaults to None.
-
-         num_hidden_layers (int): Number of hidden layers to use. Defaults to 1.
-
-         hidden_activation (str): Hidden activation function to use. Defaults to "elu".
-
-         l1_penalty (float): L1 regularization penalty to use to reduce overfitting. Defaults to 0.01.
-
-         l2_penalty (float): L2 regularization penalty to use to reduce overfitting. Defaults to 0.01.
-
-         dropout_rate (float): Dropout rate for each hidden layer to reduce overfitting. Defaults to 0.2.
-
-         num_classes (int): Number of classes in output predictions. Defaults to 3.
-
-         phase (int or None): Current phase (if doing UBP), or None if doing NLPCA. Defaults to None.
-
-         sample_weight (numpy.ndarray): Sample weight matrix for reducing the impact of class imbalance. Should be of shape (n_samples, n_features).
-
-         n_components (int): Number of components to use for input V. Defaults to 3.
-
-         kwargs (Any): Other keyword arguments to route to fit, compile, callbacks, etc. Should have the routing prefix (e.g., optimizer__learning_rate=0.01).
-     """
-
-     def __init__(
-         self,
-         V,
-         y_train,
-         ubp_weights=None,
-         batch_size=32,
-         missing_mask=None,
-         output_shape=None,
-         weights_initializer="glorot_normal",
-         hidden_layer_sizes=None,
-         num_hidden_layers=1,
-         hidden_activation="elu",
-         l1_penalty=0.01,
-         l2_penalty=0.01,
-         dropout_rate=0.2,
-         num_classes=3,
-         phase=None,
-         sample_weight=None,
-         n_components=3,
-         activate=None,
-         **kwargs,
-     ):
-         super().__init__(**kwargs)
-         self.V = V
-         self.y_train = y_train
-         self.ubp_weights = ubp_weights
-         self.batch_size = batch_size
-         self.missing_mask = missing_mask
-         self.output_shape = output_shape
-         self.weights_initializer = weights_initializer
-         self.hidden_layer_sizes = hidden_layer_sizes
-         self.num_hidden_layers = num_hidden_layers
-         self.hidden_activation = hidden_activation
-         self.l1_penalty = l1_penalty
-         self.l2_penalty = l2_penalty
-         self.dropout_rate = dropout_rate
-         self.num_classes = num_classes
-         self.phase = phase
-         self.sample_weight = sample_weight
-         self.n_components = n_components
-         self.activate = activate
-
-     def _keras_build_fn(self, compile_kwargs):
-         """Build model with custom parameters.
-
-         Args:
-             compile_kwargs (Dict[str, Any]): Dictionary of parameter: value pairs. The parameters should be passed to the class constructor, but captured as kwargs. They should also have the routing prefix (e.g., optimizer__learning_rate=0.01). compile_kwargs is automatically parsed from **kwargs by KerasClassifier and sent here.
-
-         Returns:
-             tf.keras.Model: Model instance. The chosen model depends on which phase is passed to the class constructor.
-         """
-         ######### REMOVING THIS LINE WILL BREAK THE MODEL!!!!! ########
-         self.classes_ = np.arange(self.num_classes)
-
-         if self.phase is None:
-             model = NLPCAModel(
-                 V=self.V,
-                 y=self.y_train,
-                 batch_size=self.batch_size,
-                 missing_mask=self.missing_mask,
-                 output_shape=self.output_shape,
-                 n_components=self.n_components,
-                 weights_initializer=self.weights_initializer,
-                 hidden_layer_sizes=self.hidden_layer_sizes,
-                 num_hidden_layers=self.num_hidden_layers,
-                 hidden_activation=self.hidden_activation,
-                 l1_penalty=self.l1_penalty,
-                 l2_penalty=self.l2_penalty,
-                 dropout_rate=self.dropout_rate,
-                 num_classes=self.num_classes,
-                 phase=self.phase,
-                 sample_weight=self.sample_weight,
-             )
-
-         elif self.phase == 1:
-             model = UBPPhase1(
-                 V=self.V,
-                 y=self.y_train,
-                 batch_size=self.batch_size,
-                 missing_mask=self.missing_mask,
-                 output_shape=self.output_shape,
-                 n_components=self.n_components,
-                 weights_initializer=self.weights_initializer,
-                 hidden_layer_sizes=self.hidden_layer_sizes,
-                 num_hidden_layers=self.num_hidden_layers,
-                 l1_penalty=self.l1_penalty,
-                 l2_penalty=self.l2_penalty,
-                 dropout_rate=self.dropout_rate,
-                 num_classes=self.num_classes,
-                 phase=self.phase,
-             )
-
-         elif self.phase == 2:
-             model = UBPPhase2(
-                 V=self.V,
-                 y=self.y_train,
-                 batch_size=self.batch_size,
-                 missing_mask=self.missing_mask,
-                 output_shape=self.output_shape,
-                 n_components=self.n_components,
-                 weights_initializer=self.weights_initializer,
-                 hidden_layer_sizes=self.hidden_layer_sizes,
-                 num_hidden_layers=self.num_hidden_layers,
-                 hidden_activation=self.hidden_activation,
-                 l1_penalty=self.l1_penalty,
-                 l2_penalty=self.l2_penalty,
-                 dropout_rate=self.dropout_rate,
-                 num_classes=self.num_classes,
-                 phase=self.phase,
-             )
-
-         elif self.phase == 3:
-             model = UBPPhase3(
-                 V=self.V,
-                 y=self.y_train,
-                 batch_size=self.batch_size,
-                 missing_mask=self.missing_mask,
-                 output_shape=self.output_shape,
-                 n_components=self.n_components,
-                 weights_initializer=self.weights_initializer,
-                 hidden_layer_sizes=self.hidden_layer_sizes,
-                 num_hidden_layers=self.num_hidden_layers,
-                 hidden_activation=self.hidden_activation,
-                 dropout_rate=self.dropout_rate,
-                 num_classes=self.num_classes,
-                 phase=self.phase,
-             )
-
-         model.build((None, self.n_components))
-
-         model.compile(
-             optimizer=compile_kwargs["optimizer"],
-             loss=compile_kwargs["loss"],
-             metrics=compile_kwargs["metrics"],
-             run_eagerly=True,
-         )
-
-         model.set_model_outputs()
-
-         if self.phase == 3:
-             model.set_weights(self.ubp_weights)
-
-         return model
-
-     @staticmethod
-     def scorer(y_true, y_pred, **kwargs):
-         """Scorer for grid search that masks missing data.
-
-         To use this, do not specify a scoring metric when initializing the grid search object. By default, if the scoring_metric option is left as None, it uses the estimator's scoring metric (this one).
-
-         Args:
-             y_true (numpy.ndarray): True target values input to fit().
-
-             y_pred (numpy.ndarray): Predicted target values from the estimator. The predictions are modified by self.target_encoder().inverse_transform() before being sent here.
-
-             kwargs (Any): Other parameters sent to the sklearn scoring metric. Supported options include missing_mask, scoring_metric, and testing.
-
-         Returns:
-             float: Calculated score.
-         """
-         missing_mask = kwargs.get(
-             "missing_mask", np.ones(y_true.shape, dtype=bool)
-         )
-         num_classes = kwargs.get("num_classes", 3)
-         testing = kwargs.get("testing", False)
-
-         scorers = Scorers()
-
-         return scorers.scorer(
-             y_true,
-             y_pred,
-             missing_mask=missing_mask,
-             num_classes=num_classes,
-             testing=testing,
-         )
-
-     @property
-     def feature_encoder(self):
-         """Handles feature input, X, before training.
-
-         Returns:
-             UBPInputTransformer: InputTransformer object that includes fit() and transform() methods to transform input before estimator fitting.
-         """
-         return UBPInputTransformer(self.n_components, self.V)
-
-     @property
-     def target_encoder(self):
-         """Handles target input and output, y_true and y_pred, both before and after training.
-
-         Returns:
-             NNOutputTransformer: NNOutputTransformer object that includes fit(), transform(), and inverse_transform() methods.
-         """
-         return AutoEncoderFeatureTransformer(
-             num_classes=self.num_classes,
-             activate=None,
-         )
-
-     def predict(self, X, **kwargs):
-         """Returns predictions for the given test data.
-
-         Args:
-             X (Union[array-like, sparse matrix, dataframe] of shape (n_samples, n_features)): Training samples where n_samples is the number of samples and n_features is the number of features.
-             kwargs (Dict[str, Any]): Extra arguments to route to ``Model.predict``\.
-
-         Warnings:
-             Passing estimator parameters as keyword arguments (aka as ``**kwargs``) to ``predict`` is not supported by the Scikit-Learn API, and will be removed in a future version of SciKeras. These parameters can also be specified by prefixing ``predict__`` to a parameter at initialization (``BaseWrapper(..., fit__batch_size=32, predict__batch_size=1000)``) or by using ``set_params`` (``est.set_params(fit__batch_size=32, predict__batch_size=1000)``\).
-
-         Returns:
-             array-like: Predictions, of shape (n_samples,) or (n_samples, n_outputs).
-
-         Notes:
-             Had to override predict() here in order to do the __call__ with the refined input, V_latent.
-         """
-         y_pred_proba = self.model_(self.model_.V_latent, training=False)
-         return self.target_encoder_.inverse_transform(y_pred_proba)
-
-     def get_metadata(self):
-         """Returns a dictionary of meta-parameters generated when this transformer was fitted.
-
-         Used by SciKeras to bind these parameters to the SciKeras estimator itself and make them available as inputs to the Keras model.
-
-         Returns:
-             Dict[str, Any]: Dictionary of meta-parameters generated when this transformer was fitted.
-         """
-         return {
-             "classes_": self.classes_,
-             "n_classes_": self.n_classes_,
-             "n_outputs_": self.n_outputs_,
-             "n_outputs_expected_": self.n_outputs_expected_,
-         }