pg-sui 1.0.2.1__py3-none-any.whl → 1.6.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pg-sui might be problematic. Click here for more details.

Files changed (112) hide show
  1. {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info}/METADATA +51 -70
  2. pg_sui-1.6.8.dist-info/RECORD +78 -0
  3. {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info}/WHEEL +1 -1
  4. pg_sui-1.6.8.dist-info/entry_points.txt +4 -0
  5. pg_sui-1.6.8.dist-info/top_level.txt +1 -0
  6. pgsui/__init__.py +35 -54
  7. pgsui/_version.py +34 -0
  8. pgsui/cli.py +635 -0
  9. pgsui/data_processing/config.py +576 -0
  10. pgsui/data_processing/containers.py +1782 -0
  11. pgsui/data_processing/transformers.py +121 -1103
  12. pgsui/electron/app/__main__.py +5 -0
  13. pgsui/electron/app/icons/icons/1024x1024.png +0 -0
  14. pgsui/electron/app/icons/icons/128x128.png +0 -0
  15. pgsui/electron/app/icons/icons/16x16.png +0 -0
  16. pgsui/electron/app/icons/icons/24x24.png +0 -0
  17. pgsui/electron/app/icons/icons/256x256.png +0 -0
  18. pgsui/electron/app/icons/icons/32x32.png +0 -0
  19. pgsui/electron/app/icons/icons/48x48.png +0 -0
  20. pgsui/electron/app/icons/icons/512x512.png +0 -0
  21. pgsui/electron/app/icons/icons/64x64.png +0 -0
  22. pgsui/electron/app/icons/icons/icon.icns +0 -0
  23. pgsui/electron/app/icons/icons/icon.ico +0 -0
  24. pgsui/electron/app/main.js +189 -0
  25. pgsui/electron/app/package-lock.json +6893 -0
  26. pgsui/electron/app/package.json +50 -0
  27. pgsui/electron/app/preload.js +15 -0
  28. pgsui/electron/app/server.py +146 -0
  29. pgsui/electron/app/ui/logo.png +0 -0
  30. pgsui/electron/app/ui/renderer.js +130 -0
  31. pgsui/electron/app/ui/styles.css +59 -0
  32. pgsui/electron/app/ui/ui_shim.js +72 -0
  33. pgsui/electron/bootstrap.py +43 -0
  34. pgsui/electron/launch.py +59 -0
  35. pgsui/electron/package.json +14 -0
  36. pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
  37. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
  38. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
  39. pgsui/impute/deterministic/imputers/allele_freq.py +691 -0
  40. pgsui/impute/deterministic/imputers/mode.py +679 -0
  41. pgsui/impute/deterministic/imputers/nmf.py +221 -0
  42. pgsui/impute/deterministic/imputers/phylo.py +971 -0
  43. pgsui/impute/deterministic/imputers/ref_allele.py +530 -0
  44. pgsui/impute/supervised/base.py +339 -0
  45. pgsui/impute/supervised/imputers/hist_gradient_boosting.py +293 -0
  46. pgsui/impute/supervised/imputers/random_forest.py +287 -0
  47. pgsui/impute/unsupervised/base.py +924 -0
  48. pgsui/impute/unsupervised/callbacks.py +89 -263
  49. pgsui/impute/unsupervised/imputers/autoencoder.py +972 -0
  50. pgsui/impute/unsupervised/imputers/nlpca.py +1264 -0
  51. pgsui/impute/unsupervised/imputers/ubp.py +1288 -0
  52. pgsui/impute/unsupervised/imputers/vae.py +957 -0
  53. pgsui/impute/unsupervised/loss_functions.py +158 -0
  54. pgsui/impute/unsupervised/models/autoencoder_model.py +208 -558
  55. pgsui/impute/unsupervised/models/nlpca_model.py +149 -468
  56. pgsui/impute/unsupervised/models/ubp_model.py +198 -1317
  57. pgsui/impute/unsupervised/models/vae_model.py +259 -618
  58. pgsui/impute/unsupervised/nn_scorers.py +215 -0
  59. pgsui/utils/classification_viz.py +591 -0
  60. pgsui/utils/misc.py +35 -480
  61. pgsui/utils/plotting.py +514 -824
  62. pgsui/utils/scorers.py +212 -438
  63. pg_sui-1.0.2.1.dist-info/RECORD +0 -75
  64. pg_sui-1.0.2.1.dist-info/top_level.txt +0 -3
  65. pgsui/example_data/phylip_files/test_n10.phy +0 -118
  66. pgsui/example_data/phylip_files/test_n100.phy +0 -118
  67. pgsui/example_data/phylip_files/test_n2.phy +0 -118
  68. pgsui/example_data/phylip_files/test_n500.phy +0 -118
  69. pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
  70. pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
  71. pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
  72. pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
  73. pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
  74. pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
  75. pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
  76. pgsui/example_data/trees/test.iqtree +0 -376
  77. pgsui/example_data/trees/test.qmat +0 -5
  78. pgsui/example_data/trees/test.rate +0 -2033
  79. pgsui/example_data/trees/test.tre +0 -1
  80. pgsui/example_data/trees/test_n10.rate +0 -19
  81. pgsui/example_data/trees/test_n100.rate +0 -109
  82. pgsui/example_data/trees/test_n500.rate +0 -509
  83. pgsui/example_data/trees/test_siterates.txt +0 -2024
  84. pgsui/example_data/trees/test_siterates_n10.txt +0 -10
  85. pgsui/example_data/trees/test_siterates_n100.txt +0 -100
  86. pgsui/example_data/trees/test_siterates_n500.txt +0 -500
  87. pgsui/example_data/vcf_files/test.vcf +0 -244
  88. pgsui/example_data/vcf_files/test.vcf.gz +0 -0
  89. pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
  90. pgsui/impute/estimators.py +0 -735
  91. pgsui/impute/impute.py +0 -1486
  92. pgsui/impute/simple_imputers.py +0 -1439
  93. pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -785
  94. pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1027
  95. pgsui/impute/unsupervised/keras_classifiers.py +0 -702
  96. pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
  97. pgsui/impute/unsupervised/neural_network_imputers.py +0 -1424
  98. pgsui/impute/unsupervised/neural_network_methods.py +0 -1549
  99. pgsui/pg_sui.py +0 -261
  100. pgsui/utils/sequence_tools.py +0 -407
  101. simulation/sim_benchmarks.py +0 -333
  102. simulation/sim_treeparams.py +0 -475
  103. test/__init__.py +0 -0
  104. test/pg_sui_simtest.py +0 -215
  105. test/pg_sui_testing.py +0 -523
  106. test/test.py +0 -297
  107. test/test_pgsui.py +0 -374
  108. test/test_tkc.py +0 -214
  109. {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info/licenses}/LICENSE +0 -0
  110. /pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
  111. /pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
  112. {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
test/test.py DELETED
@@ -1,297 +0,0 @@
1
- import unittest
2
- import pprint
3
- from snpio import GenotypeData
4
- from pgsui import *
5
- from pgsui.utils.misc import HiddenPrints
6
- import matplotlib.pyplot as plt
7
- import numpy as np
8
-
9
- from sklearn.metrics import (
10
- roc_auc_score,
11
- precision_recall_fscore_support,
12
- f1_score,
13
- average_precision_score,
14
- accuracy_score,
15
- )
16
-
17
- from sklearn.preprocessing import label_binarize
18
-
19
- from sklearn.utils.class_weight import compute_class_weight
20
-
21
-
22
# Module-level accumulators shared by every test method: one dictionary per
# metric, each filled in as {imputer class name: score}.
(
    all_accuracies,
    all_auc_rocs,
    all_precisions,
    all_recalls,
    all_avg_precisions,
    all_f1s,
) = ({} for _ in range(6))
29
-
30
-
31
def plot_scoring_metrics():
    """Plot the accumulated scoring metrics, one bar-chart subplot per metric.

    Reads the six module-level ``all_*`` dictionaries (method name -> score),
    draws each one as a bar chart on a 2 x 3 grid of subplots, colors the
    highest bar of each chart orange, and writes the figure to ``scores.png``
    in the current working directory.

    Args:
        None

    Returns:
        None: The function only has the side effect of saving the figure.
    """
    metrics = [
        "Accuracy",
        "AUC-ROC",
        "Precision",
        "Recall",
        "Average Precision",
        "F1 Score",
    ]
    metric_dicts = [
        all_accuracies,
        all_auc_rocs,
        all_precisions,
        all_recalls,
        all_avg_precisions,
        all_f1s,
    ]

    nrows = 2
    ncols = len(metrics) // nrows
    # squeeze=False keeps ``axes`` two-dimensional whatever the grid shape.
    fig, axes = plt.subplots(nrows, ncols, figsize=(20, 20), squeeze=False)

    for i, (metric, metric_dict) in enumerate(zip(metrics, metric_dicts)):
        # Row-major placement. The earlier ``i % num_metrics // 2 == 0`` check
        # was evaluated as ``(i % num_metrics) // 2 == 0`` because ``%`` and
        # ``//`` share precedence, so the row advanced at i == 1 and the
        # column index later ran past the end of the grid.
        row, col = divmod(i, ncols)
        ax = axes[row, col]

        methods = list(metric_dict)
        values = list(metric_dict.values())

        bars = ax.bar(methods, values, color="gray")

        # Highlight the best-scoring method; argmax is undefined on no data.
        if values:
            bars[int(np.argmax(values))].set_color("orange")

        # Method names are long, so draw the x tick labels vertically.
        ax.tick_params(axis="x", rotation=90)

        # Annotate every bar with its numeric value.
        for j, v in enumerate(values):
            ax.text(j, v, f"{v:.2f}", ha="center", va="bottom")

        ax.set_title(metric)
        ax.set_ylabel("Score")

    fig.suptitle("Scoring Metrics for All Methods")
    fig.savefig("scores.png", facecolor="white", bbox_inches="tight")

    # This function is called once per test; release the figure so that
    # pyplot does not keep accumulating open figures.
    plt.close(fig)
99
-
100
-
101
class TestMyClasses(unittest.TestCase):
    """Run every imputer class on example data with simulated missing values.

    Each test builds an imputer from the simulated dataset, scores the imputed
    genotypes against the original ones at the simulated-missing positions,
    records the scores in the module-level ``all_*`` dictionaries and
    re-renders the summary plot.
    """

    def setUp(self):
        """Load the example alignment and simulate 20% random missing data."""
        with HiddenPrints():
            self.genotype_data = GenotypeData(
                filename="pgsui/example_data/phylip_files/test_n100.phy",
                popmapfile="pgsui/example_data/popmaps/test.popmap",
                guidetree="pgsui/example_data/trees/test.tre",
                qmatrix="pgsui/example_data/trees/test.qmat",
                siterates="pgsui/example_data/trees/test_siterates_n100.txt",
                prefix="test_imputer",
                force_popmap=True,
                plot_format="png",
            )

            # Create a SimGenotypeDataTransformer instance and use it
            # to simulate missing data
            self.transformer = SimGenotypeDataTransformer(
                genotype_data=self.genotype_data,
                prop_missing=0.2,
                strategy="random",
            )
            self.transformer.fit(self.genotype_data.genotypes_012(fmt="numpy"))
            self.simulated_data = self.genotype_data.copy()

            self.simulated_data.genotypes_012 = self.transformer.transform(
                self.genotype_data.genotypes_012(fmt="numpy")
            )

    def _test_class(self, class_instance, do_gridsearch=False):
        """Instantiate one imputer class, then score and record its output.

        Args:
            class_instance: The imputer class (not an instance) to test.
            do_gridsearch (bool): If True, pass a small parameter grid
                appropriate for the class; otherwise pass ``gridparams=None``.
        """
        print(f"\nMETHOD: {class_instance.__name__}\n")

        # Always bind param_grid so the constructor call below can never hit
        # an unbound local, whichever branch (if any) matches.
        param_grid = None
        if do_gridsearch:
            # Do a simple test.
            if class_instance in (ImputeRandomForest, ImputeXGBoost):
                param_grid = {"n_estimators": [50, 100]}
            elif class_instance in (
                ImputeVAE,
                ImputeStandardAutoEncoder,
                ImputeNLPCA,
                ImputeUBP,
            ):
                param_grid = {"dropout_rate": [0.1, 0.2]}
            elif class_instance == ImputeKNN:
                param_grid = {"n_neighbors": [5, 8]}

        instance = class_instance(
            self.simulated_data,
            gridparams=param_grid,
            sample_weights=None,
        )
        imputed_data = instance.imputed.genotypes_int

        # Order matches the tuple returned by _scoring_metrics().
        scores = self._scoring_metrics(
            self.genotype_data.genotypes_int, imputed_data
        )
        labels = (
            "ACCURACY",
            "AUC-ROC",
            "PRECISION",
            "RECALL",
            "AVERAGE PRECISION",
            "F1 SCORE",
        )
        stores = (
            all_accuracies,
            all_auc_rocs,
            all_precisions,
            all_recalls,
            all_avg_precisions,
            all_f1s,
        )

        printer = pprint.PrettyPrinter(indent=4, sort_dicts=True)
        for label, score in zip(labels, scores):
            printer.pprint(f"{label}: {score}")
        print("\n")

        # Store metrics
        method_name = class_instance.__name__
        for store, score in zip(stores, scores):
            store[method_name] = score
        plot_scoring_metrics()

    def test_ImputeKNN(self):
        self._test_class(ImputeKNN)

    def test_ImputeRandomForest(self):
        self._test_class(ImputeRandomForest)

    def test_ImputeXGBoost(self):
        self._test_class(ImputeXGBoost)

    def test_ImputeVAE(self):
        self._test_class(ImputeVAE)

    def test_ImputeStandardAutoEncoder(self):
        self._test_class(ImputeStandardAutoEncoder)

    def test_ImputeUBP(self):
        self._test_class(ImputeUBP)

    def test_ImputeNLPCA(self):
        self._test_class(ImputeNLPCA)

    def test_ImputeKNN_grid(self):
        self._test_class(ImputeKNN, do_gridsearch=True)

    def test_ImputeRandomForest_grid(self):
        self._test_class(ImputeRandomForest, do_gridsearch=True)

    def test_ImputeXGBoost_grid(self):
        self._test_class(ImputeXGBoost, do_gridsearch=True)

    def test_ImputeVAE_grid(self):
        self._test_class(ImputeVAE, do_gridsearch=True)

    def test_ImputeStandardAutoEncoder_grid(self):
        self._test_class(ImputeStandardAutoEncoder, do_gridsearch=True)

    def test_ImputeUBP_grid(self):
        self._test_class(ImputeUBP, do_gridsearch=True)

    def test_ImputeNLPCA_grid(self):
        self._test_class(ImputeNLPCA, do_gridsearch=True)

    def test_ImputePhylo(self):
        self._test_class(ImputePhylo)

    def test_ImputeAlleleFreq(self):
        self._test_class(ImputeAlleleFreq)

    def test_ImputeMF(self):
        self._test_class(ImputeMF)

    def test_ImputeRefAllele(self):
        self._test_class(ImputeRefAllele)

    def _scoring_metrics(self, y_true, y_pred):
        """Score imputed genotypes at the simulated-missing positions.

        Both inputs are indexed with ``self.transformer.sim_missing_mask_``
        first, so only the masked entries are scored. Labels are binarized
        over the classes 0, 1 and 2, and every metric except accuracy is
        requested with ``average="weighted"``.

        Args:
            y_true: True genotype values; must support indexing by the mask.

            y_pred: Imputed genotype values; must support indexing by the mask.

        Returns:
            tuple: ``(accuracy, auc_roc, precision, recall, avg_precision,
            f1)``, in that order.
        """
        mask = self.transformer.sim_missing_mask_
        y_true = y_true[mask]
        y_pred = y_pred[mask]

        # Binarize the output
        y_true_bin = label_binarize(y_true, classes=[0, 1, 2])
        y_pred_bin = label_binarize(y_pred, classes=[0, 1, 2])

        accuracy = accuracy_score(y_true, y_pred)

        # AUC-ROC score
        auc_roc = roc_auc_score(y_true_bin, y_pred_bin, average="weighted")

        # Precision-recall score
        precision, recall, _, _ = precision_recall_fscore_support(
            y_true_bin, y_pred_bin, average="weighted"
        )

        # Average precision score
        avg_precision = average_precision_score(
            y_true_bin, y_pred_bin, average="weighted"
        )

        f1 = f1_score(y_true_bin, y_pred_bin, average="weighted")

        return (accuracy, auc_roc, precision, recall, avg_precision, f1)
294
-
295
-
296
# Run the unittest test runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
test/test_pgsui.py DELETED
@@ -1,374 +0,0 @@
1
- #!/usr/bin/env python
2
-
3
- # Standard library imports
4
- import os
5
- import sys
6
-
7
- from contextlib import redirect_stdout
8
-
9
- try:
10
- from importlib.resources import files, as_file
11
- except ImportError:
12
- # Fall back to the `importlib_resources` backport on Python < 3.9.
13
- from importlib_resources import files, as_file
14
-
15
- import numpy as np
16
- import pandas as pd
17
- import scipy.stats as stats
18
-
19
- from sklearn_genetic.space import Continuous, Categorical, Integer
20
-
21
- from pgsui import GenotypeData
22
- from pgsui.impute.estimators import *
23
- from pgsui.impute.simple_imputers import *
24
- from pgsui.example_data import structure_files, phylip_files, popmaps, trees
25
-
26
-
27
def _print_banner(rule, message):
    """Print ``message`` framed by two ``rule`` lines, then a blank gap."""
    print(rule)
    print(message)
    print(rule)
    print("\n")


def main():
    """Test all PG-SUI Methods.

    Can be invoked by typing 'pgsuitest' on the command line.

    Everything printed while the tests run is redirected to
    ``pgsuitest.log.txt`` in the current working directory. The guide tree is
    copied to a temporary file (``.xxinput_treexx.tre``) that is removed again
    when the run ends, whether or not a test raised.
    """
    testaln = {
        "phylip": "test_n10.phy",
        "structure2row": "test.nopops.2row.10sites.str",
        "structure2rowPopID": "test.pops.2row.10sites.str",
        "structure1row": "test.nopops.1row.10sites.str",
        "structure1rowPopID": "test.pops.1row.10sites.str",
    }

    popmap = "test.popmap"
    tre = "test.tre"
    iqtre = "test.iqtree"
    qmat = "test.qmat"
    siterateiqtree = "test_n10.rate"
    siterate = "test_siterates_n10.txt"
    prefix = "setuptest"
    t = ".xxinput_treexx.tre"  # temporary treefile

    strfile = files(structure_files).joinpath(testaln["structure2row"])
    popmapfile = files(popmaps).joinpath(popmap)
    treefile = files(trees).joinpath(tre)
    iqtreeqmatfile = files(trees).joinpath(iqtre)
    qmatfile = files(trees).joinpath(qmat)
    siteratefileiqtree = files(trees).joinpath(siterateiqtree)
    siteratefile = files(trees).joinpath(siterate)

    # Redirect stdout to logfile
    with open("pgsuitest.log.txt", "w") as logfile, redirect_stdout(logfile):
        with as_file(popmapfile) as m, as_file(
            treefile
        ) as guidetree, as_file(iqtreeqmatfile) as i, as_file(
            qmatfile
        ) as q, as_file(
            siteratefileiqtree
        ) as siq, as_file(
            siteratefile
        ) as s:
            try:
                # Added this code block because for some reason toytree won't
                # read the as_file() temporary file using the context manager.
                with open(guidetree, "r") as fin:
                    input_tree = fin.read()
                with open(t, "w") as fout:
                    fout.write(input_tree)

                _print_banner(
                    "############################################",
                    "### TESTING GenotypeData WITH EACH FILETYPE",
                )

                for ft, aln in testaln.items():
                    if ft == "phylip":
                        data_dir = phylip_files
                    else:
                        data_dir = structure_files

                    alnfile = files(data_dir).joinpath(aln)

                    _print_banner(
                        "--------------------------------------------------",
                        f"--- Testing GenotypeData with {ft} filetype...",
                    )

                    with as_file(alnfile) as a:
                        data = GenotypeData(
                            filename=a,
                            filetype=ft,
                            popmapfile=m,
                            guidetree=t,
                            qmatrix_iqtree=i,
                            siterates_iqtree=siq,
                        )

                _print_banner(
                    "-------------------------------------------------------",
                    "--- Testing GenotypeData with non-iqtree rates files...",
                )

                with as_file(strfile) as a:
                    data = GenotypeData(
                        filename=a,
                        filetype="structure2row",
                        popmapfile=m,
                        guidetree=t,
                        qmatrix=q,
                        siterates=s,
                    )

                    # The imputers below all run on this last dataset.
                    data = GenotypeData(
                        filename=a,
                        filetype="structure2row",
                        popmapfile=m,
                        guidetree=t,
                        qmatrix_iqtree=i,
                        siterates_iqtree=siq,
                    )

                _print_banner(
                    "++++++++++++++++++++++++++++++++",
                    "+++ SUCCESS!",
                )

                _print_banner(
                    "################################",
                    "### TESTING SIMPLE IMPUTERS...",
                )

                _print_banner(
                    "-----------------------------------------------------",
                    "--- Testing ImputeAlleleFreq by-population...",
                )

                ImputeAlleleFreq(
                    genotype_data=data,
                    by_populations=True,
                    prefix=prefix,
                    write_output=False,
                )

                _print_banner(
                    "-----------------------------------------------------",
                    "--- Testing ImputeAlleleFreq global...",
                )

                ImputeAlleleFreq(
                    genotype_data=data,
                    by_populations=False,
                    prefix=prefix,
                    write_output=False,
                )

                _print_banner(
                    "-----------------------------------------------------",
                    "--- Testing ImputePhylo...",
                )

                ImputePhylo(
                    genotype_data=data,
                    prefix=prefix,
                    disable_progressbar=True,
                    write_output=False,
                )

                _print_banner(
                    "-----------------------------------------------------",
                    "--- Testing ImputeNMF...",
                )

                ImputeNMF(
                    genotype_data=data,
                    prefix=prefix,
                    write_output=False,
                )

                _print_banner(
                    "++++++++++++++++++++++++++++++++",
                    "+++ SUCCESS!",
                )

                ##############################################
                ### Make gridparams
                ##############################################

                # Number of features to consider at every split
                max_features = ["sqrt", "log2"]

                # Maximum number of levels in the tree
                max_depth = [int(x) for x in np.linspace(10, 110, num=11)]
                max_depth.append(None)

                # Minimum number of samples required to split a node
                min_samples_split = [int(x) for x in np.linspace(2, 10, num=5)]

                # Minimum number of samples required at each leaf node
                min_samples_leaf = [int(x) for x in np.linspace(1, 5, num=5)]

                # Random Forest gridparams - RandomizedSearchCV
                grid_params_random = {
                    "max_features": max_features,
                    "max_depth": max_depth,
                    "min_samples_split": min_samples_split,
                    "min_samples_leaf": min_samples_leaf,
                }

                # Genetic Algorithm grid_params
                grid_params_ga = {
                    "max_features": Categorical(["sqrt", "log2"]),
                    "min_samples_split": Integer(2, 10),
                    "min_samples_leaf": Integer(1, 10),
                    "max_depth": Integer(2, 110),
                }

                _print_banner(
                    "#################################",
                    "### TESTING IterativeImputer...",
                )

                _print_banner(
                    "-----------------------------------------------------",
                    "--- Testing ImputeRandomForest with randomized grid\n"
                    "--- search and initial_strategy == 'populations'...",
                )

                # Random forest imputation with RandomizedSearchCV grid search
                ImputeRandomForest(
                    data,
                    prefix=prefix,
                    n_estimators=50,
                    n_nearest_features=2,
                    gridparams=grid_params_random,
                    cv=3,
                    grid_iter=40,
                    n_jobs=-1,
                    max_iter=2,
                    column_subset=1.0,
                    ga=False,
                    disable_progressbar=True,
                    extratrees=False,
                    mutation_probability=0.1,
                    chunk_size=1.0,
                    initial_strategy="populations",
                )

                _print_banner(
                    "-----------------------------------------------------",
                    "--- Testing ImputeRandomForest with GA grid search and\n"
                    "--- initial_strategy == 'phylogeny'...",
                )

                # Genetic Algorithm grid search Test
                ImputeRandomForest(
                    data,
                    prefix=prefix,
                    n_estimators=50,
                    n_nearest_features=2,
                    gridparams=grid_params_ga,
                    cv=3,
                    grid_iter=40,
                    n_jobs=-1,
                    max_iter=2,
                    column_subset=1.0,
                    ga=True,
                    disable_progressbar=True,
                    extratrees=False,
                    chunk_size=1.0,
                    initial_strategy="phylogeny",
                )

                _print_banner(
                    "++++++++++++++++++++++++++++++++",
                    "+++ SUCCESS!",
                )

                _print_banner(
                    "#################################",
                    "TESTING NEURAL NETWORKS...",
                )

                _print_banner(
                    "-----------------------------------------------------",
                    "--- Testing VAE with validation procedure with\n"
                    "--- intial_strategy='populations'...",
                )

                ImputeVAE(
                    genotype_data=data,
                    prefix=prefix,
                    disable_progressbar=True,
                    validation_only=1.0,
                    initial_strategy="populations",
                    cv=3,
                )

                _print_banner(
                    "-----------------------------------------------------",
                    "--- Testing ImputeNLPCA with\n"
                    "--- initial_strategy == 'phylogeny'...",
                )

                ImputeNLPCA(
                    data,
                    n_components=3,
                    initial_strategy="phylogeny",
                    disable_progressbar=True,
                    cv=3,
                    hidden_activation="elu",
                    hidden_layer_sizes="midpoint",
                    validation_only=None,
                    num_hidden_layers=1,
                    learning_rate=0.1,
                )

                _print_banner(
                    "-------------------------------------------------------",
                    "--- Testing ImputeUBP with initial_strategy == 'nmf'...",
                )

                ImputeUBP(
                    genotype_data=data,
                    initial_strategy="nmf",
                    disable_progressbar=True,
                    validation_only=None,
                    learning_rate=0.1,
                    num_hidden_layers=1,
                    hidden_layer_sizes=1,
                    hidden_activation="elu",
                    cv=3,
                    n_components=3,
                )

                _print_banner(
                    "++++++++++++++++++++++++++++++++",
                    "+++ SUCCESS!",
                )
            finally:
                # Best-effort removal of the temporary treefile. Running it in
                # ``finally`` means a failing test no longer leaves the file
                # behind; a missing file is not an error.
                try:
                    os.remove(t)
                except OSError:
                    pass

            _print_banner(
                "######################################",
                "### ALL TESTS PASSED SUCCESSFULLY!",
            )