pg-sui 0.2.0__py3-none-any.whl → 1.6.14.dev9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info}/METADATA +101 -79
- pg_sui-1.6.14.dev9.dist-info/RECORD +81 -0
- {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info}/WHEEL +1 -1
- pg_sui-1.6.14.dev9.dist-info/entry_points.txt +4 -0
- {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info/licenses}/LICENSE +0 -0
- pg_sui-1.6.14.dev9.dist-info/top_level.txt +1 -0
- pgsui/__init__.py +35 -54
- pgsui/_version.py +34 -0
- pgsui/cli.py +909 -0
- pgsui/data_processing/__init__.py +0 -0
- pgsui/data_processing/config.py +565 -0
- pgsui/data_processing/containers.py +1424 -0
- pgsui/data_processing/transformers.py +557 -907
- pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
- pgsui/electron/app/__main__.py +5 -0
- pgsui/electron/app/extra-resources/.gitkeep +1 -0
- pgsui/electron/app/icons/icons/1024x1024.png +0 -0
- pgsui/electron/app/icons/icons/128x128.png +0 -0
- pgsui/electron/app/icons/icons/16x16.png +0 -0
- pgsui/electron/app/icons/icons/24x24.png +0 -0
- pgsui/electron/app/icons/icons/256x256.png +0 -0
- pgsui/electron/app/icons/icons/32x32.png +0 -0
- pgsui/electron/app/icons/icons/48x48.png +0 -0
- pgsui/electron/app/icons/icons/512x512.png +0 -0
- pgsui/electron/app/icons/icons/64x64.png +0 -0
- pgsui/electron/app/icons/icons/icon.icns +0 -0
- pgsui/electron/app/icons/icons/icon.ico +0 -0
- pgsui/electron/app/main.js +227 -0
- pgsui/electron/app/package-lock.json +6894 -0
- pgsui/electron/app/package.json +51 -0
- pgsui/electron/app/preload.js +15 -0
- pgsui/electron/app/server.py +157 -0
- pgsui/electron/app/ui/logo.png +0 -0
- pgsui/electron/app/ui/renderer.js +131 -0
- pgsui/electron/app/ui/styles.css +59 -0
- pgsui/electron/app/ui/ui_shim.js +72 -0
- pgsui/electron/bootstrap.py +43 -0
- pgsui/electron/launch.py +57 -0
- pgsui/electron/package.json +14 -0
- pgsui/example_data/__init__.py +0 -0
- pgsui/example_data/phylip_files/__init__.py +0 -0
- pgsui/example_data/phylip_files/test.phy +0 -0
- pgsui/example_data/popmaps/__init__.py +0 -0
- pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
- pgsui/example_data/structure_files/__init__.py +0 -0
- pgsui/example_data/structure_files/test.pops.2row.allsites.str +0 -0
- pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
- pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
- pgsui/impute/__init__.py +0 -0
- pgsui/impute/deterministic/imputers/allele_freq.py +725 -0
- pgsui/impute/deterministic/imputers/mode.py +844 -0
- pgsui/impute/deterministic/imputers/nmf.py +221 -0
- pgsui/impute/deterministic/imputers/phylo.py +973 -0
- pgsui/impute/deterministic/imputers/ref_allele.py +669 -0
- pgsui/impute/supervised/__init__.py +0 -0
- pgsui/impute/supervised/base.py +343 -0
- pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
- pgsui/impute/supervised/imputers/hist_gradient_boosting.py +317 -0
- pgsui/impute/supervised/imputers/random_forest.py +291 -0
- pgsui/impute/unsupervised/__init__.py +0 -0
- pgsui/impute/unsupervised/base.py +1118 -0
- pgsui/impute/unsupervised/callbacks.py +92 -262
- {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
- pgsui/impute/unsupervised/imputers/autoencoder.py +1285 -0
- pgsui/impute/unsupervised/imputers/nlpca.py +1554 -0
- pgsui/impute/unsupervised/imputers/ubp.py +1575 -0
- pgsui/impute/unsupervised/imputers/vae.py +1228 -0
- pgsui/impute/unsupervised/loss_functions.py +261 -0
- pgsui/impute/unsupervised/models/__init__.py +0 -0
- pgsui/impute/unsupervised/models/autoencoder_model.py +215 -567
- pgsui/impute/unsupervised/models/nlpca_model.py +155 -394
- pgsui/impute/unsupervised/models/ubp_model.py +180 -1106
- pgsui/impute/unsupervised/models/vae_model.py +269 -630
- pgsui/impute/unsupervised/nn_scorers.py +255 -0
- pgsui/utils/__init__.py +0 -0
- pgsui/utils/classification_viz.py +608 -0
- pgsui/utils/logging_utils.py +22 -0
- pgsui/utils/misc.py +35 -480
- pgsui/utils/plotting.py +996 -829
- pgsui/utils/pretty_metrics.py +290 -0
- pgsui/utils/scorers.py +213 -666
- pg_sui-0.2.0.dist-info/RECORD +0 -75
- pg_sui-0.2.0.dist-info/top_level.txt +0 -3
- pgsui/example_data/phylip_files/test_n10.phy +0 -118
- pgsui/example_data/phylip_files/test_n100.phy +0 -118
- pgsui/example_data/phylip_files/test_n2.phy +0 -118
- pgsui/example_data/phylip_files/test_n500.phy +0 -118
- pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
- pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
- pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
- pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
- pgsui/example_data/trees/test.iqtree +0 -376
- pgsui/example_data/trees/test.qmat +0 -5
- pgsui/example_data/trees/test.rate +0 -2033
- pgsui/example_data/trees/test.tre +0 -1
- pgsui/example_data/trees/test_n10.rate +0 -19
- pgsui/example_data/trees/test_n100.rate +0 -109
- pgsui/example_data/trees/test_n500.rate +0 -509
- pgsui/example_data/trees/test_siterates.txt +0 -2024
- pgsui/example_data/trees/test_siterates_n10.txt +0 -10
- pgsui/example_data/trees/test_siterates_n100.txt +0 -100
- pgsui/example_data/trees/test_siterates_n500.txt +0 -500
- pgsui/example_data/vcf_files/test.vcf +0 -244
- pgsui/example_data/vcf_files/test.vcf.gz +0 -0
- pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
- pgsui/impute/estimators.py +0 -1268
- pgsui/impute/impute.py +0 -1463
- pgsui/impute/simple_imputers.py +0 -1431
- pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -782
- pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1024
- pgsui/impute/unsupervised/keras_classifiers.py +0 -697
- pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
- pgsui/impute/unsupervised/neural_network_imputers.py +0 -1440
- pgsui/impute/unsupervised/neural_network_methods.py +0 -1395
- pgsui/pg_sui.py +0 -261
- pgsui/utils/sequence_tools.py +0 -407
- simulation/sim_benchmarks.py +0 -333
- simulation/sim_treeparams.py +0 -475
- test/__init__.py +0 -0
- test/pg_sui_simtest.py +0 -215
- test/pg_sui_testing.py +0 -523
- test/test.py +0 -151
- test/test_pgsui.py +0 -374
- test/test_tkc.py +0 -185
test/test_pgsui.py
DELETED
|
@@ -1,374 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
|
|
3
|
-
# Standard library imports
|
|
4
|
-
import os
|
|
5
|
-
import sys
|
|
6
|
-
|
|
7
|
-
from contextlib import redirect_stdout
|
|
8
|
-
|
|
9
|
-
try:
|
|
10
|
-
from importlib.resources import files, as_file
|
|
11
|
-
except ImportError:
|
|
12
|
-
# Try backported to PY<37 `importlib_resources`.
|
|
13
|
-
from importlib_resources import files, as_file
|
|
14
|
-
|
|
15
|
-
import numpy as np
|
|
16
|
-
import pandas as pd
|
|
17
|
-
import scipy.stats as stats
|
|
18
|
-
|
|
19
|
-
from sklearn_genetic.space import Continuous, Categorical, Integer
|
|
20
|
-
|
|
21
|
-
from pgsui import GenotypeData
|
|
22
|
-
from pgsui.impute.estimators import *
|
|
23
|
-
from pgsui.impute.simple_imputers import *
|
|
24
|
-
from pgsui.example_data import structure_files, phylip_files, popmaps, trees
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def main():
|
|
28
|
-
"""Test all PG-SUI Methods.
|
|
29
|
-
|
|
30
|
-
Can be invoked by typing 'pgsuitest' on the command line.
|
|
31
|
-
"""
|
|
32
|
-
|
|
33
|
-
# Redirect stdout to logfile
|
|
34
|
-
with open("pgsuitest.log.txt", "w") as logfile:
|
|
35
|
-
with redirect_stdout(logfile):
|
|
36
|
-
testaln = {
|
|
37
|
-
"phylip": "test_n10.phy",
|
|
38
|
-
"structure2row": "test.nopops.2row.10sites.str",
|
|
39
|
-
"structure2rowPopID": "test.pops.2row.10sites.str",
|
|
40
|
-
"structure1row": "test.nopops.1row.10sites.str",
|
|
41
|
-
"structure1rowPopID": "test.pops.1row.10sites.str",
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
popmap = "test.popmap"
|
|
45
|
-
tre = "test.tre"
|
|
46
|
-
iqtre = "test.iqtree"
|
|
47
|
-
qmat = "test.qmat"
|
|
48
|
-
siterateiqtree = "test_n10.rate"
|
|
49
|
-
siterate = "test_siterates_n10.txt"
|
|
50
|
-
prefix = "setuptest"
|
|
51
|
-
t = ".xxinput_treexx.tre" # temporary treefile
|
|
52
|
-
|
|
53
|
-
strfile = files(structure_files).joinpath(testaln["structure2row"])
|
|
54
|
-
popmapfile = files(popmaps).joinpath(popmap)
|
|
55
|
-
treefile = files(trees).joinpath(tre)
|
|
56
|
-
iqtreeqmatfile = files(trees).joinpath(iqtre)
|
|
57
|
-
qmatfile = files(trees).joinpath(qmat)
|
|
58
|
-
siteratefileiqtree = files(trees).joinpath(siterateiqtree)
|
|
59
|
-
siteratefile = files(trees).joinpath(siterate)
|
|
60
|
-
|
|
61
|
-
with as_file(popmapfile) as m, as_file(
|
|
62
|
-
treefile
|
|
63
|
-
) as guidetree, as_file(iqtreeqmatfile) as i, as_file(
|
|
64
|
-
qmatfile
|
|
65
|
-
) as q, as_file(
|
|
66
|
-
siteratefileiqtree
|
|
67
|
-
) as siq, as_file(
|
|
68
|
-
siteratefile
|
|
69
|
-
) as s:
|
|
70
|
-
|
|
71
|
-
# Added this code block because for some reason toytree won't
|
|
72
|
-
# read the as_file() temporary file using the context manager.
|
|
73
|
-
with open(guidetree, "r") as fin:
|
|
74
|
-
input_tree = fin.read()
|
|
75
|
-
with open(t, "w") as fout:
|
|
76
|
-
fout.write(input_tree)
|
|
77
|
-
|
|
78
|
-
print("############################################")
|
|
79
|
-
print("### TESTING GenotypeData WITH EACH FILETYPE")
|
|
80
|
-
print("############################################")
|
|
81
|
-
print("\n")
|
|
82
|
-
|
|
83
|
-
for ft, aln in testaln.items():
|
|
84
|
-
if ft == "phylip":
|
|
85
|
-
data_dir = phylip_files
|
|
86
|
-
else:
|
|
87
|
-
data_dir = structure_files
|
|
88
|
-
|
|
89
|
-
alnfile = files(data_dir).joinpath(aln)
|
|
90
|
-
|
|
91
|
-
print("--------------------------------------------------")
|
|
92
|
-
print(f"--- Testing GenotypeData with {ft} filetype...")
|
|
93
|
-
print("--------------------------------------------------")
|
|
94
|
-
print("\n")
|
|
95
|
-
|
|
96
|
-
with as_file(alnfile) as a:
|
|
97
|
-
data = GenotypeData(
|
|
98
|
-
filename=a,
|
|
99
|
-
filetype=ft,
|
|
100
|
-
popmapfile=m,
|
|
101
|
-
guidetree=t,
|
|
102
|
-
qmatrix_iqtree=i,
|
|
103
|
-
siterates_iqtree=siq,
|
|
104
|
-
)
|
|
105
|
-
|
|
106
|
-
print("-------------------------------------------------------")
|
|
107
|
-
print("--- Testing GenotypeData with non-iqtree rates files...")
|
|
108
|
-
print("-------------------------------------------------------")
|
|
109
|
-
print("\n")
|
|
110
|
-
|
|
111
|
-
with as_file(strfile) as a:
|
|
112
|
-
data = GenotypeData(
|
|
113
|
-
filename=a,
|
|
114
|
-
filetype="structure2row",
|
|
115
|
-
popmapfile=m,
|
|
116
|
-
guidetree=t,
|
|
117
|
-
qmatrix=q,
|
|
118
|
-
siterates=s,
|
|
119
|
-
)
|
|
120
|
-
|
|
121
|
-
data = GenotypeData(
|
|
122
|
-
filename=a,
|
|
123
|
-
filetype="structure2row",
|
|
124
|
-
popmapfile=m,
|
|
125
|
-
guidetree=t,
|
|
126
|
-
qmatrix_iqtree=i,
|
|
127
|
-
siterates_iqtree=siq,
|
|
128
|
-
)
|
|
129
|
-
|
|
130
|
-
print("++++++++++++++++++++++++++++++++")
|
|
131
|
-
print("+++ SUCCESS!")
|
|
132
|
-
print("++++++++++++++++++++++++++++++++")
|
|
133
|
-
print("\n")
|
|
134
|
-
|
|
135
|
-
print("################################")
|
|
136
|
-
print("### TESTING SIMPLE IMPUTERS...")
|
|
137
|
-
print("################################")
|
|
138
|
-
print("\n")
|
|
139
|
-
|
|
140
|
-
print("-----------------------------------------------------")
|
|
141
|
-
print("--- Testing ImputeAlleleFreq by-population...")
|
|
142
|
-
print("-----------------------------------------------------")
|
|
143
|
-
print("\n")
|
|
144
|
-
|
|
145
|
-
afpops = ImputeAlleleFreq(
|
|
146
|
-
genotype_data=data,
|
|
147
|
-
by_populations=True,
|
|
148
|
-
prefix=prefix,
|
|
149
|
-
write_output=False,
|
|
150
|
-
)
|
|
151
|
-
|
|
152
|
-
print("-----------------------------------------------------")
|
|
153
|
-
print("--- Testing ImputeAlleleFreq global...")
|
|
154
|
-
print("-----------------------------------------------------")
|
|
155
|
-
print("\n")
|
|
156
|
-
|
|
157
|
-
afpops = ImputeAlleleFreq(
|
|
158
|
-
genotype_data=data,
|
|
159
|
-
by_populations=False,
|
|
160
|
-
prefix=prefix,
|
|
161
|
-
write_output=False,
|
|
162
|
-
)
|
|
163
|
-
|
|
164
|
-
print("-----------------------------------------------------")
|
|
165
|
-
print("--- Testing ImputePhylo...")
|
|
166
|
-
print("-----------------------------------------------------")
|
|
167
|
-
print("\n")
|
|
168
|
-
|
|
169
|
-
phylo = ImputePhylo(
|
|
170
|
-
genotype_data=data,
|
|
171
|
-
prefix=prefix,
|
|
172
|
-
disable_progressbar=True,
|
|
173
|
-
write_output=False,
|
|
174
|
-
)
|
|
175
|
-
|
|
176
|
-
print("-----------------------------------------------------")
|
|
177
|
-
print("--- Testing ImputeNMF...")
|
|
178
|
-
print("-----------------------------------------------------")
|
|
179
|
-
print("\n")
|
|
180
|
-
|
|
181
|
-
mf = ImputeNMF(
|
|
182
|
-
genotype_data=data,
|
|
183
|
-
prefix=prefix,
|
|
184
|
-
write_output=False,
|
|
185
|
-
)
|
|
186
|
-
|
|
187
|
-
print("++++++++++++++++++++++++++++++++")
|
|
188
|
-
print("+++ SUCCESS!")
|
|
189
|
-
print("++++++++++++++++++++++++++++++++")
|
|
190
|
-
print("\n")
|
|
191
|
-
|
|
192
|
-
##############################################
|
|
193
|
-
### Make gridparams
|
|
194
|
-
##############################################
|
|
195
|
-
|
|
196
|
-
# For randomizedsearchcv
|
|
197
|
-
# Number of trees in random forest
|
|
198
|
-
n_estimators = [
|
|
199
|
-
int(x) for x in np.linspace(start=100, stop=1000, num=10)
|
|
200
|
-
]
|
|
201
|
-
|
|
202
|
-
# Number of features to consider at every split
|
|
203
|
-
max_features = ["sqrt", "log2"]
|
|
204
|
-
|
|
205
|
-
# Maximum number of levels in the tree
|
|
206
|
-
max_depth = [int(x) for x in np.linspace(10, 110, num=11)]
|
|
207
|
-
max_depth.append(None)
|
|
208
|
-
|
|
209
|
-
# Minimmum number of samples required to split a node
|
|
210
|
-
min_samples_split = [int(x) for x in np.linspace(2, 10, num=5)]
|
|
211
|
-
|
|
212
|
-
# Minimum number of samples required at each leaf node
|
|
213
|
-
min_samples_leaf = [int(x) for x in np.linspace(1, 5, num=5)]
|
|
214
|
-
|
|
215
|
-
# Proportion of dataset to use with bootstrapping
|
|
216
|
-
# max_samples = [x for x in np.linspace(0.5, 1.0, num=6)]
|
|
217
|
-
|
|
218
|
-
# Random Forest gridparams - RandomizedSearchCV
|
|
219
|
-
grid_params_random = {
|
|
220
|
-
"max_features": max_features,
|
|
221
|
-
"max_depth": max_depth,
|
|
222
|
-
"min_samples_split": min_samples_split,
|
|
223
|
-
"min_samples_leaf": min_samples_leaf,
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
# Genetic Algorithm grid_params
|
|
227
|
-
grid_params_ga = {
|
|
228
|
-
"max_features": Categorical(["sqrt", "log2"]),
|
|
229
|
-
"min_samples_split": Integer(2, 10),
|
|
230
|
-
"min_samples_leaf": Integer(1, 10),
|
|
231
|
-
"max_depth": Integer(2, 110),
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
print("#################################")
|
|
235
|
-
print("### TESTING IterativeImputer...")
|
|
236
|
-
print("#################################")
|
|
237
|
-
print("\n")
|
|
238
|
-
|
|
239
|
-
print("-----------------------------------------------------")
|
|
240
|
-
print(
|
|
241
|
-
"--- Testing ImputeRandomForest with randomized grid\n"
|
|
242
|
-
"--- search and initial_strategy == 'populations'..."
|
|
243
|
-
)
|
|
244
|
-
print("-----------------------------------------------------")
|
|
245
|
-
print("\n")
|
|
246
|
-
|
|
247
|
-
# Random forest imputation with RandomizedSearchCV grid search
|
|
248
|
-
rf_imp = ImputeRandomForest(
|
|
249
|
-
data,
|
|
250
|
-
prefix=prefix,
|
|
251
|
-
n_estimators=50,
|
|
252
|
-
n_nearest_features=2,
|
|
253
|
-
gridparams=grid_params_random,
|
|
254
|
-
cv=3,
|
|
255
|
-
grid_iter=40,
|
|
256
|
-
n_jobs=-1,
|
|
257
|
-
max_iter=2,
|
|
258
|
-
column_subset=1.0,
|
|
259
|
-
ga=False,
|
|
260
|
-
disable_progressbar=True,
|
|
261
|
-
extratrees=False,
|
|
262
|
-
mutation_probability=0.1,
|
|
263
|
-
chunk_size=1.0,
|
|
264
|
-
initial_strategy="populations",
|
|
265
|
-
)
|
|
266
|
-
|
|
267
|
-
print("-----------------------------------------------------")
|
|
268
|
-
print(
|
|
269
|
-
"--- Testing ImputeRandomForest with GA grid search and\n"
|
|
270
|
-
"--- initial_strategy == 'phylogeny'..."
|
|
271
|
-
)
|
|
272
|
-
print("-----------------------------------------------------")
|
|
273
|
-
print("\n")
|
|
274
|
-
|
|
275
|
-
# Genetic Algorithm grid search Test
|
|
276
|
-
rf_imp2 = ImputeRandomForest(
|
|
277
|
-
data,
|
|
278
|
-
prefix=prefix,
|
|
279
|
-
n_estimators=50,
|
|
280
|
-
n_nearest_features=2,
|
|
281
|
-
gridparams=grid_params_ga,
|
|
282
|
-
cv=3,
|
|
283
|
-
grid_iter=40,
|
|
284
|
-
n_jobs=-1,
|
|
285
|
-
max_iter=2,
|
|
286
|
-
column_subset=1.0,
|
|
287
|
-
ga=True,
|
|
288
|
-
disable_progressbar=True,
|
|
289
|
-
extratrees=False,
|
|
290
|
-
chunk_size=1.0,
|
|
291
|
-
initial_strategy="phylogeny",
|
|
292
|
-
)
|
|
293
|
-
|
|
294
|
-
print("++++++++++++++++++++++++++++++++")
|
|
295
|
-
print("+++ SUCCESS!")
|
|
296
|
-
print("++++++++++++++++++++++++++++++++")
|
|
297
|
-
print("\n")
|
|
298
|
-
|
|
299
|
-
print("#################################")
|
|
300
|
-
print("TESTING NEURAL NETWORKS...")
|
|
301
|
-
print("#################################")
|
|
302
|
-
print("\n")
|
|
303
|
-
|
|
304
|
-
print("-----------------------------------------------------")
|
|
305
|
-
print(
|
|
306
|
-
"--- Testing VAE with validation procedure with\n"
|
|
307
|
-
"--- intial_strategy='populations'..."
|
|
308
|
-
)
|
|
309
|
-
print("-----------------------------------------------------")
|
|
310
|
-
print("\n")
|
|
311
|
-
|
|
312
|
-
vae = ImputeVAE(
|
|
313
|
-
genotype_data=data,
|
|
314
|
-
prefix=prefix,
|
|
315
|
-
disable_progressbar=True,
|
|
316
|
-
validation_only=1.0,
|
|
317
|
-
initial_strategy="populations",
|
|
318
|
-
cv=3,
|
|
319
|
-
)
|
|
320
|
-
|
|
321
|
-
print("-----------------------------------------------------")
|
|
322
|
-
print(
|
|
323
|
-
"--- Testing ImputeNLPCA with\n"
|
|
324
|
-
"--- initial_strategy == 'phylogeny'..."
|
|
325
|
-
)
|
|
326
|
-
print("-----------------------------------------------------")
|
|
327
|
-
print("\n")
|
|
328
|
-
|
|
329
|
-
nlpca = ImputeNLPCA(
|
|
330
|
-
data,
|
|
331
|
-
n_components=3,
|
|
332
|
-
initial_strategy="phylogeny",
|
|
333
|
-
disable_progressbar=True,
|
|
334
|
-
cv=3,
|
|
335
|
-
hidden_activation="elu",
|
|
336
|
-
hidden_layer_sizes="midpoint",
|
|
337
|
-
validation_only=None,
|
|
338
|
-
num_hidden_layers=1,
|
|
339
|
-
learning_rate=0.1,
|
|
340
|
-
)
|
|
341
|
-
|
|
342
|
-
print("-------------------------------------------------------")
|
|
343
|
-
print("--- Testing ImputeUBP with initial_strategy == 'nmf'...")
|
|
344
|
-
print("-------------------------------------------------------")
|
|
345
|
-
print("\n")
|
|
346
|
-
|
|
347
|
-
ubp = ImputeUBP(
|
|
348
|
-
genotype_data=data,
|
|
349
|
-
initial_strategy="nmf",
|
|
350
|
-
disable_progressbar=True,
|
|
351
|
-
validation_only=None,
|
|
352
|
-
learning_rate=0.1,
|
|
353
|
-
num_hidden_layers=1,
|
|
354
|
-
hidden_layer_sizes=1,
|
|
355
|
-
hidden_activation="elu",
|
|
356
|
-
cv=3,
|
|
357
|
-
n_components=3,
|
|
358
|
-
)
|
|
359
|
-
|
|
360
|
-
print("++++++++++++++++++++++++++++++++")
|
|
361
|
-
print("+++ SUCCESS!")
|
|
362
|
-
print("++++++++++++++++++++++++++++++++")
|
|
363
|
-
print("\n")
|
|
364
|
-
|
|
365
|
-
# Try to remove temporary treefile.
|
|
366
|
-
try:
|
|
367
|
-
os.remove(t)
|
|
368
|
-
except OSError:
|
|
369
|
-
pass
|
|
370
|
-
|
|
371
|
-
print("######################################")
|
|
372
|
-
print("### ALL TESTS PASSED SUCCESSFULLY!")
|
|
373
|
-
print("######################################")
|
|
374
|
-
print("\n")
|
test/test_tkc.py
DELETED
|
@@ -1,185 +0,0 @@
|
|
|
1
|
-
import sys
|
|
2
|
-
import os
|
|
3
|
-
import copy
|
|
4
|
-
import unittest
|
|
5
|
-
import pprint
|
|
6
|
-
from pgsui.impute.estimators import (
|
|
7
|
-
ImputeKNN,
|
|
8
|
-
ImputeRandomForest,
|
|
9
|
-
ImputeXGBoost,
|
|
10
|
-
ImputeVAE,
|
|
11
|
-
ImputeStandardAutoEncoder,
|
|
12
|
-
ImputeUBP,
|
|
13
|
-
ImputeNLPCA,
|
|
14
|
-
)
|
|
15
|
-
from pgsui.impute.simple_imputers import (
|
|
16
|
-
ImputePhylo,
|
|
17
|
-
ImputeMF,
|
|
18
|
-
ImputeAlleleFreq,
|
|
19
|
-
ImputeRefAllele,
|
|
20
|
-
)
|
|
21
|
-
|
|
22
|
-
from snpio import GenotypeData
|
|
23
|
-
from pgsui.data_processing.transformers import SimGenotypeDataTransformer
|
|
24
|
-
import numpy as np
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class HiddenPrints:
|
|
28
|
-
def __enter__(self):
|
|
29
|
-
self._original_stdout = sys.stdout
|
|
30
|
-
sys.stdout = open(os.devnull, "w")
|
|
31
|
-
|
|
32
|
-
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
33
|
-
sys.stdout.close()
|
|
34
|
-
sys.stdout = self._original_stdout
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class TestMyClasses(unittest.TestCase):
|
|
38
|
-
def setUp(self):
|
|
39
|
-
with HiddenPrints():
|
|
40
|
-
self.genotype_data = GenotypeData(
|
|
41
|
-
filename="pgsui/example_data/phylip_files/test_n100.phy",
|
|
42
|
-
popmapfile="pgsui/example_data/popmaps/test.popmap",
|
|
43
|
-
guidetree="pgsui/example_data/trees/test.tre",
|
|
44
|
-
qmatrix="pgsui/example_data/trees/test.qmat",
|
|
45
|
-
siterates="pgsui/example_data/trees/test_siterates_n100.txt",
|
|
46
|
-
prefix="test_imputer",
|
|
47
|
-
force_popmap=True,
|
|
48
|
-
plot_format="png",
|
|
49
|
-
)
|
|
50
|
-
|
|
51
|
-
# Create a SimGenotypeDataTransformer instance and use it
|
|
52
|
-
# to simulate missing data
|
|
53
|
-
self.transformer = SimGenotypeDataTransformer(
|
|
54
|
-
genotype_data=self.genotype_data,
|
|
55
|
-
prop_missing=0.2,
|
|
56
|
-
strategy="random_weighted",
|
|
57
|
-
)
|
|
58
|
-
self.transformer.fit(self.genotype_data.genotypes_012(fmt="numpy"))
|
|
59
|
-
self.simulated_data = copy.deepcopy(self.genotype_data)
|
|
60
|
-
|
|
61
|
-
self.simulated_data.genotypes_012 = self.transformer.transform(
|
|
62
|
-
self.genotype_data.genotypes_012(fmt="numpy")
|
|
63
|
-
)
|
|
64
|
-
|
|
65
|
-
def _test_class(self, class_instance, do_gridsearch=False):
|
|
66
|
-
print(f"\nMETHOD: {class_instance.__name__}\n")
|
|
67
|
-
|
|
68
|
-
if do_gridsearch:
|
|
69
|
-
# Do a simple test.
|
|
70
|
-
if class_instance in [ImputeRandomForest, ImputeXGBoost]:
|
|
71
|
-
param_grid = {"n_estimators": [50, 100]} # Do a simple test
|
|
72
|
-
elif class_instance in [
|
|
73
|
-
ImputeVAE,
|
|
74
|
-
ImputeStandardAutoEncoder,
|
|
75
|
-
ImputeNLPCA,
|
|
76
|
-
ImputeUBP,
|
|
77
|
-
]:
|
|
78
|
-
param_grid = {"dropout_rate": [0.1, 0.2]}
|
|
79
|
-
elif class_instance == ImputeKNN:
|
|
80
|
-
param_grid = {"n_neighbors": [5, 8]}
|
|
81
|
-
else:
|
|
82
|
-
param_grid = None
|
|
83
|
-
|
|
84
|
-
kwargs = {
|
|
85
|
-
'sim_strategy': "random_weighted"
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
instance = class_instance(
|
|
89
|
-
self.simulated_data,
|
|
90
|
-
gridparams=param_grid,
|
|
91
|
-
**kwargs)
|
|
92
|
-
|
|
93
|
-
imputed_data = instance.imputed.genotypes_012(fmt="numpy")
|
|
94
|
-
|
|
95
|
-
# Test that the imputed values are close to the original values
|
|
96
|
-
accuracy = self.transformer.accuracy(
|
|
97
|
-
self.genotype_data.genotypes_012(fmt="numpy"), imputed_data
|
|
98
|
-
)
|
|
99
|
-
|
|
100
|
-
(
|
|
101
|
-
auc_roc_scores,
|
|
102
|
-
precision_scores,
|
|
103
|
-
recall_scores,
|
|
104
|
-
avg_precision_scores,
|
|
105
|
-
) = self.transformer.auc_roc_pr_ap(
|
|
106
|
-
self.genotype_data.genotypes_012(fmt="numpy"), imputed_data
|
|
107
|
-
)
|
|
108
|
-
|
|
109
|
-
pprint.PrettyPrinter(indent=4, sort_dicts=True).pprint(
|
|
110
|
-
f"OVERALL ACCURACY: {accuracy}"
|
|
111
|
-
)
|
|
112
|
-
pprint.PrettyPrinter(indent=4, sort_dicts=True).pprint(
|
|
113
|
-
f"AUC-ROC PER CLASS: {dict(zip(range(3), auc_roc_scores))}"
|
|
114
|
-
)
|
|
115
|
-
pprint.PrettyPrinter(indent=4, sort_dicts=True).pprint(
|
|
116
|
-
f"PRECISION PER CLASS: {dict(zip(range(3), precision_scores))}"
|
|
117
|
-
)
|
|
118
|
-
pprint.PrettyPrinter(indent=4, sort_dicts=True).pprint(
|
|
119
|
-
f"RECALL PER CLASS: {dict(zip(range(3), recall_scores))}"
|
|
120
|
-
)
|
|
121
|
-
pprint.PrettyPrinter(indent=4, sort_dicts=True).pprint(
|
|
122
|
-
f"AVERAGE PRECISION PER CLASS: {dict(zip(range(3), avg_precision_scores))}"
|
|
123
|
-
)
|
|
124
|
-
print("\n")
|
|
125
|
-
|
|
126
|
-
# def test_ImputeKNN(self):
|
|
127
|
-
# self._test_class(ImputeKNN)
|
|
128
|
-
|
|
129
|
-
# def test_ImputeRandomForest(self):
|
|
130
|
-
# self._test_class(ImputeRandomForest)
|
|
131
|
-
|
|
132
|
-
# def test_ImputeXGBoost(self):
|
|
133
|
-
# self._test_class(ImputeXGBoost)
|
|
134
|
-
|
|
135
|
-
# def test_ImputeVAE(self):
|
|
136
|
-
# self._test_class(ImputeVAE)
|
|
137
|
-
|
|
138
|
-
# def test_ImputeStandardAutoEncoder(self):
|
|
139
|
-
# self._test_class(ImputeStandardAutoEncoder)
|
|
140
|
-
|
|
141
|
-
def test_ImputeUBP(self):
|
|
142
|
-
self._test_class(ImputeUBP)
|
|
143
|
-
|
|
144
|
-
# def test_ImputeNLPCA(self):
|
|
145
|
-
# self._test_class(ImputeNLPCA)
|
|
146
|
-
|
|
147
|
-
# def test_ImputeKNN_grid(self):
|
|
148
|
-
# self._test_class(ImputeKNN, do_gridsearch=True)
|
|
149
|
-
|
|
150
|
-
# def test_ImputeRandomForest_grid(self):
|
|
151
|
-
# self._test_class(ImputeRandomForest, do_gridsearch=True)
|
|
152
|
-
|
|
153
|
-
# def test_ImputeXGBoost_grid(self):
|
|
154
|
-
# self._test_class(ImputeXGBoost, do_gridsearch=True)
|
|
155
|
-
|
|
156
|
-
# def test_ImputeVAE_grid(self):
|
|
157
|
-
# self._test_class(ImputeVAE, do_gridsearch=True)
|
|
158
|
-
|
|
159
|
-
# def test_ImputeStandardAutoEncoder_grid(self):
|
|
160
|
-
# self._test_class(ImputeStandardAutoEncoder, do_gridsearch=True)
|
|
161
|
-
|
|
162
|
-
# def test_ImputeUBP_grid(self):
|
|
163
|
-
# self._test_class(ImputeUBP, do_gridsearch=True)
|
|
164
|
-
|
|
165
|
-
# def test_ImputeNLPCA_grid(self):
|
|
166
|
-
# self._test_class(ImputeNLPCA, do_gridsearch=True)
|
|
167
|
-
|
|
168
|
-
# def test_ImputePhylo(self):
|
|
169
|
-
# self._test_class(ImputePhylo)
|
|
170
|
-
|
|
171
|
-
# def test_ImputeAlleleFreq(self):
|
|
172
|
-
# self._test_class(ImputeAlleleFreq)
|
|
173
|
-
|
|
174
|
-
# def test_ImputeRefAllele(self):
|
|
175
|
-
# self._test_class(ImputeRefAllele)
|
|
176
|
-
|
|
177
|
-
# def test_ImputeMF(self):
|
|
178
|
-
# self._test_class(ImputeMF)
|
|
179
|
-
|
|
180
|
-
# def test_ImputeRefAllele(self):
|
|
181
|
-
# self._test_class(ImputeRefAllele)
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
if __name__ == "__main__":
|
|
185
|
-
unittest.main()
|