pg-sui 0.2.3__py3-none-any.whl → 1.6.16a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. pg_sui-1.6.16a3.dist-info/METADATA +292 -0
  2. pg_sui-1.6.16a3.dist-info/RECORD +81 -0
  3. {pg_sui-0.2.3.dist-info → pg_sui-1.6.16a3.dist-info}/WHEEL +1 -1
  4. pg_sui-1.6.16a3.dist-info/entry_points.txt +4 -0
  5. {pg_sui-0.2.3.dist-info → pg_sui-1.6.16a3.dist-info/licenses}/LICENSE +0 -0
  6. pg_sui-1.6.16a3.dist-info/top_level.txt +1 -0
  7. pgsui/__init__.py +35 -54
  8. pgsui/_version.py +34 -0
  9. pgsui/cli.py +922 -0
  10. pgsui/data_processing/__init__.py +0 -0
  11. pgsui/data_processing/config.py +565 -0
  12. pgsui/data_processing/containers.py +1436 -0
  13. pgsui/data_processing/transformers.py +557 -907
  14. pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
  15. pgsui/electron/app/__main__.py +5 -0
  16. pgsui/electron/app/extra-resources/.gitkeep +1 -0
  17. pgsui/electron/app/icons/icons/1024x1024.png +0 -0
  18. pgsui/electron/app/icons/icons/128x128.png +0 -0
  19. pgsui/electron/app/icons/icons/16x16.png +0 -0
  20. pgsui/electron/app/icons/icons/24x24.png +0 -0
  21. pgsui/electron/app/icons/icons/256x256.png +0 -0
  22. pgsui/electron/app/icons/icons/32x32.png +0 -0
  23. pgsui/electron/app/icons/icons/48x48.png +0 -0
  24. pgsui/electron/app/icons/icons/512x512.png +0 -0
  25. pgsui/electron/app/icons/icons/64x64.png +0 -0
  26. pgsui/electron/app/icons/icons/icon.icns +0 -0
  27. pgsui/electron/app/icons/icons/icon.ico +0 -0
  28. pgsui/electron/app/main.js +227 -0
  29. pgsui/electron/app/package-lock.json +6894 -0
  30. pgsui/electron/app/package.json +51 -0
  31. pgsui/electron/app/preload.js +15 -0
  32. pgsui/electron/app/server.py +157 -0
  33. pgsui/electron/app/ui/logo.png +0 -0
  34. pgsui/electron/app/ui/renderer.js +131 -0
  35. pgsui/electron/app/ui/styles.css +59 -0
  36. pgsui/electron/app/ui/ui_shim.js +72 -0
  37. pgsui/electron/bootstrap.py +43 -0
  38. pgsui/electron/launch.py +57 -0
  39. pgsui/electron/package.json +14 -0
  40. pgsui/example_data/__init__.py +0 -0
  41. pgsui/example_data/phylip_files/__init__.py +0 -0
  42. pgsui/example_data/phylip_files/test.phy +0 -0
  43. pgsui/example_data/popmaps/__init__.py +0 -0
  44. pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
  45. pgsui/example_data/structure_files/__init__.py +0 -0
  46. pgsui/example_data/structure_files/test.pops.2row.allsites.str +0 -0
  47. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
  48. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
  49. pgsui/impute/__init__.py +0 -0
  50. pgsui/impute/deterministic/imputers/allele_freq.py +725 -0
  51. pgsui/impute/deterministic/imputers/mode.py +844 -0
  52. pgsui/impute/deterministic/imputers/nmf.py +221 -0
  53. pgsui/impute/deterministic/imputers/phylo.py +973 -0
  54. pgsui/impute/deterministic/imputers/ref_allele.py +669 -0
  55. pgsui/impute/supervised/__init__.py +0 -0
  56. pgsui/impute/supervised/base.py +343 -0
  57. pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
  58. pgsui/impute/supervised/imputers/hist_gradient_boosting.py +317 -0
  59. pgsui/impute/supervised/imputers/random_forest.py +291 -0
  60. pgsui/impute/unsupervised/__init__.py +0 -0
  61. pgsui/impute/unsupervised/base.py +1121 -0
  62. pgsui/impute/unsupervised/callbacks.py +92 -262
  63. {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
  64. pgsui/impute/unsupervised/imputers/autoencoder.py +1361 -0
  65. pgsui/impute/unsupervised/imputers/nlpca.py +1666 -0
  66. pgsui/impute/unsupervised/imputers/ubp.py +1660 -0
  67. pgsui/impute/unsupervised/imputers/vae.py +1316 -0
  68. pgsui/impute/unsupervised/loss_functions.py +261 -0
  69. pgsui/impute/unsupervised/models/__init__.py +0 -0
  70. pgsui/impute/unsupervised/models/autoencoder_model.py +215 -567
  71. pgsui/impute/unsupervised/models/nlpca_model.py +155 -394
  72. pgsui/impute/unsupervised/models/ubp_model.py +180 -1106
  73. pgsui/impute/unsupervised/models/vae_model.py +269 -630
  74. pgsui/impute/unsupervised/nn_scorers.py +255 -0
  75. pgsui/utils/__init__.py +0 -0
  76. pgsui/utils/classification_viz.py +608 -0
  77. pgsui/utils/logging_utils.py +22 -0
  78. pgsui/utils/misc.py +35 -480
  79. pgsui/utils/plotting.py +996 -829
  80. pgsui/utils/pretty_metrics.py +290 -0
  81. pgsui/utils/scorers.py +213 -666
  82. pg_sui-0.2.3.dist-info/METADATA +0 -322
  83. pg_sui-0.2.3.dist-info/RECORD +0 -75
  84. pg_sui-0.2.3.dist-info/top_level.txt +0 -3
  85. pgsui/example_data/phylip_files/test_n10.phy +0 -118
  86. pgsui/example_data/phylip_files/test_n100.phy +0 -118
  87. pgsui/example_data/phylip_files/test_n2.phy +0 -118
  88. pgsui/example_data/phylip_files/test_n500.phy +0 -118
  89. pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
  90. pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
  91. pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
  92. pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
  93. pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
  94. pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
  95. pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
  96. pgsui/example_data/trees/test.iqtree +0 -376
  97. pgsui/example_data/trees/test.qmat +0 -5
  98. pgsui/example_data/trees/test.rate +0 -2033
  99. pgsui/example_data/trees/test.tre +0 -1
  100. pgsui/example_data/trees/test_n10.rate +0 -19
  101. pgsui/example_data/trees/test_n100.rate +0 -109
  102. pgsui/example_data/trees/test_n500.rate +0 -509
  103. pgsui/example_data/trees/test_siterates.txt +0 -2024
  104. pgsui/example_data/trees/test_siterates_n10.txt +0 -10
  105. pgsui/example_data/trees/test_siterates_n100.txt +0 -100
  106. pgsui/example_data/trees/test_siterates_n500.txt +0 -500
  107. pgsui/example_data/vcf_files/test.vcf +0 -244
  108. pgsui/example_data/vcf_files/test.vcf.gz +0 -0
  109. pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
  110. pgsui/impute/estimators.py +0 -1268
  111. pgsui/impute/impute.py +0 -1463
  112. pgsui/impute/simple_imputers.py +0 -1431
  113. pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -782
  114. pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1024
  115. pgsui/impute/unsupervised/keras_classifiers.py +0 -697
  116. pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
  117. pgsui/impute/unsupervised/neural_network_imputers.py +0 -1440
  118. pgsui/impute/unsupervised/neural_network_methods.py +0 -1395
  119. pgsui/pg_sui.py +0 -261
  120. pgsui/utils/sequence_tools.py +0 -407
  121. simulation/sim_benchmarks.py +0 -333
  122. simulation/sim_treeparams.py +0 -475
  123. test/__init__.py +0 -0
  124. test/pg_sui_simtest.py +0 -215
  125. test/pg_sui_testing.py +0 -523
  126. test/test.py +0 -151
  127. test/test_pgsui.py +0 -374
  128. test/test_tkc.py +0 -185
test/pg_sui_testing.py DELETED
@@ -1,523 +0,0 @@
1
- #!/usr/bin/env python
2
-
3
- # Standard library imports
4
- import argparse
5
- import sys
6
-
7
- import numpy as np
8
- import pandas as pd
9
- import scipy.stats as stats
10
-
11
- from sklearn_genetic.space import Continuous, Categorical, Integer
12
-
13
- # from pgsui import GenotypeData
14
- from snpio import GenotypeData
15
- from impute.estimators import (
16
- ImputeNLPCA,
17
- ImputeUBP,
18
- ImputeRandomForest,
19
- ImputeVAE,
20
- )
21
- from impute.simple_imputers import ImputePhylo
22
-
23
- # from snpio import GenotypeData
24
- # from impute.estimators import *
25
- # from impute.simple_imputers import ImputeAlleleFreq, ImputePhylo
26
-
27
- # from read_input import GenotypeData
28
- # from estimators import *
29
-
30
-
31
- def main():
32
- """Class instantiations and main package body"""
33
-
34
- args = get_arguments()
35
-
36
- if args.str and args.phylip:
37
- sys.exit("Error: Only one file type can be specified")
38
-
39
- # If VCF file is specified.
40
- if args.str:
41
- if not args.pop_ids and args.popmap is None:
42
- raise TypeError("Either --pop_ids or --popmap must be specified\n")
43
-
44
- if args.pop_ids:
45
- print("\n--pop_ids was specified as column 2\n")
46
- else:
47
- print(
48
- "\n--pop_ids was not specified; "
49
- "using popmap file to get population IDs\n"
50
- )
51
-
52
- if args.onerow_perind:
53
- print("\nUsing one row per individual...\n")
54
- else:
55
- print("\nUsing two rows per individual...\n")
56
-
57
- if args.onerow_perind:
58
- data = GenotypeData(
59
- filename=args.str,
60
- filetype="structure1row",
61
- popmapfile=args.popmap,
62
- guidetree=args.treefile,
63
- qmatrix_iqtree=args.iqtree,
64
- )
65
- else:
66
- data = GenotypeData(
67
- filename=args.str,
68
- filetype="structure2row",
69
- popmapfile=args.popmap,
70
- guidetree=args.treefile,
71
- qmatrix_iqtree=args.iqtree,
72
- )
73
-
74
- if args.phylip:
75
- if args.pop_ids or args.onerow_perind:
76
- print(
77
- "\nPhylip file was used with structure arguments; ignoring "
78
- "structure file arguments\n"
79
- )
80
-
81
- if args.popmap is None:
82
- raise TypeError("No popmap file supplied with PHYLIP file\n")
83
-
84
- data = GenotypeData(
85
- filename=args.phylip,
86
- filetype="phylip",
87
- popmapfile=args.popmap,
88
- guidetree=args.treefile,
89
- qmatrix_iqtree=args.iqtree,
90
- siterates_iqtree="pgsui/example_data/trees/test_n10.rate",
91
- )
92
-
93
- if args.resume_imputed:
94
- pass
95
- # data.read_imputed(args.resume_imputed, impute_methods="rf")
96
- # data.write_imputed(data.imputed_rf_df, args.prefix)
97
-
98
- else:
99
- # For randomizedsearchcv
100
- # Number of trees in random forest
101
- n_estimators = [
102
- int(x) for x in np.linspace(start=100, stop=1000, num=10)
103
- ]
104
-
105
- # Number of features to consider at every split
106
- max_features = ["sqrt", "log2"]
107
-
108
- # Maximum number of levels in the tree
109
- max_depth = [int(x) for x in np.linspace(10, 110, num=11)]
110
- max_depth.append(None)
111
-
112
- # Minimmum number of samples required to split a node
113
- min_samples_split = [int(x) for x in np.linspace(2, 10, num=5)]
114
-
115
- # Minimum number of samples required at each leaf node
116
- min_samples_leaf = [int(x) for x in np.linspace(1, 5, num=5)]
117
-
118
- # Proportion of dataset to use with bootstrapping
119
- # max_samples = [x for x in np.linspace(0.5, 1.0, num=6)]
120
-
121
- # # Random Forest gridparams - RandomizedSearchCV
122
- # grid_params = {
123
- # "max_features": max_features,
124
- # "max_depth": max_depth,
125
- # "min_samples_split": min_samples_split,
126
- # "min_samples_leaf": min_samples_leaf,
127
- # }
128
-
129
- # Random Forest gridparams - Genetic Algorithms
130
- # grid_params = {
131
- # "n_estimators": Integer(100, 500),
132
- # "max_features": max_features,
133
- # "max_depth": max_depth,
134
- # "min_samples_split": min_samples_split,
135
- # "min_samples_leaf": min_samples_leaf,
136
- # "max_samples": max_samples
137
- # }
138
-
139
- # # Genetic Algorithm grid_params
140
- # grid_params = {
141
- # "max_features": Categorical(["sqrt", "log2"]),
142
- # "min_samples_split": Integer(2, 10),
143
- # "min_samples_leaf": Integer(1, 10),
144
- # "max_depth": Integer(2, 110),
145
- # }
146
-
147
- # Bayesian Ridge gridparams - RandomizedSearchCV
148
- # grid_params = {
149
- # "alpha_1": stats.loguniform(1e-6, 0.01),
150
- # "alpha_2": stats.loguniform(1e-6, 0.01),
151
- # "lambda_1": stats.loguniform(1e-6, 0.01),
152
- # "lambda_2": stats.loguniform(1e-6, 0.01),
153
- # }
154
-
155
- # # Bayesian Ridge gridparams - Genetic algorithm
156
- # grid_params = {
157
- # "alpha_1": Continuous(1e-6, 1e-3, distribution="log-uniform"),
158
- # "alpha_2": Continuous(1e-6, 1e-3, distribution="log-uniform"),
159
- # "lambda_1": Continuous(1e-6, 1e-3, distribution="log-uniform"),
160
- # "lambda_2": Continuous(1e-6, 1e-3, distribution="log-uniform")
161
- # }
162
-
163
- # # Random forest imputation with genetic algorithm grid search
164
- # rf_imp = ImputeRandomForest(
165
- # data,
166
- # prefix=args.prefix,
167
- # n_estimators=50,
168
- # n_nearest_features=1,
169
- # gridparams=grid_params,
170
- # cv=3,
171
- # grid_iter=40,
172
- # n_jobs=4,
173
- # max_iter=2,
174
- # column_subset=1.0,
175
- # ga=False,
176
- # disable_progressbar=True,
177
- # extratrees=False,
178
- # mutation_probability=0.1,
179
- # progress_update_percent=20,
180
- # chunk_size=1.0,
181
- # initial_strategy="phylogeny",
182
- # )
183
-
184
- # # Genetic Algorithm grid search Test
185
- # rf_imp2 = ImputeRandomForest(
186
- # data,
187
- # prefix=args.prefix,
188
- # n_estimators=50,
189
- # n_nearest_features=2,
190
- # gridparams=grid_params,
191
- # cv=3,
192
- # grid_iter=40,
193
- # n_jobs=-1,
194
- # max_iter=2,
195
- # column_subset=1.0,
196
- # ga=True,
197
- # disable_progressbar=True,
198
- # extratrees=False,
199
- # chunk_size=1.0,
200
- # initial_strategy="phylogeny",
201
- # )
202
-
203
- # rfdata = rf_imp.imputed
204
- # print(rfdata.genotypes012_df)
205
-
206
- # rf_data = rf_imp.imputed
207
- # print(data.genotypes012_df)
208
- # print(rf_data.genotypes012_df)
209
-
210
- # imp_decoded = data.decode_imputed(rf_imp.imputed)
211
- # print(imp_decoded)
212
-
213
- # # RandomizedSearchCV Test
214
- # rf_imp = ImputeRandomForest(
215
- # data,
216
- # prefix=args.prefix,
217
- # n_estimators=50,
218
- # n_nearest_features=3,
219
- # gridparams=grid_params,
220
- # cv=3,
221
- # grid_iter=40,
222
- # n_jobs=4,
223
- # max_iter=2,
224
- # column_subset=5,
225
- # ga=False,
226
- # disable_progressbar=False,
227
- # extratrees=False,
228
- # progress_update_percent=20,
229
- # chunk_size=0.2,
230
- # initial_strategy="phylogeny",
231
- # )
232
-
233
- # lgbm = ImputeLightGBM(
234
- # data,
235
- # prefix=args.prefix,
236
- # cv=3,
237
- # n_jobs=4,
238
- # n_estimators=50,
239
- # disable_progressbar=True,
240
- # chunk_size=0.2,
241
- # validation_only=0.1,
242
- # n_nearest_features=3,
243
- # max_iter=2,
244
- # initial_strategy="populations",
245
- # )
246
-
247
- # vae = ImputeVAE(
248
- # genotype_data=data,
249
- # prefix=args.prefix,
250
- # disable_progressbar=True,
251
- # validation_only=None,
252
- # initial_strategy="populations",
253
- # )
254
-
255
- # vae_gtdata = vae.imputed
256
- # print(vae_gtdata.genotypes012_df)
257
-
258
- # complete_encoded = imputer.train(train_epochs=300, batch_size=256)
259
- # print(complete_encoded)
260
-
261
- # rf_imp = ImputeRandomForest(
262
- # data,
263
- # prefix=args.prefix,
264
- # n_estimators=50,
265
- # n_nearest_features=3,
266
- # n_jobs=4,
267
- # max_iter=2,
268
- # disable_progressbar=True,
269
- # extratrees=False,
270
- # max_features="sqrt",
271
- # min_samples_split=5,
272
- # min_samples_leaf=2,
273
- # max_depth=30,
274
- # cv=3,
275
- # validation_only=0.3,
276
- # chunk_size=1.0,
277
- # initial_strategy="populations",
278
- # )
279
-
280
- # afpops = ImputeAlleleFreq(
281
- # genotype_data=data,
282
- # by_populations=True,
283
- # prefix=args.prefix,
284
- # )
285
-
286
- # print(data.genotypes012_df)
287
- # print(afpops.genotypes012_df)
288
-
289
- # br_imp = ImputeBayesianRidge(data, prefix=args.prefix, n_iter=100, gridparams=grid_params, grid_iter=3, cv=3, n_jobs=4, max_iter=5, n_nearest_features=3, column_subset=4, ga=False, disable_progressbar=True, progress_update_percent=20, chunk_size=1.0)
290
-
291
- # aftestpops = ImputeAlleleFreq(
292
- # genotype_data=data, by_populations=True, prefix=args.prefix
293
- # )
294
-
295
- # aftestpops_data = aftestpops.imputed
296
-
297
- # print(data.genotypes012_df)
298
- # print(aftestpops_data.genotypes012_df)
299
-
300
- # vae = ImputeVAE(
301
- # gt=np.array([[0, 1], [-9, 1], [2, -9]]),
302
- # initial_strategy="most_frequent",
303
- # cv=3,
304
- # validation_only=None,
305
- # )
306
-
307
- # vae_data = vae.imputed
308
-
309
- # print(data.genotypes012_df)
310
- # print(vae_data.genotypes012_df)
311
-
312
- # For GridSearchCV. Generate parameters to sample from.
313
- learning_rate = [float(10) ** x for x in np.arange(-4, 0)]
314
- l1_penalty = [float(10) ** x for x in np.arange(-6, -1)]
315
- l1_penalty.append(0.0)
316
- l2_penalty = [float(10) ** x for x in np.arange(-6, -1)]
317
- l2_penalty.append(0.0)
318
- hidden_activation = ["elu", "relu"]
319
- num_hidden_layers = [1, 2, 3, 4, 5]
320
- hidden_layer_sizes = ["sqrt", "midpoint"]
321
- n_components = [2, 3]
322
- dropout_rate = [round(x, 1) for x in np.arange(0.0, 1.0, 0.1)]
323
- batch_size = [16, 32, 48, 64]
324
- optimizer = ["adam", "sgd", "adagrad"]
325
-
326
- # grid_params = {
327
- # "learning_rate": Continuous(1e-6, 0.1, distribution="log-uniform"),
328
- # "l2_penalty": Continuous(1e-6, 0.01, distribution="uniform"),
329
- # "n_components": Integer(2, 3),
330
- # # "hidden_activation": Categorical(["elu", "relu"]),
331
- # }
332
-
333
- grid_params = {
334
- # "learning_rate": learning_rate,
335
- # "l1_penalty": l1_penalty,
336
- "l2_penalty": l2_penalty,
337
- # "hidden_activation": hidden_activation,
338
- # "hidden_layer_sizes": hidden_layer_sizes,
339
- "n_components": n_components,
340
- # "dropout_rate": dropout_rate,
341
- # "batch_size": batch_size,
342
- # "optimizer": optimizer,
343
- }
344
-
345
- ubp = ImputeUBP(
346
- data,
347
- disable_progressbar=False,
348
- cv=3,
349
- column_subset=1.0,
350
- validation_split=0.0,
351
- learning_rate=0.1,
352
- num_hidden_layers=1,
353
- verbose=1,
354
- dropout_rate=0.2,
355
- hidden_activation="elu",
356
- batch_size=64,
357
- l1_penalty=1e-6,
358
- l2_penalty=1e-6,
359
- gridparams=grid_params,
360
- n_jobs=4,
361
- grid_iter=5,
362
- sim_strategy="nonrandom_weighted",
363
- sim_prop_missing=0.4,
364
- scoring_metric="precision_recall_macro",
365
- gridsearch_method="randomized_gridsearch",
366
- early_stop_gen=5,
367
- # sample_weights={0: 1.0, 1: 0.0, 2: 1.0},
368
- # sample_weights="auto",
369
- )
370
-
371
- # ubp = ImputeVAE(
372
- # data,
373
- # # gridparams=grid_params,
374
- # # initial_strategy="populations",
375
- # # disable_progressbar=True,
376
- # # cv=3,
377
- # # column_subset=1.0,
378
- # # validation_size=0.3,
379
- # # learning_rate=0.1,
380
- # # num_hidden_layers=1,
381
- # # verbose=1,
382
- # # gridparams=grid_params,
383
- # )
384
-
385
- # nlpca_data = nlpca.imputed
386
- # print(nlpca_data.genotypes012_df)
387
-
388
- # print(data.genotypes012_df)
389
- # print(nlpca_data.genotypes012_df)
390
-
391
- # ubp = ImputeUBP(
392
- # genotype_data=data,
393
- # test_categorical=np.array([[0, 1], [-9, 1], [2, -9]]),
394
- # )
395
-
396
- # ubp = ImputeVAE(
397
- # gt=np.array([[0, 1], [-9, 1], [2, -9]]),
398
- # initial_strategy="most_frequent",
399
- # )
400
-
401
- # br_imp = ImputeBayesianRidge(
402
- # data,
403
- # prefix=args.prefix,
404
- # alpha_1=0.0002689638465560243,
405
- # alpha_2=0.0001473822173361299,
406
- # lambda_1=0.0003281735206234651,
407
- # lambda_2=0.00020767920087590963,
408
- # n_iter=100,
409
- # n_nearest_features=3,
410
- # progress_update_percent=20,
411
- # disable_progressbar=True,
412
- # max_iter=2,
413
- # cv=3,
414
- # initial_strategy="group_mode",
415
- # )
416
-
417
- # phylo = ImputePhylo(
418
- # genotype_data=data, save_plots=False, disable_progressbar=True
419
- # )
420
-
421
- # phylodata = phylo.imputed
422
- # print(phylodata.genotypes012_df)
423
-
424
-
425
- def get_arguments():
426
- """[Parse command-line arguments. Imported with argparse]
427
-
428
- Returns:
429
- [argparse object]: [contains command-line arguments; accessed as method]
430
- """
431
-
432
- parser = argparse.ArgumentParser(
433
- description="Machine learning missing data imputation and species delimitation",
434
- add_help=False,
435
- )
436
-
437
- required_args = parser.add_argument_group("Required arguments")
438
- filetype_args = parser.add_argument_group(
439
- "File type arguments (choose only one)"
440
- )
441
- structure_args = parser.add_argument_group("Structure file arguments")
442
- optional_args = parser.add_argument_group("Optional arguments")
443
-
444
- # File Type arguments
445
- filetype_args.add_argument(
446
- "-s", "--str", type=str, required=False, help="Input structure file"
447
- )
448
- filetype_args.add_argument(
449
- "-p", "--phylip", type=str, required=False, help="Input phylip file"
450
- )
451
-
452
- filetype_args.add_argument(
453
- "-t",
454
- "--treefile",
455
- type=str,
456
- required=False,
457
- default=None,
458
- help="Newick-formatted treefile",
459
- )
460
-
461
- filetype_args.add_argument(
462
- "-i",
463
- "--iqtree",
464
- type=str,
465
- required=False,
466
- help=".iqtree output file containing Rate Matrix Q",
467
- )
468
-
469
- # Structure Arguments
470
- structure_args.add_argument(
471
- "--onerow_perind",
472
- default=False,
473
- action="store_true",
474
- help="Toggles on one row per individual option in structure file",
475
- )
476
- structure_args.add_argument(
477
- "--pop_ids",
478
- default=False,
479
- required=False,
480
- action="store_true",
481
- help="Toggles on population ID column (2nd col) in structure file",
482
- )
483
-
484
- ## Optional Arguments
485
- optional_args.add_argument(
486
- "-m",
487
- "--popmap",
488
- type=str,
489
- required=False,
490
- default=None,
491
- help="Two-column tab-separated population map file: inds\tpops. No header line",
492
- )
493
- optional_args.add_argument(
494
- "--prefix",
495
- type=str,
496
- required=False,
497
- default="output",
498
- help="Prefix for output files",
499
- )
500
-
501
- optional_args.add_argument(
502
- "--resume_imputed",
503
- type=str,
504
- required=False,
505
- help="Read in imputed data from a file instead of doing the imputation",
506
- )
507
- # Add help menu
508
- optional_args.add_argument(
509
- "-h", "--help", action="help", help="Displays this help menu"
510
- )
511
-
512
- # If no command-line arguments are called then exit and call help menu.
513
- if len(sys.argv) == 1:
514
- print("\nExiting because no command-line options were called.\n")
515
- parser.print_help(sys.stderr)
516
- sys.exit(1)
517
-
518
- args = parser.parse_args()
519
- return args
520
-
521
-
522
- if __name__ == "__main__":
523
- main()
test/test.py DELETED
@@ -1,151 +0,0 @@
1
- import sys
2
- import os
3
- import copy
4
- import unittest
5
- import pprint
6
- from snpio import GenotypeData
7
- from pgsui import *
8
- from pgsui.utils.misc import HiddenPrints
9
-
10
-
11
- class TestMyClasses(unittest.TestCase):
12
- def setUp(self):
13
- with HiddenPrints():
14
- self.genotype_data = GenotypeData(
15
- filename="pgsui/example_data/phylip_files/test_n100.phy",
16
- popmapfile="pgsui/example_data/popmaps/test.popmap",
17
- guidetree="pgsui/example_data/trees/test.tre",
18
- qmatrix="pgsui/example_data/trees/test.qmat",
19
- siterates="pgsui/example_data/trees/test_siterates_n100.txt",
20
- prefix="test_imputer",
21
- force_popmap=True,
22
- plot_format="png",
23
- )
24
-
25
- # Create a SimGenotypeDataTransformer instance and use it
26
- # to simulate missing data
27
- self.transformer = SimGenotypeDataTransformer(
28
- genotype_data=self.genotype_data,
29
- prop_missing=0.2,
30
- strategy="random",
31
- )
32
- self.transformer.fit(self.genotype_data.genotypes_012(fmt="numpy"))
33
- self.simulated_data = copy.deepcopy(self.genotype_data)
34
-
35
- self.simulated_data.genotypes_012 = self.transformer.transform(
36
- self.genotype_data.genotypes_012(fmt="numpy")
37
- )
38
-
39
- def _test_class(self, class_instance, do_gridsearch=False):
40
- print(f"\nMETHOD: {class_instance.__name__}\n")
41
-
42
- if do_gridsearch:
43
- # Do a simple test.
44
- if class_instance in [ImputeRandomForest, ImputeXGBoost]:
45
- param_grid = {"n_estimators": [50, 100]} # Do a simple test
46
- elif class_instance in [
47
- ImputeVAE,
48
- ImputeStandardAutoEncoder,
49
- ImputeNLPCA,
50
- ImputeUBP,
51
- ]:
52
- param_grid = {"dropout_rate": [0.1, 0.2]}
53
- elif class_instance == ImputeKNN:
54
- param_grid = {"n_neighbors": [5, 8]}
55
- else:
56
- param_grid = None
57
-
58
- instance = class_instance(
59
- self.simulated_data,
60
- gridparams=param_grid,
61
- )
62
- imputed_data = instance.imputed.genotypes_012(fmt="numpy")
63
-
64
- # Test that the imputed values are close to the original values
65
- accuracy = self.transformer.accuracy(
66
- self.genotype_data.genotypes_012(fmt="numpy"), imputed_data
67
- )
68
-
69
- (
70
- auc_roc_scores,
71
- precision_scores,
72
- recall_scores,
73
- avg_precision_scores,
74
- ) = self.transformer.auc_roc_pr_ap(
75
- self.genotype_data.genotypes_012(fmt="numpy"), imputed_data
76
- )
77
-
78
- pprint.PrettyPrinter(indent=4, sort_dicts=True).pprint(
79
- f"OVERALL ACCURACY: {accuracy}"
80
- )
81
- pprint.PrettyPrinter(indent=4, sort_dicts=True).pprint(
82
- f"AUC-ROC PER CLASS: {dict(zip(range(3), auc_roc_scores))}"
83
- )
84
- pprint.PrettyPrinter(indent=4, sort_dicts=True).pprint(
85
- f"PRECISION PER CLASS: {dict(zip(range(3), precision_scores))}"
86
- )
87
- pprint.PrettyPrinter(indent=4, sort_dicts=True).pprint(
88
- f"RECALL PER CLASS: {dict(zip(range(3), recall_scores))}"
89
- )
90
- pprint.PrettyPrinter(indent=4, sort_dicts=True).pprint(
91
- f"AVERAGE PRECISION PER CLASS: {dict(zip(range(3), avg_precision_scores))}"
92
- )
93
- print("\n")
94
-
95
- def test_ImputeKNN(self):
96
- self._test_class(ImputeKNN)
97
-
98
- def test_ImputeRandomForest(self):
99
- self._test_class(ImputeRandomForest)
100
-
101
- def test_ImputeXGBoost(self):
102
- self._test_class(ImputeXGBoost)
103
-
104
- def test_ImputeVAE(self):
105
- self._test_class(ImputeVAE)
106
-
107
- def test_ImputeStandardAutoEncoder(self):
108
- self._test_class(ImputeStandardAutoEncoder)
109
-
110
- def test_ImputeUBP(self):
111
- self._test_class(ImputeUBP)
112
-
113
- def test_ImputeNLPCA(self):
114
- self._test_class(ImputeNLPCA)
115
-
116
- def test_ImputeKNN_grid(self):
117
- self._test_class(ImputeKNN, do_gridsearch=True)
118
-
119
- def test_ImputeRandomForest_grid(self):
120
- self._test_class(ImputeRandomForest, do_gridsearch=True)
121
-
122
- def test_ImputeXGBoost_grid(self):
123
- self._test_class(ImputeXGBoost, do_gridsearch=True)
124
-
125
- def test_ImputeVAE_grid(self):
126
- self._test_class(ImputeVAE, do_gridsearch=True)
127
-
128
- def test_ImputeStandardAutoEncoder_grid(self):
129
- self._test_class(ImputeStandardAutoEncoder, do_gridsearch=True)
130
-
131
- def test_ImputeUBP_grid(self):
132
- self._test_class(ImputeUBP, do_gridsearch=True)
133
-
134
- def test_ImputeNLPCA_grid(self):
135
- self._test_class(ImputeNLPCA, do_gridsearch=True)
136
-
137
- def test_ImputePhylo(self):
138
- self._test_class(ImputePhylo)
139
-
140
- def test_ImputeAlleleFreq(self):
141
- self._test_class(ImputeAlleleFreq)
142
-
143
- def test_ImputeMF(self):
144
- self._test_class(ImputeMF)
145
-
146
- def test_ImputeRefAllele(self):
147
- self._test_class(ImputeRefAllele)
148
-
149
-
150
- if __name__ == "__main__":
151
- unittest.main()