pg-sui 0.2.3__py3-none-any.whl → 1.6.16a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. pg_sui-1.6.16a3.dist-info/METADATA +292 -0
  2. pg_sui-1.6.16a3.dist-info/RECORD +81 -0
  3. {pg_sui-0.2.3.dist-info → pg_sui-1.6.16a3.dist-info}/WHEEL +1 -1
  4. pg_sui-1.6.16a3.dist-info/entry_points.txt +4 -0
  5. {pg_sui-0.2.3.dist-info → pg_sui-1.6.16a3.dist-info/licenses}/LICENSE +0 -0
  6. pg_sui-1.6.16a3.dist-info/top_level.txt +1 -0
  7. pgsui/__init__.py +35 -54
  8. pgsui/_version.py +34 -0
  9. pgsui/cli.py +922 -0
  10. pgsui/data_processing/__init__.py +0 -0
  11. pgsui/data_processing/config.py +565 -0
  12. pgsui/data_processing/containers.py +1436 -0
  13. pgsui/data_processing/transformers.py +557 -907
  14. pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
  15. pgsui/electron/app/__main__.py +5 -0
  16. pgsui/electron/app/extra-resources/.gitkeep +1 -0
  17. pgsui/electron/app/icons/icons/1024x1024.png +0 -0
  18. pgsui/electron/app/icons/icons/128x128.png +0 -0
  19. pgsui/electron/app/icons/icons/16x16.png +0 -0
  20. pgsui/electron/app/icons/icons/24x24.png +0 -0
  21. pgsui/electron/app/icons/icons/256x256.png +0 -0
  22. pgsui/electron/app/icons/icons/32x32.png +0 -0
  23. pgsui/electron/app/icons/icons/48x48.png +0 -0
  24. pgsui/electron/app/icons/icons/512x512.png +0 -0
  25. pgsui/electron/app/icons/icons/64x64.png +0 -0
  26. pgsui/electron/app/icons/icons/icon.icns +0 -0
  27. pgsui/electron/app/icons/icons/icon.ico +0 -0
  28. pgsui/electron/app/main.js +227 -0
  29. pgsui/electron/app/package-lock.json +6894 -0
  30. pgsui/electron/app/package.json +51 -0
  31. pgsui/electron/app/preload.js +15 -0
  32. pgsui/electron/app/server.py +157 -0
  33. pgsui/electron/app/ui/logo.png +0 -0
  34. pgsui/electron/app/ui/renderer.js +131 -0
  35. pgsui/electron/app/ui/styles.css +59 -0
  36. pgsui/electron/app/ui/ui_shim.js +72 -0
  37. pgsui/electron/bootstrap.py +43 -0
  38. pgsui/electron/launch.py +57 -0
  39. pgsui/electron/package.json +14 -0
  40. pgsui/example_data/__init__.py +0 -0
  41. pgsui/example_data/phylip_files/__init__.py +0 -0
  42. pgsui/example_data/phylip_files/test.phy +0 -0
  43. pgsui/example_data/popmaps/__init__.py +0 -0
  44. pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
  45. pgsui/example_data/structure_files/__init__.py +0 -0
  46. pgsui/example_data/structure_files/test.pops.2row.allsites.str +0 -0
  47. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
  48. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
  49. pgsui/impute/__init__.py +0 -0
  50. pgsui/impute/deterministic/imputers/allele_freq.py +725 -0
  51. pgsui/impute/deterministic/imputers/mode.py +844 -0
  52. pgsui/impute/deterministic/imputers/nmf.py +221 -0
  53. pgsui/impute/deterministic/imputers/phylo.py +973 -0
  54. pgsui/impute/deterministic/imputers/ref_allele.py +669 -0
  55. pgsui/impute/supervised/__init__.py +0 -0
  56. pgsui/impute/supervised/base.py +343 -0
  57. pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
  58. pgsui/impute/supervised/imputers/hist_gradient_boosting.py +317 -0
  59. pgsui/impute/supervised/imputers/random_forest.py +291 -0
  60. pgsui/impute/unsupervised/__init__.py +0 -0
  61. pgsui/impute/unsupervised/base.py +1121 -0
  62. pgsui/impute/unsupervised/callbacks.py +92 -262
  63. {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
  64. pgsui/impute/unsupervised/imputers/autoencoder.py +1361 -0
  65. pgsui/impute/unsupervised/imputers/nlpca.py +1666 -0
  66. pgsui/impute/unsupervised/imputers/ubp.py +1660 -0
  67. pgsui/impute/unsupervised/imputers/vae.py +1316 -0
  68. pgsui/impute/unsupervised/loss_functions.py +261 -0
  69. pgsui/impute/unsupervised/models/__init__.py +0 -0
  70. pgsui/impute/unsupervised/models/autoencoder_model.py +215 -567
  71. pgsui/impute/unsupervised/models/nlpca_model.py +155 -394
  72. pgsui/impute/unsupervised/models/ubp_model.py +180 -1106
  73. pgsui/impute/unsupervised/models/vae_model.py +269 -630
  74. pgsui/impute/unsupervised/nn_scorers.py +255 -0
  75. pgsui/utils/__init__.py +0 -0
  76. pgsui/utils/classification_viz.py +608 -0
  77. pgsui/utils/logging_utils.py +22 -0
  78. pgsui/utils/misc.py +35 -480
  79. pgsui/utils/plotting.py +996 -829
  80. pgsui/utils/pretty_metrics.py +290 -0
  81. pgsui/utils/scorers.py +213 -666
  82. pg_sui-0.2.3.dist-info/METADATA +0 -322
  83. pg_sui-0.2.3.dist-info/RECORD +0 -75
  84. pg_sui-0.2.3.dist-info/top_level.txt +0 -3
  85. pgsui/example_data/phylip_files/test_n10.phy +0 -118
  86. pgsui/example_data/phylip_files/test_n100.phy +0 -118
  87. pgsui/example_data/phylip_files/test_n2.phy +0 -118
  88. pgsui/example_data/phylip_files/test_n500.phy +0 -118
  89. pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
  90. pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
  91. pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
  92. pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
  93. pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
  94. pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
  95. pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
  96. pgsui/example_data/trees/test.iqtree +0 -376
  97. pgsui/example_data/trees/test.qmat +0 -5
  98. pgsui/example_data/trees/test.rate +0 -2033
  99. pgsui/example_data/trees/test.tre +0 -1
  100. pgsui/example_data/trees/test_n10.rate +0 -19
  101. pgsui/example_data/trees/test_n100.rate +0 -109
  102. pgsui/example_data/trees/test_n500.rate +0 -509
  103. pgsui/example_data/trees/test_siterates.txt +0 -2024
  104. pgsui/example_data/trees/test_siterates_n10.txt +0 -10
  105. pgsui/example_data/trees/test_siterates_n100.txt +0 -100
  106. pgsui/example_data/trees/test_siterates_n500.txt +0 -500
  107. pgsui/example_data/vcf_files/test.vcf +0 -244
  108. pgsui/example_data/vcf_files/test.vcf.gz +0 -0
  109. pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
  110. pgsui/impute/estimators.py +0 -1268
  111. pgsui/impute/impute.py +0 -1463
  112. pgsui/impute/simple_imputers.py +0 -1431
  113. pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -782
  114. pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1024
  115. pgsui/impute/unsupervised/keras_classifiers.py +0 -697
  116. pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
  117. pgsui/impute/unsupervised/neural_network_imputers.py +0 -1440
  118. pgsui/impute/unsupervised/neural_network_methods.py +0 -1395
  119. pgsui/pg_sui.py +0 -261
  120. pgsui/utils/sequence_tools.py +0 -407
  121. simulation/sim_benchmarks.py +0 -333
  122. simulation/sim_treeparams.py +0 -475
  123. test/__init__.py +0 -0
  124. test/pg_sui_simtest.py +0 -215
  125. test/pg_sui_testing.py +0 -523
  126. test/test.py +0 -151
  127. test/test_pgsui.py +0 -374
  128. test/test_tkc.py +0 -185
@@ -0,0 +1,292 @@
1
+ Metadata-Version: 2.4
2
+ Name: pg-sui
3
+ Version: 1.6.16a3
4
+ Summary: Python machine and deep learning API to impute missing genotypes
5
+ Author-email: "Drs. Bradley T. Martin and Tyler K. Chafin" <evobio721@gmail.com>
6
+ Maintainer-email: "Dr. Bradley T. Martin" <evobio721@gmail.com>
7
+ License: GNU General Public License v3 (GPLv3)
8
+ Project-URL: Homepage, https://github.com/btmartin721/PG-SUI
9
+ Project-URL: Documentation, https://pg-sui.readthedocs.io/en/latest/
10
+ Project-URL: Source, https://github.com/btmartin721/PG-SUI.git
11
+ Project-URL: BugTracker, https://github.com/btmartin721/PG-SUI/issues
12
+ Keywords: impute,imputation,AI,deep learning,machine learning,neural network,vae,autoencoder,ubp,nlpca,population genetics,unsupervised,supervised,bioinformatics,snp,genomics,genotype,missing data,data analysis,data science,statistics,data visualization,python
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Development Status :: 4 - Beta
17
+ Classifier: Environment :: Console
18
+ Classifier: Intended Audience :: Science/Research
19
+ Classifier: Intended Audience :: Developers
20
+ Classifier: Intended Audience :: Education
21
+ Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
22
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
23
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
25
+ Classifier: Topic :: Scientific/Engineering :: Visualization
26
+ Classifier: Operating System :: MacOS
27
+ Classifier: Operating System :: MacOS :: MacOS X
28
+ Classifier: Operating System :: Unix
29
+ Classifier: Operating System :: POSIX
30
+ Classifier: Natural Language :: English
31
+ Requires-Python: >=3.11
32
+ Description-Content-Type: text/markdown
33
+ License-File: LICENSE
34
+ Requires-Dist: matplotlib
35
+ Requires-Dist: numpy>=2.1
36
+ Requires-Dist: pandas>=2.2.2
37
+ Requires-Dist: scikit-learn>=1.4
38
+ Requires-Dist: scipy
39
+ Requires-Dist: seaborn
40
+ Requires-Dist: torch
41
+ Requires-Dist: tqdm
42
+ Requires-Dist: toytree
43
+ Requires-Dist: optuna
44
+ Requires-Dist: rich
45
+ Requires-Dist: rich[jupyter]
46
+ Requires-Dist: snpio
47
+ Provides-Extra: intel
48
+ Requires-Dist: scikit-learn-intelex; extra == "intel"
49
+ Provides-Extra: docs
50
+ Requires-Dist: sphinx; extra == "docs"
51
+ Requires-Dist: sphinx-rtd-theme; extra == "docs"
52
+ Requires-Dist: sphinx_autodoc_typehints; extra == "docs"
53
+ Requires-Dist: sphinxcontrib-napoleon; extra == "docs"
54
+ Requires-Dist: sphinxcontrib-programoutput; extra == "docs"
55
+ Provides-Extra: dev
56
+ Requires-Dist: twine; extra == "dev"
57
+ Requires-Dist: wheel; extra == "dev"
58
+ Requires-Dist: pytest; extra == "dev"
59
+ Requires-Dist: sphinx; extra == "dev"
60
+ Requires-Dist: sphinx-rtd-theme; extra == "dev"
61
+ Requires-Dist: sphinx-autodoc-typehints; extra == "dev"
62
+ Requires-Dist: sphinxcontrib-napoleon; extra == "dev"
63
+ Requires-Dist: sphinxcontrib-programoutput; extra == "dev"
64
+ Requires-Dist: requests; extra == "dev"
65
+ Provides-Extra: optional
66
+ Requires-Dist: PyObjC; extra == "optional"
67
+ Provides-Extra: gui
68
+ Requires-Dist: fastapi>=0.110; extra == "gui"
69
+ Requires-Dist: uvicorn[standard]>=0.23; extra == "gui"
70
+ Dynamic: license-file
71
+
72
+ # PG-SUI
73
+
74
+ ![PG-SUI Logo](https://github.com/btmartin721/PG-SUI/blob/master/img/pgsui-logo-faded.png)
75
+
76
+ Population Genomic Supervised and Unsupervised Imputation.
77
+
78
+ ## About PG-SUI
79
+
80
+ PG-SUI is a Python 3 API that uses machine learning to impute missing values from population genomic SNP data. There are several supervised and unsupervised machine learning algorithms available to impute missing data, as well as some non-machine learning imputers that are useful.
81
+
82
+ Below is some general information and a basic tutorial. For more detailed information, see our [API Documentation](https://pg-sui.readthedocs.io/en/latest/).
83
+
84
+ ### Unsupervised Imputation Methods
85
+
86
+ Unsupervised imputers include four custom neural network models:
87
+
88
+ + Variational Autoencoder (VAE) [1](#1)
89
+ + VAE models train themselves to reconstruct their input (i.e., the genotypes) [1](#1). To use VAE for imputation, the missing values are masked and the VAE model gets trained to reconstruct only on known values. Once the model is trained, it is then used to predict the missing values.
90
+ + Autoencoder [2](#2)
91
+ + A standard autoencoder that trains the input to predict itself [2](#2). As with VAE, missing values are masked and the model gets trained only on known values. Predictions are then made on the missing values.
92
+ + Non-linear Principal Component Analysis (NLPCA) [3](#3)
93
+ + NLPCA initializes random, reduced-dimensional input, then trains itself by using the known values (i.e., genotypes) as targets and refining the random input until it accurately predicts the genotype output [3](#3). The trained model can then predict the missing values.
94
+ + Unsupervised Backpropagation (UBP) [4](#4)
95
+ UBP is an extension of NLPCA that runs over three phases [4](#4). Phase 1 refines the randomly generated, reduced-dimensional input in a single-layer perceptron neural network to obtain good initial input values. Phase 2 uses the refined reduced-dimensional input from Phase 1 as input into a multi-layer perceptron (MLP), but in Phase 2 only the neural network weights are refined. Phase 3 uses an MLP to refine both the weights and the reduced-dimensional input. Once the model is trained, it can be used to predict the missing values.
96
+
97
+ ### Supervised Imputation Methods
98
+
99
+ Supervised methods utilize scikit-learn's ``IterativeImputer``, which is based on the MICE (Multivariate Imputation by Chained Equations) algorithm [5](#5). It iterates over each SNP site (i.e., feature) while using the N nearest neighbor features to inform the imputation. The number of nearest features can be adjusted by users. IterativeImputer currently works with the following scikit-learn classifiers:
100
+
101
+ + ImputeRandomForest
102
+ + ImputeHistGradientBoosting
103
+
104
+ See the [scikit-learn documentation](https://scikit-learn.org) for more information on IterativeImputer and each of the classifiers.
105
+
106
+ ### Non-Machine Learning (Deterministic) Methods
107
+
108
+ We also include several deterministic options for imputing missing data, including:
109
+
110
+ + Per-population mode per SNP site
111
+ + Overall mode per SNP site
112
+
113
+ ## Installing PG-SUI
114
+
115
+ PG-SUI supports both pip and conda distributions. Both are kept current with up-to-date releases.
116
+
117
+ ### Installation with Pip
118
+
119
+ To install PG-SUI with pip, do the following. It is strongly recommended to install pg-sui in a virtual environment.
120
+
121
+ ``` shell
122
+ python3 -m venv .pgsui-venv
123
+ source .pgsui-venv/bin/activate
124
+ pip install pg-sui
125
+ ```
126
+
127
+ ### Installation with Anaconda
128
+
129
+ To install PG-SUI with Anaconda, do the following:
130
+
131
+ ``` shell
132
+ conda create -n pgsui-env python=3.12
133
+ conda activate pgsui-env
134
+ conda install -c btmartin721 pg-sui
135
+ ```
136
+
137
+ ### Docker Container
138
+
139
+ We also maintain a Docker image that comes with PG-SUI preinstalled. This can be useful for automated workflows such as Nextflow or Snakemake.
140
+
141
+ ``` shell
142
+ docker pull pg-sui:latest
143
+ ```
144
+
145
+ ### Optional MacOS GUI
146
+
147
+ PG-SUI ships an optional Electron GUI (Graphical User Interface) wrapper around the Python CLI. Currently for the GUI, only MacOS is supported.
148
+
149
+ 1. Install the Python-side extras (FastAPI/uvicorn helper) if you want to serve from Python:
150
+ `pip install pg-sui[gui]`
151
+ 2. Install [Node.js](https://nodejs.org) and fetch the app dependencies:
152
+ `pgsui-gui-setup`
153
+ 3. Launch the graphical interface:
154
+ `pgsui-gui`
155
+
156
+ The GUI shells out to the same CLI underneath, so presets, overrides, and YAML configs behave identically.
157
+
158
+ ## Input Data
159
+
160
+ You can read your input files as a GenotypeData object from the [SNPio](https://snpio.readthedocs.io/en/latest/) package. SNPio supports the VCF, PHYLIP, STRUCTURE, and GENEPOP input file formats.
161
+
162
+ ``` python
163
+ # Import snpio. Automatically installed with pg-sui.
164
+ from snpio import VCFReader
165
+
166
+ # Read in VCF alignment.
167
+ # SNPio also supports PHYLIP, STRUCTURE, and GENEPOP input file formats.
168
+ data = VCFReader(
169
+ filename="pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz",
170
+ popmapfile="pgsui/example_data/popmaps/phylogen_nomx.popmap", # optional
171
+ force_popmap=True, # optional
172
+ )
173
+ ```
174
+
175
+ ## Supported Imputation Methods
176
+
177
+ There are several supported algorithms PG-SUI uses to impute missing data. Each one can be run by calling the corresponding class. You must provide a GenotypeData instance as the first positional argument.
178
+
179
+ You can import all the supported methods with the following:
180
+
181
+ ``` python
182
+ from pgsui import ImputeUBP, ImputeVAE, ImputeNLPCA, ImputeAutoencoder, ImputeRefAllele, ImputeMostFrequent, ImputeRandomForest, ImputeHistGradientBoosting
183
+ ```
184
+
185
+ ### Unsupervised Imputers
186
+
187
+ The four unsupervised imputers can be run by initializing them with the SNPio ``GenotypeData`` object and then calling ``fit()`` and ``transform()``.
188
+
189
+ ``` python
190
+ # Initialize the models, then fit and impute
191
+ vae = ImputeVAE(data) # Variational autoencoder
192
+ vae.fit()
193
+ vae_imputed = vae.transform()
194
+
195
+ nlpca = ImputeNLPCA(data) # Nonlinear PCA
196
+ nlpca.fit()
197
+ nlpca_imputed = nlpca.transform()
198
+
199
+ ubp = ImputeUBP(data) # Unsupervised backpropagation
200
+ ubp.fit()
201
+ ubp_imputed = ubp.transform()
202
+
203
+ ae = ImputeAutoencoder(data) # standard autoencoder
204
+ ae.fit()
205
+ ae_imputed = ae.transform()
206
+ ```
207
+
208
+ The ``*_imputed`` objects will be NumPy arrays that are compatible with SNPio's ``GenotypeData`` objects.
209
+
210
+ ### Supervised Imputers
211
+
212
+ Various supervised imputation options are supported, and these use the same API design.
213
+
214
+ ``` python
215
+ # Supervised IterativeImputer classifiers
216
+
217
+ # Random Forest
218
+ rf = ImputeRandomForest(data)
219
+ rf.fit()
220
+ imputed_rf = rf.transform()
221
+
222
+ # HistGradientBoosting
223
+ hgb = ImputeHistGradientBoosting(data)
224
+ hgb.fit()
225
+ imputed_hgb = hgb.transform()
226
+ ```
227
+
228
+ ### Non-machine learning methods
229
+
230
+ The following deterministic methods are supported. ``ImputeMostFrequent`` supports the mode-per-population or overall (global) mode options to inform imputation.
231
+
232
+ ``` python
233
+ # Per-population, per-locus mode
234
+ pop_mode = ImputeMostFrequent(data, by_populations=True)
235
+ pop_mode.fit()
236
+ imputed_pop_mode = pop_mode.transform()
237
+
238
+ # Per-locus mode
239
+ mode = ImputeMostFrequent(data, by_populations=False)
240
+ mode.fit()
241
+ imputed_mode = mode.transform()
242
+ ```
243
+
244
+ Or, always replace missing values with the reference allele.
245
+
246
+ ``` python
247
+ ref = ImputeRefAllele(data)
248
+ ref.fit()
249
+ imputed_ref = ref.transform()
250
+ ```
251
+
252
+ ## Command-Line Interface
253
+
254
+ Run the PG-SUI CLI with ``pg-sui`` (installed alongside the library). The CLI follows the same precedence model as the Python API:
255
+
256
+ ``code defaults < preset (--preset) < YAML (--config) < explicit CLI flags < --set key=value``.
257
+
258
+ Recent releases add explicit switches for the simulated-missingness workflow shared by the neural and supervised models:
259
+
260
+ + ``--sim-strategy`` selects one of ``random``, ``random_weighted``, ``random_weighted_inv``, ``nonrandom``, ``nonrandom_weighted``.
261
+ + ``--sim-prop`` sets the proportion of observed calls to temporarily mask when building the evaluation set.
262
+ + ``--simulate-missing`` disables simulated masking entirely (store-false flag); omit it to inherit preset/YAML defaults or re-enable via ``--set sim.simulate_missing=True``.
263
+
264
+ Example:
265
+
266
+ ``` shell
267
+ pg-sui \
268
+ --vcf data.vcf.gz \
269
+ --popmap pops.popmap \
270
+ --models ImputeUBP ImputeVAE \
271
+ --preset balanced \
272
+ --sim-strategy random_weighted_inv \
273
+ --sim-prop 0.25 \
274
+ --prefix ubp_and_vae \
275
+ --n-jobs 4 \
276
+ --tune-n-trials 100 \
277
+ --set tune.enabled=True
278
+ ```
279
+
280
+ CLI overrides cascade into every selected model, so a single invocation can evaluate multiple imputers with a consistent simulation strategy and output prefix.
281
+
282
+ ## References
283
+
284
+ 1. Kingma, D.P. & Welling, M. (2013). Auto-encoding variational bayes. In: Proceedings of the International Conference on Learning Representations (ICLR). arXiv:1312.6114 [stat.ML].
285
+
286
+ 2. Hinton, G.E., & Salakhutdinov, R.R. (2006). Reducing the dimensionality of data with neural networks. Science, 313(5786), 504-507.
287
+
288
+ 3. Scholz, M., Kaplan, F., Guy, C. L., Kopka, J., & Selbig, J. (2005). Non-linear PCA: a missing data approach. Bioinformatics, 21(20), 3887-3895.
289
+
290
+ 4. Gashler, M. S., Smith, M. R., Morris, R., & Martinez, T. (2016). Missing value imputation with unsupervised backpropagation. Computational Intelligence, 32(2), 196-215.
291
+
292
+ 5. Stef van Buuren, Karin Groothuis-Oudshoorn (2011). mice: Multivariate Imputation by Chained Equations in R. Journal of Statistical Software 45: 1-67.
@@ -0,0 +1,81 @@
1
+ pg_sui-1.6.16a3.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
2
+ pgsui/__init__.py,sha256=wQFzVX6vh8aUva1LCvP42jS7rcKCpkaU52YfZIy61q8,1493
3
+ pgsui/_version.py,sha256=Ob9Wzde013P9zPCsc2eaS--ZJQaRFPqutqIvBNJDnP4,714
4
+ pgsui/cli.py,sha256=F6v7Bv073NgH3Ku_Lb6DSciTCLzM_o49ktrKnjkiHJg,30229
5
+ pgsui/data_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ pgsui/data_processing/config.py,sha256=g5G7pjWG4uU2BRvBu_DpO0J_4X1Foa11X69imBWjaKA,20483
7
+ pgsui/data_processing/containers.py,sha256=mtVExBEQQXCID_TbngxY23I690d09WIzmJBdVa7HIo0,51513
8
+ pgsui/data_processing/transformers.py,sha256=kdwOTmfDjgQ3RmiwQIK7LYL4vQUpgA4bob7NHYgnYRM,30998
9
+ pgsui/electron/bootstrap.py,sha256=wnrXgX-hiqrMMFE9WGoD-UC8zeK2ZP6Kupu68PodVWI,1185
10
+ pgsui/electron/launch.py,sha256=M60o_jub77kJL-B9d_sMB7LYuTzWlOnQXR09efmCX2o,1715
11
+ pgsui/electron/package.json,sha256=12hbBq7xincW5V4645TTC58jfkA2rPgFP_eLb_WbhKo,372
12
+ pgsui/electron/app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ pgsui/electron/app/__main__.py,sha256=rF7tIpuy4OvMnzLms-fAUNfqYINEyR0Iofh1KKu5RI0,103
14
+ pgsui/electron/app/main.js,sha256=yobpq13uOdoLzwonCxjAFECwqop0bk8_-7pYBGci04U,8252
15
+ pgsui/electron/app/package-lock.json,sha256=0sp5XM4tclppG4vqFDtpKDIV9dBC0JVtjBtYKpSXIP8,246701
16
+ pgsui/electron/app/package.json,sha256=Mk5FALnz7hlV_5r5f84HgYp2oLjVD9SHnXmMVrTfzmo,1276
17
+ pgsui/electron/app/preload.js,sha256=L3lPsTGfgiGXLmQFwktW11En1YSb01hWlJD6kzzmbhE,803
18
+ pgsui/electron/app/server.py,sha256=q8gWwgkbaDjNZTT_dR1HXm45xv4V7wjrPk-Uc6axros,4568
19
+ pgsui/electron/app/extra-resources/.gitkeep,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
20
+ pgsui/electron/app/icons/icons/1024x1024.png,sha256=psZk62iG308QN9XP7sDCifGAHwG7__W-e2EUrF9hss0,42270
21
+ pgsui/electron/app/icons/icons/128x128.png,sha256=R9Za6fj289L432EdwAX7pUj4p4qy7iTMzghrrQzocCg,4705
22
+ pgsui/electron/app/icons/icons/16x16.png,sha256=TaVFFA8V4LQX_KwZCjko_eRPibmYEy1JSkuPicUZ0oQ,704
23
+ pgsui/electron/app/icons/icons/24x24.png,sha256=vMmSBmnTPhaVBfV_oCK_r3wykBq9JneyQu4TlHUVjBU,1094
24
+ pgsui/electron/app/icons/icons/256x256.png,sha256=D97Y8KYOg_RcsR8pNDcxyxtRz6X3qKkKw-oChy29tuY,9044
25
+ pgsui/electron/app/icons/icons/32x32.png,sha256=ah-Z-WybnXG801UV2xwAdvUNmQmy1C29EWLNlTgN8kI,1355
26
+ pgsui/electron/app/icons/icons/48x48.png,sha256=WYVea1_leKVpowEQCrbmV1zG5aMBRh-aMKpiziTKWYc,1995
27
+ pgsui/electron/app/icons/icons/512x512.png,sha256=A1YP6sKNFZEdVl7vVlIAUjunSzFhtXS0_e7bxRiadOQ,17784
28
+ pgsui/electron/app/icons/icons/64x64.png,sha256=tH4qDy3My1DPZz4QzSyz4P6_eel_3t246xKhtm1xKwg,2610
29
+ pgsui/electron/app/icons/icons/icon.icns,sha256=qaJExjDDJQk-uSbTIyPFn6a1hb8JRxs5HQmYdzZdb0c,108937
30
+ pgsui/electron/app/icons/icons/icon.ico,sha256=ljjD98wBOVfUSV3gPlMzXZ4FAprRdrAQENCZ76KTFmo,361102
31
+ pgsui/electron/app/ui/logo.png,sha256=uHhtVMYQsSCkcpeS8JPo-DVsAqwg3G4t-0lYuT4AP6E,38243
32
+ pgsui/electron/app/ui/renderer.js,sha256=SzG7WhiQ3lDp2DKgkOnRO8lohAyhXlnQi8V-r7lILeI,5351
33
+ pgsui/electron/app/ui/styles.css,sha256=NQPGczGXxw8XjrLbqKZtEQD-SICvciFuUfgtKm_FuHk,2675
34
+ pgsui/electron/app/ui/ui_shim.js,sha256=4qu_9qpMc_p-Boih0o8I2OAVL9TMS4DUUk7BRQIfCeM,2513
35
+ pgsui/example_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
+ pgsui/example_data/phylip_files/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
+ pgsui/example_data/phylip_files/test.phy,sha256=LEq2Q3hjmPVDP2uF1Ai8OH2If5Y58ny-GRfuXa3l61c,238537
38
+ pgsui/example_data/popmaps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
+ pgsui/example_data/popmaps/phylogen_nomx.popmap,sha256=JR84kJ5jVjJ45k4kpvGVt2JNJHApQg1xNs2pKPt-V4k,3310
40
+ pgsui/example_data/structure_files/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
+ pgsui/example_data/structure_files/test.pops.2row.allsites.str,sha256=fbmLcOUQSKyy71A77NPtJJMLSOkquPky0WiPlV4qHhA,967878
42
+ pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz,sha256=B9sxdIGPSbw4m4MTX_B1VHDx5cDicMNn83AFJy7SEAU,8053540
43
+ pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi,sha256=0gHDvboDAEZLQbXdYfUOiJL3oDOr1mOlaQxdlcz_erg,106087
44
+ pgsui/impute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
+ pgsui/impute/deterministic/imputers/allele_freq.py,sha256=tqaMggiNs6hxs4CN3No2d5llmmIPt0jQhHT0mHry2IM,29573
46
+ pgsui/impute/deterministic/imputers/mode.py,sha256=lQmrOYEyfQidqGaG86QB5HeM66IeOFLK1UoCvh-iDno,35049
47
+ pgsui/impute/deterministic/imputers/nmf.py,sha256=171_TTDZAe1NFjbmKQTOlPDe_rA1aO8q2Th5z0w2RU8,8086
48
+ pgsui/impute/deterministic/imputers/phylo.py,sha256=uN86_L2TeiUFOZLdU8pwymRiQf0SI7Sl6SAnCbAywVQ,38873
49
+ pgsui/impute/deterministic/imputers/ref_allele.py,sha256=OGdv4n1avIArItZ-V0kqX2g4OOZc2felh7HM1ikY2iU,28075
50
+ pgsui/impute/supervised/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
+ pgsui/impute/supervised/base.py,sha256=A92x1pS8DO0OwbhQem3KBAMbWs368KJcVl88zJ7wE10,13756
52
+ pgsui/impute/supervised/imputers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
+ pgsui/impute/supervised/imputers/hist_gradient_boosting.py,sha256=5LZsee2R9tbshmgVvLDIjGDihiQPvp9XfbaGwzL35E8,11509
54
+ pgsui/impute/supervised/imputers/random_forest.py,sha256=jpeaLjhzYrwRPc9nPasLgwOqBoHlBlymHMc3x5OkBWA,10393
55
+ pgsui/impute/unsupervised/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
+ pgsui/impute/unsupervised/base.py,sha256=DqgP8QNKmbUaljzx_lWC_zFuWTUO3iiRseYTQEWNG0Q,46945
57
+ pgsui/impute/unsupervised/callbacks.py,sha256=jkxncpKNRdssImv4N5c-Hq8VcA07QvxLoct7EqDW9RE,5026
58
+ pgsui/impute/unsupervised/loss_functions.py,sha256=f18uQnerj0KF9xcU2I1_Y2OCKKguXXaEDaYhJg0XElY,10089
59
+ pgsui/impute/unsupervised/nn_scorers.py,sha256=-rl5MBJm2GN6E1wPBIe1wMgdrHEHhYooUUxVbfcf1Z8,9758
60
+ pgsui/impute/unsupervised/imputers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
+ pgsui/impute/unsupervised/imputers/autoencoder.py,sha256=k8GJ80hysAteT-QUmRRtWfPcvH-QjUFHrWBZGVuFH1c,57872
62
+ pgsui/impute/unsupervised/imputers/nlpca.py,sha256=AChGz0Gp9Yt0qF8vtrDI4t5iIr70LQRb-8pFGQBYLZo,69086
63
+ pgsui/impute/unsupervised/imputers/ubp.py,sha256=sBJ9VSW2tTzXVjnXogjhuj6GixRaH0HcQ2B7cK4y6tw,71361
64
+ pgsui/impute/unsupervised/imputers/vae.py,sha256=hK3ZOxyLjQqm-3Hcq2BMUZOghsJiDGYNRQjqA7zoncs,54713
65
+ pgsui/impute/unsupervised/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
66
+ pgsui/impute/unsupervised/models/autoencoder_model.py,sha256=mHmfTkldJNpN7Dy7RTS2RnkE2L_K1rChNjpjDvzRlEQ,12333
67
+ pgsui/impute/unsupervised/models/nlpca_model.py,sha256=1NathvhsirBtd9UcmeJzRoVf7oi7PfDmRpt18Di63Cg,8021
68
+ pgsui/impute/unsupervised/models/ubp_model.py,sha256=4guGkQzCTIsDnImOjJV5kG1xc1ST9oO4aUeXrSBSpQg,8491
69
+ pgsui/impute/unsupervised/models/vae_model.py,sha256=hMG7K1OR95qLEEcprGSzIoTIISXHSu2yWDy6QkG93Lg,15576
70
+ pgsui/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
+ pgsui/utils/classification_viz.py,sha256=WkGQkEIkpTMLQvGdGoU53kn1iNUO9ipeg5stSY1rcFg,22012
72
+ pgsui/utils/logging_utils.py,sha256=o_ElRL05B_DrbALYkuW8s_azfKQiN8kJ4oXwshyIMyI,521
73
+ pgsui/utils/misc.py,sha256=Mw5CsspFJkDAcCRufk-lO7fKyVoYK7PRYXkLXKswUjI,3065
74
+ pgsui/utils/plotting.py,sha256=d5CTzGIpanu3j6rEB6fq_F1g8w_A2Ti_XiedRjIFFII,42444
75
+ pgsui/utils/pretty_metrics.py,sha256=dtN7Ohcx3qJYCw4JeJCXvthGDdSV7bgE8v6EGwHSAE0,9862
76
+ pgsui/utils/scorers.py,sha256=sL2upL2ZZMFBTMM4DiGiWeXrqc_fp1RRbleYCnuRUhw,12564
77
+ pg_sui-1.6.16a3.dist-info/METADATA,sha256=G87-keKhu56QhKFiAeGQVSD36EH_W2uNf9u4wciAsmU,12320
78
+ pg_sui-1.6.16a3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
79
+ pg_sui-1.6.16a3.dist-info/entry_points.txt,sha256=xidyl6yqQv7oj3XSzZC6Vv9l7aNgbHi_pjv-dJjGJds,129
80
+ pg_sui-1.6.16a3.dist-info/top_level.txt,sha256=87-oDpfY6sDY_uN-OM2lcnrgPesifhzwqFOajp9ukz0,6
81
+ pg_sui-1.6.16a3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.41.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -0,0 +1,4 @@
1
+ [console_scripts]
2
+ pg-sui = pgsui.cli:main
3
+ pgsui-gui = pgsui.electron.launch:main
4
+ pgsui-gui-setup = pgsui.electron.bootstrap:main
@@ -0,0 +1 @@
1
+ pgsui
pgsui/__init__.py CHANGED
@@ -1,63 +1,44 @@
1
1
  ## PG-SUI package by Bradley T. Martin and Tyler K. Chafin
2
2
  ## E-mail: evobio721@gmail.com
3
- ## Version 0.1, completed 13-Dec-2021
4
-
5
- # Suppresses tensorflow GPU warnings.
6
- import os
7
- import warnings
8
-
9
- os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
10
-
11
- from pgsui.utils.misc import get_processor_name
12
-
13
-
14
- # Requires scikit-learn-intellex package
15
- if get_processor_name().strip().startswith("Intel"):
16
- try:
17
- from sklearnex import patch_sklearn
18
-
19
- patch_sklearn()
20
- intelex = True
21
- except (ImportError, TypeError):
22
- warnings.warn(
23
- "Intel CPU detected but scikit-learn-intelex is not installed. We recommend installing it to speed up computation if your hardware supports it."
24
- )
25
- intelex = False
26
- else:
27
- intelex = False
28
-
29
- os.environ["intelex"] = str(intelex)
30
-
31
- from pgsui.impute.estimators import (
32
- ImputeKNN,
33
- ImputeNLPCA,
34
- ImputeRandomForest,
35
- ImputeStandardAutoEncoder,
36
- ImputeUBP,
37
- ImputeVAE,
38
- ImputeXGBoost,
3
+ from ._version import version as __version__
4
+
5
+ from pgsui.data_processing.containers import (
6
+ AutoencoderConfig,
7
+ HGBConfig,
8
+ MostFrequentConfig,
9
+ NLPCAConfig,
10
+ RefAlleleConfig,
11
+ RFConfig,
12
+ UBPConfig,
13
+ VAEConfig,
39
14
  )
40
-
41
- from pgsui.impute.simple_imputers import (
42
- ImputePhylo,
43
- ImputeMF,
44
- ImputeAlleleFreq,
45
- ImputeRefAllele,
15
+ from pgsui.impute.deterministic.imputers.mode import ImputeMostFrequent
16
+ from pgsui.impute.deterministic.imputers.ref_allele import ImputeRefAllele
17
+ from pgsui.impute.supervised.imputers.hist_gradient_boosting import (
18
+ ImputeHistGradientBoosting,
46
19
  )
47
-
48
- from pgsui.data_processing.transformers import SimGenotypeDataTransformer
20
+ from pgsui.impute.supervised.imputers.random_forest import ImputeRandomForest
21
+ from pgsui.impute.unsupervised.imputers.autoencoder import ImputeAutoencoder
22
+ from pgsui.impute.unsupervised.imputers.nlpca import ImputeNLPCA
23
+ from pgsui.impute.unsupervised.imputers.ubp import ImputeUBP
24
+ from pgsui.impute.unsupervised.imputers.vae import ImputeVAE
49
25
 
50
26
  __all__ = [
51
- "ImputeUBP",
27
+ "ImputeAutoencoder", # Unsupervised imputer classes
52
28
  "ImputeVAE",
53
- "ImputeXGBoost",
54
- "ImputeStandardAutoEncoder",
55
- "ImputeRandomForest",
56
29
  "ImputeNLPCA",
57
- "ImputeKNN",
58
- "SimGenotypeDataTransformer",
59
- "ImputePhylo",
60
- "ImputeMF",
61
- "ImputeAlleleFreq",
62
- "ImputeRefAllele",
30
+ "ImputeUBP",
31
+ "ImputeRandomForest", # Supervised imputer classes
32
+ "ImputeHistGradientBoosting",
33
+ "ImputeRefAllele", # Deterministic imputer classes
34
+ "ImputeMostFrequent",
35
+ "AutoencoderConfig", # Unsupervised imputer configs
36
+ "VAEConfig",
37
+ "NLPCAConfig",
38
+ "UBPConfig",
39
+ "MostFrequentConfig", # Deterministic imputer configs
40
+ "RefAlleleConfig",
41
+ "RFConfig", # Supervised imputer configs
42
+ "HGBConfig",
43
+ "__version__",
63
44
  ]
pgsui/_version.py ADDED
@@ -0,0 +1,34 @@
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
3
+
4
+ __all__ = [
5
+ "__version__",
6
+ "__version_tuple__",
7
+ "version",
8
+ "version_tuple",
9
+ "__commit_id__",
10
+ "commit_id",
11
+ ]
12
+
13
+ TYPE_CHECKING = False
14
+ if TYPE_CHECKING:
15
+ from typing import Tuple
16
+ from typing import Union
17
+
18
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
19
+ COMMIT_ID = Union[str, None]
20
+ else:
21
+ VERSION_TUPLE = object
22
+ COMMIT_ID = object
23
+
24
+ version: str
25
+ __version__: str
26
+ __version_tuple__: VERSION_TUPLE
27
+ version_tuple: VERSION_TUPLE
28
+ commit_id: COMMIT_ID
29
+ __commit_id__: COMMIT_ID
30
+
31
+ __version__ = version = '1.6.16a3'
32
+ __version_tuple__ = version_tuple = (1, 6, 16, 'a3')
33
+
34
+ __commit_id__ = commit_id = None