pg-sui 0.2.0__py3-none-any.whl → 1.6.14.dev9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info}/METADATA +101 -79
- pg_sui-1.6.14.dev9.dist-info/RECORD +81 -0
- {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info}/WHEEL +1 -1
- pg_sui-1.6.14.dev9.dist-info/entry_points.txt +4 -0
- {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info/licenses}/LICENSE +0 -0
- pg_sui-1.6.14.dev9.dist-info/top_level.txt +1 -0
- pgsui/__init__.py +35 -54
- pgsui/_version.py +34 -0
- pgsui/cli.py +909 -0
- pgsui/data_processing/__init__.py +0 -0
- pgsui/data_processing/config.py +565 -0
- pgsui/data_processing/containers.py +1424 -0
- pgsui/data_processing/transformers.py +557 -907
- pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
- pgsui/electron/app/__main__.py +5 -0
- pgsui/electron/app/extra-resources/.gitkeep +1 -0
- pgsui/electron/app/icons/icons/1024x1024.png +0 -0
- pgsui/electron/app/icons/icons/128x128.png +0 -0
- pgsui/electron/app/icons/icons/16x16.png +0 -0
- pgsui/electron/app/icons/icons/24x24.png +0 -0
- pgsui/electron/app/icons/icons/256x256.png +0 -0
- pgsui/electron/app/icons/icons/32x32.png +0 -0
- pgsui/electron/app/icons/icons/48x48.png +0 -0
- pgsui/electron/app/icons/icons/512x512.png +0 -0
- pgsui/electron/app/icons/icons/64x64.png +0 -0
- pgsui/electron/app/icons/icons/icon.icns +0 -0
- pgsui/electron/app/icons/icons/icon.ico +0 -0
- pgsui/electron/app/main.js +227 -0
- pgsui/electron/app/package-lock.json +6894 -0
- pgsui/electron/app/package.json +51 -0
- pgsui/electron/app/preload.js +15 -0
- pgsui/electron/app/server.py +157 -0
- pgsui/electron/app/ui/logo.png +0 -0
- pgsui/electron/app/ui/renderer.js +131 -0
- pgsui/electron/app/ui/styles.css +59 -0
- pgsui/electron/app/ui/ui_shim.js +72 -0
- pgsui/electron/bootstrap.py +43 -0
- pgsui/electron/launch.py +57 -0
- pgsui/electron/package.json +14 -0
- pgsui/example_data/__init__.py +0 -0
- pgsui/example_data/phylip_files/__init__.py +0 -0
- pgsui/example_data/phylip_files/test.phy +0 -0
- pgsui/example_data/popmaps/__init__.py +0 -0
- pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
- pgsui/example_data/structure_files/__init__.py +0 -0
- pgsui/example_data/structure_files/test.pops.2row.allsites.str +0 -0
- pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
- pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
- pgsui/impute/__init__.py +0 -0
- pgsui/impute/deterministic/imputers/allele_freq.py +725 -0
- pgsui/impute/deterministic/imputers/mode.py +844 -0
- pgsui/impute/deterministic/imputers/nmf.py +221 -0
- pgsui/impute/deterministic/imputers/phylo.py +973 -0
- pgsui/impute/deterministic/imputers/ref_allele.py +669 -0
- pgsui/impute/supervised/__init__.py +0 -0
- pgsui/impute/supervised/base.py +343 -0
- pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
- pgsui/impute/supervised/imputers/hist_gradient_boosting.py +317 -0
- pgsui/impute/supervised/imputers/random_forest.py +291 -0
- pgsui/impute/unsupervised/__init__.py +0 -0
- pgsui/impute/unsupervised/base.py +1118 -0
- pgsui/impute/unsupervised/callbacks.py +92 -262
- {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
- pgsui/impute/unsupervised/imputers/autoencoder.py +1285 -0
- pgsui/impute/unsupervised/imputers/nlpca.py +1554 -0
- pgsui/impute/unsupervised/imputers/ubp.py +1575 -0
- pgsui/impute/unsupervised/imputers/vae.py +1228 -0
- pgsui/impute/unsupervised/loss_functions.py +261 -0
- pgsui/impute/unsupervised/models/__init__.py +0 -0
- pgsui/impute/unsupervised/models/autoencoder_model.py +215 -567
- pgsui/impute/unsupervised/models/nlpca_model.py +155 -394
- pgsui/impute/unsupervised/models/ubp_model.py +180 -1106
- pgsui/impute/unsupervised/models/vae_model.py +269 -630
- pgsui/impute/unsupervised/nn_scorers.py +255 -0
- pgsui/utils/__init__.py +0 -0
- pgsui/utils/classification_viz.py +608 -0
- pgsui/utils/logging_utils.py +22 -0
- pgsui/utils/misc.py +35 -480
- pgsui/utils/plotting.py +996 -829
- pgsui/utils/pretty_metrics.py +290 -0
- pgsui/utils/scorers.py +213 -666
- pg_sui-0.2.0.dist-info/RECORD +0 -75
- pg_sui-0.2.0.dist-info/top_level.txt +0 -3
- pgsui/example_data/phylip_files/test_n10.phy +0 -118
- pgsui/example_data/phylip_files/test_n100.phy +0 -118
- pgsui/example_data/phylip_files/test_n2.phy +0 -118
- pgsui/example_data/phylip_files/test_n500.phy +0 -118
- pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
- pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
- pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
- pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
- pgsui/example_data/trees/test.iqtree +0 -376
- pgsui/example_data/trees/test.qmat +0 -5
- pgsui/example_data/trees/test.rate +0 -2033
- pgsui/example_data/trees/test.tre +0 -1
- pgsui/example_data/trees/test_n10.rate +0 -19
- pgsui/example_data/trees/test_n100.rate +0 -109
- pgsui/example_data/trees/test_n500.rate +0 -509
- pgsui/example_data/trees/test_siterates.txt +0 -2024
- pgsui/example_data/trees/test_siterates_n10.txt +0 -10
- pgsui/example_data/trees/test_siterates_n100.txt +0 -100
- pgsui/example_data/trees/test_siterates_n500.txt +0 -500
- pgsui/example_data/vcf_files/test.vcf +0 -244
- pgsui/example_data/vcf_files/test.vcf.gz +0 -0
- pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
- pgsui/impute/estimators.py +0 -1268
- pgsui/impute/impute.py +0 -1463
- pgsui/impute/simple_imputers.py +0 -1431
- pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -782
- pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1024
- pgsui/impute/unsupervised/keras_classifiers.py +0 -697
- pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
- pgsui/impute/unsupervised/neural_network_imputers.py +0 -1440
- pgsui/impute/unsupervised/neural_network_methods.py +0 -1395
- pgsui/pg_sui.py +0 -261
- pgsui/utils/sequence_tools.py +0 -407
- simulation/sim_benchmarks.py +0 -333
- simulation/sim_treeparams.py +0 -475
- test/__init__.py +0 -0
- test/pg_sui_simtest.py +0 -215
- test/pg_sui_testing.py +0 -523
- test/test.py +0 -151
- test/test_pgsui.py +0 -374
- test/test_tkc.py +0 -185
|
@@ -1,58 +1,73 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: pg-sui
|
|
3
|
-
Version:
|
|
4
|
-
Summary: Python machine and deep learning
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
Author-email: evobio721@gmail.com
|
|
8
|
-
Maintainer: Bradley T. Martin
|
|
9
|
-
Maintainer-email: evobio721@gmail.com
|
|
3
|
+
Version: 1.6.14.dev9
|
|
4
|
+
Summary: Python machine and deep learning API to impute missing genotypes
|
|
5
|
+
Author-email: "Drs. Bradley T. Martin and Tyler K. Chafin" <evobio721@gmail.com>
|
|
6
|
+
Maintainer-email: "Dr. Bradley T. Martin" <evobio721@gmail.com>
|
|
10
7
|
License: GNU General Public License v3 (GPLv3)
|
|
11
8
|
Project-URL: Homepage, https://github.com/btmartin721/PG-SUI
|
|
12
9
|
Project-URL: Documentation, https://pg-sui.readthedocs.io/en/latest/
|
|
13
10
|
Project-URL: Source, https://github.com/btmartin721/PG-SUI.git
|
|
14
|
-
Project-URL:
|
|
15
|
-
Keywords:
|
|
16
|
-
Platform: UNKNOWN
|
|
11
|
+
Project-URL: BugTracker, https://github.com/btmartin721/PG-SUI/issues
|
|
12
|
+
Keywords: impute,imputation,AI,deep learning,machine learning,neural network,vae,autoencoder,ubp,nlpca,population genetics,unsupervised,supervised,bioinformatics,snp,genomics,genotype,missing data,data analysis,data science,statistics,data visualization,python
|
|
17
13
|
Classifier: Programming Language :: Python :: 3
|
|
18
|
-
Classifier: Programming Language :: Python :: 3
|
|
19
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
20
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
22
14
|
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
23
16
|
Classifier: Development Status :: 4 - Beta
|
|
17
|
+
Classifier: Environment :: Console
|
|
24
18
|
Classifier: Intended Audience :: Science/Research
|
|
19
|
+
Classifier: Intended Audience :: Developers
|
|
20
|
+
Classifier: Intended Audience :: Education
|
|
25
21
|
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
|
26
22
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
27
|
-
Classifier:
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
24
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
25
|
+
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
26
|
+
Classifier: Operating System :: MacOS
|
|
27
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
|
28
|
+
Classifier: Operating System :: Unix
|
|
29
|
+
Classifier: Operating System :: POSIX
|
|
28
30
|
Classifier: Natural Language :: English
|
|
29
|
-
Requires-Python: >=3.
|
|
31
|
+
Requires-Python: >=3.11
|
|
30
32
|
Description-Content-Type: text/markdown
|
|
31
|
-
|
|
32
|
-
Requires-Dist: jupyterlab
|
|
33
|
-
Requires-Dist: keras
|
|
33
|
+
License-File: LICENSE
|
|
34
34
|
Requires-Dist: matplotlib
|
|
35
|
-
Requires-Dist: numpy
|
|
36
|
-
Requires-Dist: pandas
|
|
37
|
-
Requires-Dist:
|
|
38
|
-
Requires-Dist: scikeras
|
|
39
|
-
Requires-Dist: scikit-learn (>=1.0)
|
|
35
|
+
Requires-Dist: numpy>=2.1
|
|
36
|
+
Requires-Dist: pandas>=2.2.2
|
|
37
|
+
Requires-Dist: scikit-learn>=1.4
|
|
40
38
|
Requires-Dist: scipy
|
|
41
39
|
Requires-Dist: seaborn
|
|
42
|
-
Requires-Dist:
|
|
43
|
-
Requires-Dist: snpio
|
|
44
|
-
Requires-Dist: tensorflow
|
|
45
|
-
Requires-Dist: toytree
|
|
40
|
+
Requires-Dist: torch
|
|
46
41
|
Requires-Dist: tqdm
|
|
47
|
-
Requires-Dist:
|
|
48
|
-
Requires-Dist:
|
|
49
|
-
Requires-Dist:
|
|
50
|
-
|
|
51
|
-
Requires-Dist:
|
|
52
|
-
Requires-Dist: sphinx (<7) ; extra == 'docs'
|
|
53
|
-
Requires-Dist: sphinx-autodoc-typehints ; extra == 'docs'
|
|
42
|
+
Requires-Dist: toytree
|
|
43
|
+
Requires-Dist: optuna
|
|
44
|
+
Requires-Dist: rich
|
|
45
|
+
Requires-Dist: rich[jupyter]
|
|
46
|
+
Requires-Dist: snpio
|
|
54
47
|
Provides-Extra: intel
|
|
55
|
-
Requires-Dist: scikit-learn-intelex
|
|
48
|
+
Requires-Dist: scikit-learn-intelex; extra == "intel"
|
|
49
|
+
Provides-Extra: docs
|
|
50
|
+
Requires-Dist: sphinx; extra == "docs"
|
|
51
|
+
Requires-Dist: sphinx-rtd-theme; extra == "docs"
|
|
52
|
+
Requires-Dist: sphinx_autodoc_typehints; extra == "docs"
|
|
53
|
+
Requires-Dist: sphinxcontrib-napoleon; extra == "docs"
|
|
54
|
+
Requires-Dist: sphinxcontrib-programoutput; extra == "docs"
|
|
55
|
+
Provides-Extra: dev
|
|
56
|
+
Requires-Dist: twine; extra == "dev"
|
|
57
|
+
Requires-Dist: wheel; extra == "dev"
|
|
58
|
+
Requires-Dist: pytest; extra == "dev"
|
|
59
|
+
Requires-Dist: sphinx; extra == "dev"
|
|
60
|
+
Requires-Dist: sphinx-rtd-theme; extra == "dev"
|
|
61
|
+
Requires-Dist: sphinx-autodoc-typehints; extra == "dev"
|
|
62
|
+
Requires-Dist: sphinxcontrib-napoleon; extra == "dev"
|
|
63
|
+
Requires-Dist: sphinxcontrib-programoutput; extra == "dev"
|
|
64
|
+
Requires-Dist: requests; extra == "dev"
|
|
65
|
+
Provides-Extra: optional
|
|
66
|
+
Requires-Dist: PyObjC; extra == "optional"
|
|
67
|
+
Provides-Extra: gui
|
|
68
|
+
Requires-Dist: fastapi>=0.110; extra == "gui"
|
|
69
|
+
Requires-Dist: uvicorn[standard]>=0.23; extra == "gui"
|
|
70
|
+
Dynamic: license-file
|
|
56
71
|
|
|
57
72
|
|
|
58
73
|
<img src="https://github.com/btmartin721/PG-SUI/blob/master/img/pgsui-logo-faded.png" alt="PG-SUI Logo" width="50%" height="50%">
|
|
@@ -64,13 +79,13 @@ Population Genomic Supervised and Unsupervised Imputation.
|
|
|
64
79
|
|
|
65
80
|
## About PG-SUI
|
|
66
81
|
|
|
67
|
-
PG-SUI is a Python 3 API that uses machine learning to impute missing values from population genomic SNP data. There are several supervised and unsupervised machine learning algorithms available to impute missing data, as well as some non-machine learning imputers that are useful.
|
|
82
|
+
PG-SUI is a Python 3 API that uses machine learning to impute missing values from population genomic SNP data. There are several supervised and unsupervised machine learning algorithms available to impute missing data, as well as some non-machine learning imputers that are useful.
|
|
68
83
|
|
|
69
84
|
Below is some general information and a basic tutorial. For more detailed information, see our [API Documentation](https://pg-sui.readthedocs.io/en/latest/).
|
|
70
85
|
|
|
71
86
|
### Supervised Imputation Methods
|
|
72
87
|
|
|
73
|
-
Supervised methods utilze the scikit-learn's IterativeImputer, which is based on the MICE (Multivariate Imputation by Chained Equations) algorithm ([1](#1)), and iterates over each SNP site (i.e., feature) while uses the N nearest neighbor features to inform the imputation. The number of nearest features can be adjusted by users. IterativeImputer currently works with any of the following scikit-learn classifiers:
|
|
88
|
+
Supervised methods utilize scikit-learn's IterativeImputer, which is based on the MICE (Multivariate Imputation by Chained Equations) algorithm ([1](#1)), and iterate over each SNP site (i.e., feature) while using the N nearest neighbor features to inform the imputation. The number of nearest features can be adjusted by users. IterativeImputer currently works with any of the following scikit-learn classifiers:
|
|
74
89
|
|
|
75
90
|
+ K-Nearest Neighbors
|
|
76
91
|
+ Random Forest
|
|
@@ -111,64 +126,46 @@ These four "simple" imputation methods can be used as standalone imputers, as th
|
|
|
111
126
|
The easiest way to install PG-SUI is to use pip:
|
|
112
127
|
|
|
113
128
|
```
|
|
114
|
-
pip install
|
|
129
|
+
pip install pg-sui
|
|
115
130
|
```
|
|
116
131
|
|
|
117
132
|
If you have an Intel CPU and want to use the scikit-learn-intelex package to speed up scikit-learn computations, you can do:
|
|
118
133
|
|
|
119
134
|
```
|
|
120
|
-
pip install
|
|
135
|
+
pip install pg-sui[intel]
|
|
121
136
|
```
|
|
122
137
|
|
|
138
|
+
### Optional GUI (Electron)
|
|
139
|
+
|
|
140
|
+
PG-SUI ships an Electron GUI wrapper around the Python CLI.
|
|
141
|
+
|
|
142
|
+
1. Install the Python-side extras (FastAPI/uvicorn helper) if you want to serve from Python:
|
|
143
|
+
`pip install pg-sui[gui]`
|
|
144
|
+
2. Install Node.js (https://nodejs.org) and fetch the app dependencies once:
|
|
145
|
+
`pgsui-gui-setup`
|
|
146
|
+
3. Launch the GUI:
|
|
147
|
+
`pgsui-gui`
|
|
148
|
+
|
|
149
|
+
The GUI shells out to the same CLI underneath, so presets/overrides and YAML configs behave identically.
|
|
150
|
+
|
|
123
151
|
## Manual Installation
|
|
124
152
|
|
|
125
153
|
### Dependencies
|
|
126
154
|
|
|
127
|
-
+ python >= 3.
|
|
155
|
+
+ python >= 3.11
|
|
128
156
|
+ pandas
|
|
129
|
-
+ numpy
|
|
157
|
+
+ numpy
|
|
130
158
|
+ scipy
|
|
131
159
|
+ matplotlib
|
|
132
160
|
+ seaborn
|
|
133
161
|
+ plotly
|
|
134
162
|
+ kaleido
|
|
135
|
-
+ jupyterlab
|
|
136
163
|
+ tqdm
|
|
137
164
|
+ toytree
|
|
138
|
-
+ pyvolve
|
|
139
165
|
+ scikit-learn
|
|
140
|
-
+ tensorflow >= 2.7
|
|
141
|
-
+ keras >= 2.7
|
|
142
166
|
+ xgboost
|
|
143
|
-
+ scikeras >= 0.6.0
|
|
144
167
|
+ snpio
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
### Manual Install
|
|
148
|
-
|
|
149
|
-
If you want to install everything manually, the requirements can be installed with conda and pip. sklearn-genetic-opt and scikeras are only avaiable via pip, and scikeras requires tensorflow >= 2.7 and scikit-learn >= 1.0.
|
|
150
|
-
|
|
151
|
-
```
|
|
152
|
-
conda create -n pg-sui python
|
|
153
|
-
conda activate pg-sui
|
|
154
|
-
|
|
155
|
-
conda install matplotlib seaborn jupyterlab scikit-learn tqdm pandas numpy scipy xgboost lightgbm kaleido
|
|
156
|
-
|
|
157
|
-
# Only works if using Intel CPUs; speeds up processing
|
|
158
|
-
conda install scikit-learn-intelex
|
|
159
|
-
|
|
160
|
-
conda install -c conda-forge toytree kaleido
|
|
161
|
-
|
|
162
|
-
conda install -c bioconda pyvolve
|
|
163
|
-
|
|
164
|
-
conda install -c plotly plotly
|
|
165
|
-
|
|
166
|
-
pip install sklearn-genetic-opt[all]
|
|
167
|
-
|
|
168
|
-
pip install scikeras snpio
|
|
169
|
-
|
|
170
|
-
pip install tensorflow-cpu
|
|
171
|
-
```
|
|
168
|
+
+ optuna
|
|
172
169
|
|
|
173
170
|
#### Installation troubleshooting
|
|
174
171
|
|
|
@@ -229,7 +226,7 @@ You can read your input files as a GenotypeData object from the [SNPio](https://
|
|
|
229
226
|
```
|
|
230
227
|
|
|
231
228
|
# Import snpio. Automatically installed with pgsui when using pip.
|
|
232
|
-
from snpio import GenotypeData
|
|
229
|
+
from snpio import GenotypeData
|
|
233
230
|
|
|
234
231
|
# Read in PHYLIP, VCF, or STRUCTURE-formatted alignments.
|
|
235
232
|
data = GenotypeData(
|
|
@@ -295,13 +292,40 @@ mf = ImputeMF(*args) # Matrix factorization
|
|
|
295
292
|
|
|
296
293
|
### Unsupervised Neural Networks
|
|
297
294
|
|
|
298
|
-
```
|
|
295
|
+
``` python
|
|
299
296
|
vae = ImputeVAE(data) # Variational autoencoder
|
|
300
297
|
nlpca = ImputeNLPCA(data) # Nonlinear PCA
|
|
301
298
|
ubp = ImputeUBP(data) # Unsupervised backpropagation
|
|
302
299
|
sae = ImputeStandardAutoEncoder(data) # standard autoencoder
|
|
303
300
|
```
|
|
304
301
|
|
|
302
|
+
## Command-Line Interface
|
|
303
|
+
|
|
304
|
+
Run the PG-SUI CLI with ``pg-sui`` (installed alongside the library). The CLI follows the same precedence model as the Python API:
|
|
305
|
+
|
|
306
|
+
``code defaults < preset (--preset) < YAML (--config) < explicit CLI flags < --set key=value``.
|
|
307
|
+
|
|
308
|
+
Recent releases add explicit switches for the simulated-missingness workflow shared by the neural and supervised models:
|
|
309
|
+
|
|
310
|
+
- ``--sim-strategy`` selects one of ``random``, ``random_weighted``, ``random_weighted_inv``, ``nonrandom``, ``nonrandom_weighted``.
|
|
311
|
+
- ``--sim-prop`` sets the proportion of observed calls to temporarily mask when building the evaluation set.
|
|
312
|
+
- ``--simulate-missing`` disables simulated masking entirely (store-false flag); omit it to inherit preset/YAML defaults or re-enable via ``--set sim.simulate_missing=True``.
|
|
313
|
+
|
|
314
|
+
Example:
|
|
315
|
+
|
|
316
|
+
```
|
|
317
|
+
pg-sui \
|
|
318
|
+
--vcf data.vcf.gz \
|
|
319
|
+
--popmap pops.popmap \
|
|
320
|
+
--models ImputeUBP ImputeVAE \
|
|
321
|
+
--preset balanced \
|
|
322
|
+
--sim-strategy random_weighted_inv \
|
|
323
|
+
--sim-prop 0.25 \
|
|
324
|
+
--set io.prefix=vae_vs_ubp
|
|
325
|
+
```
|
|
326
|
+
|
|
327
|
+
CLI overrides cascade into every selected model, so a single invocation can evaluate multiple imputers with a consistent simulation strategy and output prefix.
|
|
328
|
+
|
|
305
329
|
## To-Dos
|
|
306
330
|
|
|
307
331
|
- simulations
|
|
@@ -318,5 +342,3 @@ sae = ImputeStandardAutoEncoder(data) # standard autoencoder
|
|
|
318
342
|
<a name="4">4. </a>Scholz, M., Kaplan, F., Guy, C. L., Kopka, J., & Selbig, J. (2005). Non-linear PCA: a missing data approach. Bioinformatics, 21(20), 3887-3895.
|
|
319
343
|
|
|
320
344
|
<a name="5">5. </a>Gashler, M. S., Smith, M. R., Morris, R., & Martinez, T. (2016). Missing value imputation with unsupervised backpropagation. Computational Intelligence, 32(2), 196-215.
|
|
321
|
-
|
|
322
|
-
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
pg_sui-1.6.14.dev9.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
2
|
+
pgsui/__init__.py,sha256=wQFzVX6vh8aUva1LCvP42jS7rcKCpkaU52YfZIy61q8,1493
|
|
3
|
+
pgsui/_version.py,sha256=wKIKHCX5SmsO-4sA-OpkKAHVYuXeZ-aL_K4jO6bGQPU,719
|
|
4
|
+
pgsui/cli.py,sha256=tYSXK_BvBv_8rpDEFGcqrV1OCwN48vMervVV-C-6H_A,29675
|
|
5
|
+
pgsui/data_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
pgsui/data_processing/config.py,sha256=g5G7pjWG4uU2BRvBu_DpO0J_4X1Foa11X69imBWjaKA,20483
|
|
7
|
+
pgsui/data_processing/containers.py,sha256=5bGA_u3czuTr8TMq6ZmQ-Wc1l-4Qh8-O2siBi3h-owc,51558
|
|
8
|
+
pgsui/data_processing/transformers.py,sha256=kdwOTmfDjgQ3RmiwQIK7LYL4vQUpgA4bob7NHYgnYRM,30998
|
|
9
|
+
pgsui/electron/bootstrap.py,sha256=wnrXgX-hiqrMMFE9WGoD-UC8zeK2ZP6Kupu68PodVWI,1185
|
|
10
|
+
pgsui/electron/launch.py,sha256=M60o_jub77kJL-B9d_sMB7LYuTzWlOnQXR09efmCX2o,1715
|
|
11
|
+
pgsui/electron/package.json,sha256=12hbBq7xincW5V4645TTC58jfkA2rPgFP_eLb_WbhKo,372
|
|
12
|
+
pgsui/electron/app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
+
pgsui/electron/app/__main__.py,sha256=rF7tIpuy4OvMnzLms-fAUNfqYINEyR0Iofh1KKu5RI0,103
|
|
14
|
+
pgsui/electron/app/main.js,sha256=yobpq13uOdoLzwonCxjAFECwqop0bk8_-7pYBGci04U,8252
|
|
15
|
+
pgsui/electron/app/package-lock.json,sha256=0sp5XM4tclppG4vqFDtpKDIV9dBC0JVtjBtYKpSXIP8,246701
|
|
16
|
+
pgsui/electron/app/package.json,sha256=Mk5FALnz7hlV_5r5f84HgYp2oLjVD9SHnXmMVrTfzmo,1276
|
|
17
|
+
pgsui/electron/app/preload.js,sha256=L3lPsTGfgiGXLmQFwktW11En1YSb01hWlJD6kzzmbhE,803
|
|
18
|
+
pgsui/electron/app/server.py,sha256=q8gWwgkbaDjNZTT_dR1HXm45xv4V7wjrPk-Uc6axros,4568
|
|
19
|
+
pgsui/electron/app/extra-resources/.gitkeep,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
20
|
+
pgsui/electron/app/icons/icons/1024x1024.png,sha256=psZk62iG308QN9XP7sDCifGAHwG7__W-e2EUrF9hss0,42270
|
|
21
|
+
pgsui/electron/app/icons/icons/128x128.png,sha256=R9Za6fj289L432EdwAX7pUj4p4qy7iTMzghrrQzocCg,4705
|
|
22
|
+
pgsui/electron/app/icons/icons/16x16.png,sha256=TaVFFA8V4LQX_KwZCjko_eRPibmYEy1JSkuPicUZ0oQ,704
|
|
23
|
+
pgsui/electron/app/icons/icons/24x24.png,sha256=vMmSBmnTPhaVBfV_oCK_r3wykBq9JneyQu4TlHUVjBU,1094
|
|
24
|
+
pgsui/electron/app/icons/icons/256x256.png,sha256=D97Y8KYOg_RcsR8pNDcxyxtRz6X3qKkKw-oChy29tuY,9044
|
|
25
|
+
pgsui/electron/app/icons/icons/32x32.png,sha256=ah-Z-WybnXG801UV2xwAdvUNmQmy1C29EWLNlTgN8kI,1355
|
|
26
|
+
pgsui/electron/app/icons/icons/48x48.png,sha256=WYVea1_leKVpowEQCrbmV1zG5aMBRh-aMKpiziTKWYc,1995
|
|
27
|
+
pgsui/electron/app/icons/icons/512x512.png,sha256=A1YP6sKNFZEdVl7vVlIAUjunSzFhtXS0_e7bxRiadOQ,17784
|
|
28
|
+
pgsui/electron/app/icons/icons/64x64.png,sha256=tH4qDy3My1DPZz4QzSyz4P6_eel_3t246xKhtm1xKwg,2610
|
|
29
|
+
pgsui/electron/app/icons/icons/icon.icns,sha256=qaJExjDDJQk-uSbTIyPFn6a1hb8JRxs5HQmYdzZdb0c,108937
|
|
30
|
+
pgsui/electron/app/icons/icons/icon.ico,sha256=ljjD98wBOVfUSV3gPlMzXZ4FAprRdrAQENCZ76KTFmo,361102
|
|
31
|
+
pgsui/electron/app/ui/logo.png,sha256=uHhtVMYQsSCkcpeS8JPo-DVsAqwg3G4t-0lYuT4AP6E,38243
|
|
32
|
+
pgsui/electron/app/ui/renderer.js,sha256=SzG7WhiQ3lDp2DKgkOnRO8lohAyhXlnQi8V-r7lILeI,5351
|
|
33
|
+
pgsui/electron/app/ui/styles.css,sha256=NQPGczGXxw8XjrLbqKZtEQD-SICvciFuUfgtKm_FuHk,2675
|
|
34
|
+
pgsui/electron/app/ui/ui_shim.js,sha256=4qu_9qpMc_p-Boih0o8I2OAVL9TMS4DUUk7BRQIfCeM,2513
|
|
35
|
+
pgsui/example_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
|
+
pgsui/example_data/phylip_files/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
|
+
pgsui/example_data/phylip_files/test.phy,sha256=LEq2Q3hjmPVDP2uF1Ai8OH2If5Y58ny-GRfuXa3l61c,238537
|
|
38
|
+
pgsui/example_data/popmaps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
|
+
pgsui/example_data/popmaps/phylogen_nomx.popmap,sha256=JR84kJ5jVjJ45k4kpvGVt2JNJHApQg1xNs2pKPt-V4k,3310
|
|
40
|
+
pgsui/example_data/structure_files/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
41
|
+
pgsui/example_data/structure_files/test.pops.2row.allsites.str,sha256=fbmLcOUQSKyy71A77NPtJJMLSOkquPky0WiPlV4qHhA,967878
|
|
42
|
+
pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz,sha256=B9sxdIGPSbw4m4MTX_B1VHDx5cDicMNn83AFJy7SEAU,8053540
|
|
43
|
+
pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi,sha256=0gHDvboDAEZLQbXdYfUOiJL3oDOr1mOlaQxdlcz_erg,106087
|
|
44
|
+
pgsui/impute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
|
+
pgsui/impute/deterministic/imputers/allele_freq.py,sha256=tqaMggiNs6hxs4CN3No2d5llmmIPt0jQhHT0mHry2IM,29573
|
|
46
|
+
pgsui/impute/deterministic/imputers/mode.py,sha256=lQmrOYEyfQidqGaG86QB5HeM66IeOFLK1UoCvh-iDno,35049
|
|
47
|
+
pgsui/impute/deterministic/imputers/nmf.py,sha256=171_TTDZAe1NFjbmKQTOlPDe_rA1aO8q2Th5z0w2RU8,8086
|
|
48
|
+
pgsui/impute/deterministic/imputers/phylo.py,sha256=uN86_L2TeiUFOZLdU8pwymRiQf0SI7Sl6SAnCbAywVQ,38873
|
|
49
|
+
pgsui/impute/deterministic/imputers/ref_allele.py,sha256=OGdv4n1avIArItZ-V0kqX2g4OOZc2felh7HM1ikY2iU,28075
|
|
50
|
+
pgsui/impute/supervised/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
|
+
pgsui/impute/supervised/base.py,sha256=A92x1pS8DO0OwbhQem3KBAMbWs368KJcVl88zJ7wE10,13756
|
|
52
|
+
pgsui/impute/supervised/imputers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
53
|
+
pgsui/impute/supervised/imputers/hist_gradient_boosting.py,sha256=5LZsee2R9tbshmgVvLDIjGDihiQPvp9XfbaGwzL35E8,11509
|
|
54
|
+
pgsui/impute/supervised/imputers/random_forest.py,sha256=jpeaLjhzYrwRPc9nPasLgwOqBoHlBlymHMc3x5OkBWA,10393
|
|
55
|
+
pgsui/impute/unsupervised/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
56
|
+
pgsui/impute/unsupervised/base.py,sha256=ev9sWIMTB7WwKgkPW5MkSGrGr_BZaxvhkKiUUmvI5jo,46794
|
|
57
|
+
pgsui/impute/unsupervised/callbacks.py,sha256=jkxncpKNRdssImv4N5c-Hq8VcA07QvxLoct7EqDW9RE,5026
|
|
58
|
+
pgsui/impute/unsupervised/loss_functions.py,sha256=f18uQnerj0KF9xcU2I1_Y2OCKKguXXaEDaYhJg0XElY,10089
|
|
59
|
+
pgsui/impute/unsupervised/nn_scorers.py,sha256=-rl5MBJm2GN6E1wPBIe1wMgdrHEHhYooUUxVbfcf1Z8,9758
|
|
60
|
+
pgsui/impute/unsupervised/imputers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
61
|
+
pgsui/impute/unsupervised/imputers/autoencoder.py,sha256=fREEWDcmL4DEpFTXFDQjb84sdf2yHSDoCLNWMDhHqe8,54018
|
|
62
|
+
pgsui/impute/unsupervised/imputers/nlpca.py,sha256=fboE4gLv6M-mq2PPHSejtpSfShrhUfUSsngAinHgzVw,63797
|
|
63
|
+
pgsui/impute/unsupervised/imputers/ubp.py,sha256=ThJh8J-txNaP3JZENtJeQJQim18U3vc2kxOW3vvnyqA,67021
|
|
64
|
+
pgsui/impute/unsupervised/imputers/vae.py,sha256=zoOc9yEvAyUPGAew0x3clRvBb7SflMN0LPFuVO0rTsU,50598
|
|
65
|
+
pgsui/impute/unsupervised/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
66
|
+
pgsui/impute/unsupervised/models/autoencoder_model.py,sha256=mHmfTkldJNpN7Dy7RTS2RnkE2L_K1rChNjpjDvzRlEQ,12333
|
|
67
|
+
pgsui/impute/unsupervised/models/nlpca_model.py,sha256=1NathvhsirBtd9UcmeJzRoVf7oi7PfDmRpt18Di63Cg,8021
|
|
68
|
+
pgsui/impute/unsupervised/models/ubp_model.py,sha256=4guGkQzCTIsDnImOjJV5kG1xc1ST9oO4aUeXrSBSpQg,8491
|
|
69
|
+
pgsui/impute/unsupervised/models/vae_model.py,sha256=hMG7K1OR95qLEEcprGSzIoTIISXHSu2yWDy6QkG93Lg,15576
|
|
70
|
+
pgsui/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
71
|
+
pgsui/utils/classification_viz.py,sha256=WkGQkEIkpTMLQvGdGoU53kn1iNUO9ipeg5stSY1rcFg,22012
|
|
72
|
+
pgsui/utils/logging_utils.py,sha256=o_ElRL05B_DrbALYkuW8s_azfKQiN8kJ4oXwshyIMyI,521
|
|
73
|
+
pgsui/utils/misc.py,sha256=Mw5CsspFJkDAcCRufk-lO7fKyVoYK7PRYXkLXKswUjI,3065
|
|
74
|
+
pgsui/utils/plotting.py,sha256=d5CTzGIpanu3j6rEB6fq_F1g8w_A2Ti_XiedRjIFFII,42444
|
|
75
|
+
pgsui/utils/pretty_metrics.py,sha256=dtN7Ohcx3qJYCw4JeJCXvthGDdSV7bgE8v6EGwHSAE0,9862
|
|
76
|
+
pgsui/utils/scorers.py,sha256=sL2upL2ZZMFBTMM4DiGiWeXrqc_fp1RRbleYCnuRUhw,12564
|
|
77
|
+
pg_sui-1.6.14.dev9.dist-info/METADATA,sha256=F_9XvBZpZqKvrMnmD9TnwRuOkjXPEtnsUyGnRQu9orE,14443
|
|
78
|
+
pg_sui-1.6.14.dev9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
79
|
+
pg_sui-1.6.14.dev9.dist-info/entry_points.txt,sha256=xidyl6yqQv7oj3XSzZC6Vv9l7aNgbHi_pjv-dJjGJds,129
|
|
80
|
+
pg_sui-1.6.14.dev9.dist-info/top_level.txt,sha256=87-oDpfY6sDY_uN-OM2lcnrgPesifhzwqFOajp9ukz0,6
|
|
81
|
+
pg_sui-1.6.14.dev9.dist-info/RECORD,,
|
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pgsui
|
pgsui/__init__.py
CHANGED
|
@@ -1,63 +1,44 @@
|
|
|
1
1
|
## PG-SUI package by Bradley T. Martin and Tyler K. Chafin
|
|
2
2
|
## E-mail: evobio721@gmail.com
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
# Requires scikit-learn-intellex package
|
|
15
|
-
if get_processor_name().strip().startswith("Intel"):
|
|
16
|
-
try:
|
|
17
|
-
from sklearnex import patch_sklearn
|
|
18
|
-
|
|
19
|
-
patch_sklearn()
|
|
20
|
-
intelex = True
|
|
21
|
-
except (ImportError, TypeError):
|
|
22
|
-
warnings.warn(
|
|
23
|
-
"Intel CPU detected but scikit-learn-intelex is not installed. We recommend installing it to speed up computation if your hardware supports it."
|
|
24
|
-
)
|
|
25
|
-
intelex = False
|
|
26
|
-
else:
|
|
27
|
-
intelex = False
|
|
28
|
-
|
|
29
|
-
os.environ["intelex"] = str(intelex)
|
|
30
|
-
|
|
31
|
-
from pgsui.impute.estimators import (
|
|
32
|
-
ImputeKNN,
|
|
33
|
-
ImputeNLPCA,
|
|
34
|
-
ImputeRandomForest,
|
|
35
|
-
ImputeStandardAutoEncoder,
|
|
36
|
-
ImputeUBP,
|
|
37
|
-
ImputeVAE,
|
|
38
|
-
ImputeXGBoost,
|
|
3
|
+
from ._version import version as __version__
|
|
4
|
+
|
|
5
|
+
from pgsui.data_processing.containers import (
|
|
6
|
+
AutoencoderConfig,
|
|
7
|
+
HGBConfig,
|
|
8
|
+
MostFrequentConfig,
|
|
9
|
+
NLPCAConfig,
|
|
10
|
+
RefAlleleConfig,
|
|
11
|
+
RFConfig,
|
|
12
|
+
UBPConfig,
|
|
13
|
+
VAEConfig,
|
|
39
14
|
)
|
|
40
|
-
|
|
41
|
-
from pgsui.impute.
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
ImputeAlleleFreq,
|
|
45
|
-
ImputeRefAllele,
|
|
15
|
+
from pgsui.impute.deterministic.imputers.mode import ImputeMostFrequent
|
|
16
|
+
from pgsui.impute.deterministic.imputers.ref_allele import ImputeRefAllele
|
|
17
|
+
from pgsui.impute.supervised.imputers.hist_gradient_boosting import (
|
|
18
|
+
ImputeHistGradientBoosting,
|
|
46
19
|
)
|
|
47
|
-
|
|
48
|
-
from pgsui.
|
|
20
|
+
from pgsui.impute.supervised.imputers.random_forest import ImputeRandomForest
|
|
21
|
+
from pgsui.impute.unsupervised.imputers.autoencoder import ImputeAutoencoder
|
|
22
|
+
from pgsui.impute.unsupervised.imputers.nlpca import ImputeNLPCA
|
|
23
|
+
from pgsui.impute.unsupervised.imputers.ubp import ImputeUBP
|
|
24
|
+
from pgsui.impute.unsupervised.imputers.vae import ImputeVAE
|
|
49
25
|
|
|
50
26
|
__all__ = [
|
|
51
|
-
"
|
|
27
|
+
"ImputeAutoencoder", # Unsupervised imputer classes
|
|
52
28
|
"ImputeVAE",
|
|
53
|
-
"ImputeXGBoost",
|
|
54
|
-
"ImputeStandardAutoEncoder",
|
|
55
|
-
"ImputeRandomForest",
|
|
56
29
|
"ImputeNLPCA",
|
|
57
|
-
"
|
|
58
|
-
"
|
|
59
|
-
"
|
|
60
|
-
"
|
|
61
|
-
"
|
|
62
|
-
"
|
|
30
|
+
"ImputeUBP",
|
|
31
|
+
"ImputeRandomForest", # Supervised imputer classes
|
|
32
|
+
"ImputeHistGradientBoosting",
|
|
33
|
+
"ImputeRefAllele", # Deterministic imputer classes
|
|
34
|
+
"ImputeMostFrequent",
|
|
35
|
+
"AutoencoderConfig", # Unsupervised imputer configs
|
|
36
|
+
"VAEConfig",
|
|
37
|
+
"NLPCAConfig",
|
|
38
|
+
"UBPConfig",
|
|
39
|
+
"MostFrequentConfig", # Deterministic imputer configs
|
|
40
|
+
"RefAlleleConfig",
|
|
41
|
+
"RFConfig", # Supervised imputer configs
|
|
42
|
+
"HGBConfig",
|
|
43
|
+
"__version__",
|
|
63
44
|
]
|
pgsui/_version.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"__version__",
|
|
6
|
+
"__version_tuple__",
|
|
7
|
+
"version",
|
|
8
|
+
"version_tuple",
|
|
9
|
+
"__commit_id__",
|
|
10
|
+
"commit_id",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
TYPE_CHECKING = False
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from typing import Tuple
|
|
16
|
+
from typing import Union
|
|
17
|
+
|
|
18
|
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
+
COMMIT_ID = Union[str, None]
|
|
20
|
+
else:
|
|
21
|
+
VERSION_TUPLE = object
|
|
22
|
+
COMMIT_ID = object
|
|
23
|
+
|
|
24
|
+
version: str
|
|
25
|
+
__version__: str
|
|
26
|
+
__version_tuple__: VERSION_TUPLE
|
|
27
|
+
version_tuple: VERSION_TUPLE
|
|
28
|
+
commit_id: COMMIT_ID
|
|
29
|
+
__commit_id__: COMMIT_ID
|
|
30
|
+
|
|
31
|
+
__version__ = version = '1.6.14.dev9'
|
|
32
|
+
__version_tuple__ = version_tuple = (1, 6, 14, 'dev9')
|
|
33
|
+
|
|
34
|
+
__commit_id__ = commit_id = None
|