pg-sui 0.2.3__py3-none-any.whl → 1.6.16a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pg_sui-1.6.16a3.dist-info/METADATA +292 -0
- pg_sui-1.6.16a3.dist-info/RECORD +81 -0
- {pg_sui-0.2.3.dist-info → pg_sui-1.6.16a3.dist-info}/WHEEL +1 -1
- pg_sui-1.6.16a3.dist-info/entry_points.txt +4 -0
- {pg_sui-0.2.3.dist-info → pg_sui-1.6.16a3.dist-info/licenses}/LICENSE +0 -0
- pg_sui-1.6.16a3.dist-info/top_level.txt +1 -0
- pgsui/__init__.py +35 -54
- pgsui/_version.py +34 -0
- pgsui/cli.py +922 -0
- pgsui/data_processing/__init__.py +0 -0
- pgsui/data_processing/config.py +565 -0
- pgsui/data_processing/containers.py +1436 -0
- pgsui/data_processing/transformers.py +557 -907
- pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
- pgsui/electron/app/__main__.py +5 -0
- pgsui/electron/app/extra-resources/.gitkeep +1 -0
- pgsui/electron/app/icons/icons/1024x1024.png +0 -0
- pgsui/electron/app/icons/icons/128x128.png +0 -0
- pgsui/electron/app/icons/icons/16x16.png +0 -0
- pgsui/electron/app/icons/icons/24x24.png +0 -0
- pgsui/electron/app/icons/icons/256x256.png +0 -0
- pgsui/electron/app/icons/icons/32x32.png +0 -0
- pgsui/electron/app/icons/icons/48x48.png +0 -0
- pgsui/electron/app/icons/icons/512x512.png +0 -0
- pgsui/electron/app/icons/icons/64x64.png +0 -0
- pgsui/electron/app/icons/icons/icon.icns +0 -0
- pgsui/electron/app/icons/icons/icon.ico +0 -0
- pgsui/electron/app/main.js +227 -0
- pgsui/electron/app/package-lock.json +6894 -0
- pgsui/electron/app/package.json +51 -0
- pgsui/electron/app/preload.js +15 -0
- pgsui/electron/app/server.py +157 -0
- pgsui/electron/app/ui/logo.png +0 -0
- pgsui/electron/app/ui/renderer.js +131 -0
- pgsui/electron/app/ui/styles.css +59 -0
- pgsui/electron/app/ui/ui_shim.js +72 -0
- pgsui/electron/bootstrap.py +43 -0
- pgsui/electron/launch.py +57 -0
- pgsui/electron/package.json +14 -0
- pgsui/example_data/__init__.py +0 -0
- pgsui/example_data/phylip_files/__init__.py +0 -0
- pgsui/example_data/phylip_files/test.phy +0 -0
- pgsui/example_data/popmaps/__init__.py +0 -0
- pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
- pgsui/example_data/structure_files/__init__.py +0 -0
- pgsui/example_data/structure_files/test.pops.2row.allsites.str +0 -0
- pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
- pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
- pgsui/impute/__init__.py +0 -0
- pgsui/impute/deterministic/imputers/allele_freq.py +725 -0
- pgsui/impute/deterministic/imputers/mode.py +844 -0
- pgsui/impute/deterministic/imputers/nmf.py +221 -0
- pgsui/impute/deterministic/imputers/phylo.py +973 -0
- pgsui/impute/deterministic/imputers/ref_allele.py +669 -0
- pgsui/impute/supervised/__init__.py +0 -0
- pgsui/impute/supervised/base.py +343 -0
- pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
- pgsui/impute/supervised/imputers/hist_gradient_boosting.py +317 -0
- pgsui/impute/supervised/imputers/random_forest.py +291 -0
- pgsui/impute/unsupervised/__init__.py +0 -0
- pgsui/impute/unsupervised/base.py +1121 -0
- pgsui/impute/unsupervised/callbacks.py +92 -262
- {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
- pgsui/impute/unsupervised/imputers/autoencoder.py +1361 -0
- pgsui/impute/unsupervised/imputers/nlpca.py +1666 -0
- pgsui/impute/unsupervised/imputers/ubp.py +1660 -0
- pgsui/impute/unsupervised/imputers/vae.py +1316 -0
- pgsui/impute/unsupervised/loss_functions.py +261 -0
- pgsui/impute/unsupervised/models/__init__.py +0 -0
- pgsui/impute/unsupervised/models/autoencoder_model.py +215 -567
- pgsui/impute/unsupervised/models/nlpca_model.py +155 -394
- pgsui/impute/unsupervised/models/ubp_model.py +180 -1106
- pgsui/impute/unsupervised/models/vae_model.py +269 -630
- pgsui/impute/unsupervised/nn_scorers.py +255 -0
- pgsui/utils/__init__.py +0 -0
- pgsui/utils/classification_viz.py +608 -0
- pgsui/utils/logging_utils.py +22 -0
- pgsui/utils/misc.py +35 -480
- pgsui/utils/plotting.py +996 -829
- pgsui/utils/pretty_metrics.py +290 -0
- pgsui/utils/scorers.py +213 -666
- pg_sui-0.2.3.dist-info/METADATA +0 -322
- pg_sui-0.2.3.dist-info/RECORD +0 -75
- pg_sui-0.2.3.dist-info/top_level.txt +0 -3
- pgsui/example_data/phylip_files/test_n10.phy +0 -118
- pgsui/example_data/phylip_files/test_n100.phy +0 -118
- pgsui/example_data/phylip_files/test_n2.phy +0 -118
- pgsui/example_data/phylip_files/test_n500.phy +0 -118
- pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
- pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
- pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
- pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
- pgsui/example_data/trees/test.iqtree +0 -376
- pgsui/example_data/trees/test.qmat +0 -5
- pgsui/example_data/trees/test.rate +0 -2033
- pgsui/example_data/trees/test.tre +0 -1
- pgsui/example_data/trees/test_n10.rate +0 -19
- pgsui/example_data/trees/test_n100.rate +0 -109
- pgsui/example_data/trees/test_n500.rate +0 -509
- pgsui/example_data/trees/test_siterates.txt +0 -2024
- pgsui/example_data/trees/test_siterates_n10.txt +0 -10
- pgsui/example_data/trees/test_siterates_n100.txt +0 -100
- pgsui/example_data/trees/test_siterates_n500.txt +0 -500
- pgsui/example_data/vcf_files/test.vcf +0 -244
- pgsui/example_data/vcf_files/test.vcf.gz +0 -0
- pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
- pgsui/impute/estimators.py +0 -1268
- pgsui/impute/impute.py +0 -1463
- pgsui/impute/simple_imputers.py +0 -1431
- pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -782
- pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1024
- pgsui/impute/unsupervised/keras_classifiers.py +0 -697
- pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
- pgsui/impute/unsupervised/neural_network_imputers.py +0 -1440
- pgsui/impute/unsupervised/neural_network_methods.py +0 -1395
- pgsui/pg_sui.py +0 -261
- pgsui/utils/sequence_tools.py +0 -407
- simulation/sim_benchmarks.py +0 -333
- simulation/sim_treeparams.py +0 -475
- test/__init__.py +0 -0
- test/pg_sui_simtest.py +0 -215
- test/pg_sui_testing.py +0 -523
- test/test.py +0 -151
- test/test_pgsui.py +0 -374
- test/test_tkc.py +0 -185
pg_sui-0.2.3.dist-info/METADATA
DELETED
|
@@ -1,322 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: pg-sui
|
|
3
|
-
Version: 0.2.3
|
|
4
|
-
Summary: Python machine and deep learning package to impute missing SNPs
|
|
5
|
-
Home-page: https://github.com/btmartin721/PG-SUI
|
|
6
|
-
Author: Bradley T. Martin and Tyler K. Chafin
|
|
7
|
-
Author-email: evobio721@gmail.com
|
|
8
|
-
Maintainer: Bradley T. Martin
|
|
9
|
-
Maintainer-email: evobio721@gmail.com
|
|
10
|
-
License: GNU General Public License v3 (GPLv3)
|
|
11
|
-
Project-URL: Homepage, https://github.com/btmartin721/PG-SUI
|
|
12
|
-
Project-URL: Documentation, https://pg-sui.readthedocs.io/en/latest/
|
|
13
|
-
Project-URL: Source, https://github.com/btmartin721/PG-SUI.git
|
|
14
|
-
Project-URL: Bug Tracker, https://github.com/btmartin721/PG-SUI/issues
|
|
15
|
-
Keywords: python,impute,imputation,imputer,machine learning,neural network,api,IterativeImputer,vae,ubp,nlpca,autoencoder,deep learning,population genomics
|
|
16
|
-
Platform: UNKNOWN
|
|
17
|
-
Classifier: Programming Language :: Python :: 3
|
|
18
|
-
Classifier: Programming Language :: Python :: 3
|
|
19
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
20
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
-
Classifier: Development Status :: 4 - Beta
|
|
24
|
-
Classifier: Intended Audience :: Science/Research
|
|
25
|
-
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
|
26
|
-
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
27
|
-
Classifier: Operating System :: OS Independent
|
|
28
|
-
Classifier: Natural Language :: English
|
|
29
|
-
Requires-Python: >=3.8,<4
|
|
30
|
-
Description-Content-Type: text/markdown
|
|
31
|
-
Requires-Dist: importlib-resources (>=1.1.0)
|
|
32
|
-
Requires-Dist: jupyterlab
|
|
33
|
-
Requires-Dist: keras
|
|
34
|
-
Requires-Dist: matplotlib
|
|
35
|
-
Requires-Dist: numpy (==1.24.3)
|
|
36
|
-
Requires-Dist: pandas
|
|
37
|
-
Requires-Dist: pyvolve
|
|
38
|
-
Requires-Dist: scikeras
|
|
39
|
-
Requires-Dist: scikit-learn (>=1.0)
|
|
40
|
-
Requires-Dist: scipy
|
|
41
|
-
Requires-Dist: seaborn
|
|
42
|
-
Requires-Dist: sklearn-genetic-opt[all] (>=0.6.0)
|
|
43
|
-
Requires-Dist: snpio
|
|
44
|
-
Requires-Dist: tensorflow
|
|
45
|
-
Requires-Dist: toytree
|
|
46
|
-
Requires-Dist: tqdm
|
|
47
|
-
Requires-Dist: typing-extensions (<4.6.0)
|
|
48
|
-
Requires-Dist: urllib3 (<2.0.0,>=1.26.7)
|
|
49
|
-
Requires-Dist: xgboost
|
|
50
|
-
Provides-Extra: docs
|
|
51
|
-
Requires-Dist: sphinx-rtd-theme ; extra == 'docs'
|
|
52
|
-
Requires-Dist: sphinx (<7) ; extra == 'docs'
|
|
53
|
-
Requires-Dist: sphinx-autodoc-typehints ; extra == 'docs'
|
|
54
|
-
Provides-Extra: intel
|
|
55
|
-
Requires-Dist: scikit-learn-intelex ; extra == 'intel'
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
<img src="https://github.com/btmartin721/PG-SUI/blob/master/img/pgsui-logo-faded.png" alt="PG-SUI Logo" width="50%" height="50%">
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
# PG-SUI
|
|
62
|
-
|
|
63
|
-
Population Genomic Supervised and Unsupervised Imputation.
|
|
64
|
-
|
|
65
|
-
## About PG-SUI
|
|
66
|
-
|
|
67
|
-
PG-SUI is a Python 3 API that uses machine learning to impute missing values from population genomic SNP data. There are several supervised and unsupervised machine learning algorithms available to impute missing data, as well as some non-machine learning imputers that are useful.
|
|
68
|
-
|
|
69
|
-
Below is some general information and a basic tutorial. For more detailed information, see our [API Documentation](https://pg-sui.readthedocs.io/en/latest/).
|
|
70
|
-
|
|
71
|
-
### Supervised Imputation Methods
|
|
72
|
-
|
|
73
|
-
Supervised methods utilize scikit-learn's IterativeImputer, which is based on the MICE (Multivariate Imputation by Chained Equations) algorithm ([1](#1)) and iterates over each SNP site (i.e., feature) while using the N nearest neighbor features to inform the imputation. The number of nearest features can be adjusted by users. IterativeImputer currently works with any of the following scikit-learn classifiers:
|
|
74
|
-
|
|
75
|
-
+ K-Nearest Neighbors
|
|
76
|
-
+ Random Forest
|
|
77
|
-
+ XGBoost
|
|
78
|
-
|
|
79
|
-
See the scikit-learn documentation (https://scikit-learn.org) for more information on IterativeImputer and each of the classifiers.
|
|
80
|
-
|
|
81
|
-
### Unsupervised Imputation Methods
|
|
82
|
-
|
|
83
|
-
Unsupervised imputers include four custom neural network models:
|
|
84
|
-
|
|
85
|
-
+ Variational Autoencoder (VAE) ([2](#2))
|
|
86
|
-
+ Standard Autoencoder (SAE) ([3](#3))
|
|
87
|
-
+ Non-linear Principal Component Analysis (NLPCA) ([4](#4))
|
|
88
|
-
+ Unsupervised Backpropagation (UBP) ([5](#5))
|
|
89
|
-
|
|
90
|
-
VAE models train themselves to reconstruct their input (i.e., the genotypes). To use VAE for imputation, the missing values are masked and the VAE model gets trained to reconstruct only on known values. Once the model is trained, it is then used to predict the missing values.
|
|
91
|
-
|
|
92
|
-
SAE is a standard autoencoder that trains the input to predict itself. As with VAE, missing values are masked and the model gets trained only on known values. Predictions are then made on the missing values.
|
|
93
|
-
|
|
94
|
-
NLPCA initializes random, reduced-dimensional input, then trains itself by using the known values (i.e., genotypes) as targets and refining the random input until it accurately predicts the genotype output. The trained model can then predict the missing values.
|
|
95
|
-
|
|
96
|
-
UBP is an extension of NLPCA that runs over three phases. Phase 1 refines the randomly generated, reduced-dimensional input in a single layer perceptron neural network to obtain good initial input values. Phase 2 uses the refined reduced-dimensional input from phase 1 as input into a multi-layer perceptron (MLP), but in Phase 2 only the neural network weights are refined. Phase 3 uses an MLP to refine both the weights and the reduced-dimensional input. Once the model is trained, it can be used to predict the missing values.
|
|
97
|
-
|
|
98
|
-
### Non-Machine Learning Methods
|
|
99
|
-
|
|
100
|
-
We also include several non-machine learning options for imputing missing data, including:
|
|
101
|
-
|
|
102
|
-
+ Per-population mode per SNP site
|
|
103
|
-
+ Global mode per SNP site
|
|
104
|
-
+ Using a phylogeny as input to inform the imputation
|
|
105
|
-
+ Matrix Factorization
|
|
106
|
-
|
|
107
|
-
These four "simple" imputation methods can be used as standalone imputers, as the initial imputation strategy for IterativeImputer (at least one method is required to be chosen), and to validate the accuracy of both IterativeImputer and the neural network models.
|
|
108
|
-
|
|
109
|
-
## Installing PG-SUI
|
|
110
|
-
|
|
111
|
-
The easiest way to install PG-SUI is to use pip:
|
|
112
|
-
|
|
113
|
-
```
|
|
114
|
-
pip install pg-sui
|
|
115
|
-
```
|
|
116
|
-
|
|
117
|
-
If you have an Intel CPU and want to use the scikit-learn-intelex package to speed up scikit-learn computations, you can do:
|
|
118
|
-
|
|
119
|
-
```
|
|
120
|
-
pip install pg-sui[intel]
|
|
121
|
-
```
|
|
122
|
-
|
|
123
|
-
## Manual Installation
|
|
124
|
-
|
|
125
|
-
### Dependencies
|
|
126
|
-
|
|
127
|
-
+ python >= 3.8
|
|
128
|
-
+ pandas
|
|
129
|
-
+ numpy==1.24.3
|
|
130
|
-
+ scipy
|
|
131
|
-
+ matplotlib
|
|
132
|
-
+ seaborn
|
|
133
|
-
+ plotly
|
|
134
|
-
+ kaleido
|
|
135
|
-
+ jupyterlab
|
|
136
|
-
+ tqdm
|
|
137
|
-
+ toytree
|
|
138
|
-
+ pyvolve
|
|
139
|
-
+ scikit-learn
|
|
140
|
-
+ tensorflow >= 2.7
|
|
141
|
-
+ keras >= 2.7
|
|
142
|
-
+ xgboost
|
|
143
|
-
+ scikeras >= 0.6.0
|
|
144
|
-
+ snpio
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
### Manual Install
|
|
148
|
-
|
|
149
|
-
If you want to install everything manually, the requirements can be installed with conda and pip. sklearn-genetic-opt and scikeras are only available via pip, and scikeras requires tensorflow >= 2.7 and scikit-learn >= 1.0.
|
|
150
|
-
|
|
151
|
-
```
|
|
152
|
-
conda create -n pg-sui python
|
|
153
|
-
conda activate pg-sui
|
|
154
|
-
|
|
155
|
-
conda install matplotlib seaborn jupyterlab scikit-learn tqdm pandas numpy scipy xgboost lightgbm kaleido
|
|
156
|
-
|
|
157
|
-
# Only works if using Intel CPUs; speeds up processing
|
|
158
|
-
conda install scikit-learn-intelex
|
|
159
|
-
|
|
160
|
-
conda install -c conda-forge toytree kaleido
|
|
161
|
-
|
|
162
|
-
conda install -c bioconda pyvolve
|
|
163
|
-
|
|
164
|
-
conda install -c plotly plotly
|
|
165
|
-
|
|
166
|
-
pip install sklearn-genetic-opt[all]
|
|
167
|
-
|
|
168
|
-
pip install scikeras snpio
|
|
169
|
-
|
|
170
|
-
pip install tensorflow-cpu
|
|
171
|
-
```
|
|
172
|
-
|
|
173
|
-
#### Installation troubleshooting
|
|
174
|
-
|
|
175
|
-
##### "use_2to3 is invalid" error
|
|
176
|
-
|
|
177
|
-
Users running setuptools v58 may encounter this error during the last step of installation, using pip to install sklearn-genetic-opt:
|
|
178
|
-
|
|
179
|
-
```
|
|
180
|
-
ERROR: Command errored out with exit status 1:
|
|
181
|
-
command: /Users/tyler/miniforge3/envs/pg-sui/bin/python3.8 -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '"'"'/private/var/folders/6x/t6g4kn711z5cxmc2_tvq0mlw0000gn/T/pip-install-6y5g_mhs/deap_1d32f65d60a44056bd7031f3aad44571/setup.py'"'"'; __file__='"'"'/private/var/folders/6x/t6g4kn711z5cxmc2_tvq0mlw0000gn/T/pip-install-6y5g_mhs/deap_1d32f65d60a44056bd7031f3aad44571/setup.py'"'"';f = getattr(tokenize, '"'"'open'"'"', open)(__file__) if os.path.exists(__file__) else io.StringIO('"'"'from setuptools import setup; setup()'"'"');code = f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' egg_info --egg-base /private/var/folders/6x/t6g4kn711z5cxmc2_tvq0mlw0000gn/T/pip-pip-egg-info-7hg3hcq2
|
|
182
|
-
cwd: /private/var/folders/6x/t6g4kn711z5cxmc2_tvq0mlw0000gn/T/pip-install-6y5g_mhs/deap_1d32f65d60a44056bd7031f3aad44571/
|
|
183
|
-
Complete output (1 lines):
|
|
184
|
-
error in deap setup command: use_2to3 is invalid.
|
|
185
|
-
```
|
|
186
|
-
|
|
187
|
-
This occurs during the installation of DEAP, one of the dependencies for sklearn-genetic-opt. As a workaround, first downgrade setuptools, and then proceed with the installation as normal:
|
|
188
|
-
```
|
|
189
|
-
pip install setuptools==57
|
|
190
|
-
pip install sklearn-genetic-opt[all]
|
|
191
|
-
|
|
192
|
-
```
|
|
193
|
-
|
|
194
|
-
##### Mac ARM architecture
|
|
195
|
-
|
|
196
|
-
PG-SUI has been tested on the new Mac M1 chips and is working fine, but some changes to the installation process were necessary as of 9-December-21. Installation was successful using the following:
|
|
197
|
-
|
|
198
|
-
```
|
|
199
|
-
### Install Miniforge3 instead of Miniconda3
|
|
200
|
-
### Download: https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh
|
|
201
|
-
bash ~/Downloads/Miniforge3-MacOSX-arm64.sh
|
|
202
|
-
|
|
203
|
-
# Close and re-open terminal #
|
|
204
|
-
|
|
205
|
-
#create and activate conda environment
|
|
206
|
-
conda create -n pg-sui python
|
|
207
|
-
|
|
208
|
-
#activate environment
|
|
209
|
-
conda activate pg-sui
|
|
210
|
-
|
|
211
|
-
#install packages
|
|
212
|
-
conda install -c conda-forge matplotlib seaborn jupyterlab scikit-learn tqdm pandas numpy scipy xgboost lightgbm tensorflow keras sklearn-genetic-opt toytree
|
|
213
|
-
conda install -c bioconda pyvolve
|
|
214
|
-
|
|
215
|
-
#downgrade setuptools (may or may not be necessary)
|
|
216
|
-
pip install setuptools==57
|
|
217
|
-
|
|
218
|
-
#install sklearn-genetic-opt and mlflow
|
|
219
|
-
pip install sklearn-genetic-opt mlflow
|
|
220
|
-
|
|
221
|
-
```
|
|
222
|
-
|
|
223
|
-
Any other problems we run into testing on the Mac ARM architecture will be adjusted here. Note that the step installing scikit-learn-intelex was skipped here. PG-SUI will automatically detect the CPU architecture you are running, and forgo importing this package (which will only work on Intel processors).
|
|
224
|
-
|
|
225
|
-
## Input Data
|
|
226
|
-
|
|
227
|
-
You can read your input files as a GenotypeData object from the [SNPio](https://snpio.readthedocs.io/en/latest/) package:
|
|
228
|
-
|
|
229
|
-
```
|
|
230
|
-
|
|
231
|
-
# Import snpio. Automatically installed with pgsui when using pip.
|
|
232
|
-
from snpio import GenotypeData
|
|
233
|
-
|
|
234
|
-
# Read in PHYLIP, VCF, or STRUCTURE-formatted alignments.
|
|
235
|
-
data = GenotypeData(
|
|
236
|
-
filename="example_data/phylip_files/phylogen_nomx.u.snps.phy",
|
|
237
|
-
popmapfile="example_data/popmaps/phylogen_nomx.popmap",
|
|
238
|
-
force_popmap=True,
|
|
239
|
-
filetype="auto",
|
|
240
|
-
qmatrix_iqtree="example_data/trees/test.qmat",
|
|
241
|
-
siterates_iqtree="example_data/trees/test.rate",
|
|
242
|
-
guidetree="example_data/trees/test.tre",
|
|
243
|
-
include_pops=["EA", "TT", "GU"], # Only include these populations. There's also an exclude_pops option that will exclude the provided populations.
|
|
244
|
-
)
|
|
245
|
-
```
|
|
246
|
-
|
|
247
|
-
## Supported Imputation Methods
|
|
248
|
-
|
|
249
|
-
There are numerous supported algorithms to impute missing data. Each one can be run by calling the corresponding class. You must provide a GenotypeData instance as the first positional argument.
|
|
250
|
-
|
|
251
|
-
You can import all the supported methods with:
|
|
252
|
-
|
|
253
|
-
```
|
|
254
|
-
from pgsui import *
|
|
255
|
-
```
|
|
256
|
-
|
|
257
|
-
Or you can import them one at a time.
|
|
258
|
-
|
|
259
|
-
```
|
|
260
|
-
from pgsui import ImputeVAE
|
|
261
|
-
```
|
|
262
|
-
|
|
263
|
-
### Supervised Imputers
|
|
264
|
-
|
|
265
|
-
Various supervised imputation options are supported:
|
|
266
|
-
|
|
267
|
-
```
|
|
268
|
-
# Supervised IterativeImputer classifiers
|
|
269
|
-
knn = ImputeKNN(data) # K-Nearest Neighbors
|
|
270
|
-
rf = ImputeRandomForest(data) # Random Forest or Extra Trees
|
|
271
|
-
xgb = ImputeXGBoost(data) # XGBoost
|
|
272
|
-
```
|
|
273
|
-
|
|
274
|
-
### Non-machine learning methods
|
|
275
|
-
|
|
276
|
-
Use phylogeny to inform imputation:
|
|
277
|
-
|
|
278
|
-
```
|
|
279
|
-
phylo = ImputePhylo(data)
|
|
280
|
-
```
|
|
281
|
-
|
|
282
|
-
Use by-population or global allele frequency to inform imputation
|
|
283
|
-
|
|
284
|
-
```
|
|
285
|
-
pop_af = ImputeAlleleFreq(data, by_populations=True)
|
|
286
|
-
global_af = ImputeAlleleFreq(data, by_populations=False)
|
|
287
|
-
ref_af = ImputeRefAllele(data)
|
|
288
|
-
```
|
|
289
|
-
|
|
290
|
-
Matrix factorization:
|
|
291
|
-
|
|
292
|
-
```
|
|
293
|
-
mf = ImputeMF(*args) # Matrix factorization
|
|
294
|
-
```
|
|
295
|
-
|
|
296
|
-
### Unsupervised Neural Networks
|
|
297
|
-
|
|
298
|
-
```
|
|
299
|
-
vae = ImputeVAE(data) # Variational autoencoder
|
|
300
|
-
nlpca = ImputeNLPCA(data) # Nonlinear PCA
|
|
301
|
-
ubp = ImputeUBP(data) # Unsupervised backpropagation
|
|
302
|
-
sae = ImputeStandardAutoEncoder(data) # standard autoencoder
|
|
303
|
-
```
|
|
304
|
-
|
|
305
|
-
## To-Dos
|
|
306
|
-
|
|
307
|
-
- simulations
|
|
308
|
-
- Documentation
|
|
309
|
-
|
|
310
|
-
## References:
|
|
311
|
-
|
|
312
|
-
<a name="1">1. </a>Stef van Buuren, Karin Groothuis-Oudshoorn (2011). mice: Multivariate Imputation by Chained Equations in R. Journal of Statistical Software 45: 1-67.
|
|
313
|
-
|
|
314
|
-
<a name="2">2. </a>Kingma, D.P. & Welling, M. (2013). Auto-encoding variational bayes. In: Proceedings of the International Conference on Learning Representations (ICLR). arXiv:1312.6114 [stat.ML].
|
|
315
|
-
|
|
316
|
-
<a name="3">3. </a>Hinton, G.E., & Salakhutdinov, R.R. (2006). Reducing the dimensionality of data with neural networks. Science, 313(5786), 504-507.
|
|
317
|
-
|
|
318
|
-
<a name="4">4. </a>Scholz, M., Kaplan, F., Guy, C. L., Kopka, J., & Selbig, J. (2005). Non-linear PCA: a missing data approach. Bioinformatics, 21(20), 3887-3895.
|
|
319
|
-
|
|
320
|
-
<a name="5">5. </a>Gashler, M. S., Smith, M. R., Morris, R., & Martinez, T. (2016). Missing value imputation with unsupervised backpropagation. Computational Intelligence, 32(2), 196-215.
|
|
321
|
-
|
|
322
|
-
|
pg_sui-0.2.3.dist-info/RECORD
DELETED
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
pgsui/__init__.py,sha256=x9qoV6vULdMKnvpieT2OTwykUj1JlMn9F9WOtnuSq6s,1449
|
|
2
|
-
pgsui/pg_sui.py,sha256=TKc_5XAOoAjUEVYZm4UmKkIccgnY9cgooOJAPmcq3Hk,7471
|
|
3
|
-
pgsui/data_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
pgsui/data_processing/transformers.py,sha256=2ZdsFU7OtxUrlwjPk6P7pu0he9TBiAJ1meiYLpfPWJo,42452
|
|
5
|
-
pgsui/example_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
pgsui/example_data/phylip_files/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
-
pgsui/example_data/phylip_files/test.phy,sha256=LEq2Q3hjmPVDP2uF1Ai8OH2If5Y58ny-GRfuXa3l61c,238537
|
|
8
|
-
pgsui/example_data/phylip_files/test_n10.phy,sha256=sgrpekxj0wIlbJymWjULqoPdrY5ehKW5nzHnK710NkU,2897
|
|
9
|
-
pgsui/example_data/phylip_files/test_n100.phy,sha256=LqXLINViOlDpmer2QWyu_Rd_GrCxAnsg6NgogKatnY4,13428
|
|
10
|
-
pgsui/example_data/phylip_files/test_n2.phy,sha256=cQxEWCeiuxQluvAt1xG-G06UcNgJH2ibaabJJMD019o,1960
|
|
11
|
-
pgsui/example_data/phylip_files/test_n500.phy,sha256=C1_y8CxP6Z5vYfY4nwckxdOL2pXKjLEoYmB_FIPW0xY,60228
|
|
12
|
-
pgsui/example_data/popmaps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
-
pgsui/example_data/popmaps/test.popmap,sha256=20sFUh3toDNO5wnNuv3FUEBUDkHosMeSclECCDtW354,1954
|
|
14
|
-
pgsui/example_data/structure_files/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
-
pgsui/example_data/structure_files/test.nopops.1row.10sites.str,sha256=BxlTqWRXjVJFAkA3mnmicMK69AjoBY57R2gkDDo2dwQ,6375
|
|
16
|
-
pgsui/example_data/structure_files/test.nopops.2row.100sites.str,sha256=BzYura7k1O1Qv_FPz57IuuiWMwPV5DU6RPkJhKYdbhs,50808
|
|
17
|
-
pgsui/example_data/structure_files/test.nopops.2row.10sites.str,sha256=q0fkZJN9vvHbUsUnTaNLWA4eEYqcZo_4VRNDAhEqACg,7978
|
|
18
|
-
pgsui/example_data/structure_files/test.nopops.2row.30sites.str,sha256=OIkbvM9GDvp9ZVbmRS6QE1xkTLjsP6NlWALy9u4nCDo,17512
|
|
19
|
-
pgsui/example_data/structure_files/test.nopops.2row.allsites.str,sha256=A4oa4lNQicJmmLpsIwEFCOxJ08_jVusHQjEjIBot7ks,967410
|
|
20
|
-
pgsui/example_data/structure_files/test.pops.1row.10sites.str,sha256=bg3n0oMIj8eXTbZn4T89kv0Jb6mWDIHB5jQIrwVGbZA,6609
|
|
21
|
-
pgsui/example_data/structure_files/test.pops.2row.10sites.str,sha256=DDTnKtLVIJzaUL9ufab4cw1OySnkQ0-4nQD_x051OeI,8446
|
|
22
|
-
pgsui/example_data/structure_files/test.pops.2row.allsites.str,sha256=fbmLcOUQSKyy71A77NPtJJMLSOkquPky0WiPlV4qHhA,967878
|
|
23
|
-
pgsui/example_data/trees/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
|
-
pgsui/example_data/trees/test.iqtree,sha256=vcWGWdXys6v5zmCogS761W1AplkqDuXD9f3gzzcLxOw,25575
|
|
25
|
-
pgsui/example_data/trees/test.qmat,sha256=WDFO2YboGWYMKJeOq5etBRvB19IHsBQhNzWoi7Z3irg,134
|
|
26
|
-
pgsui/example_data/trees/test.rate,sha256=5ROH2y26oWTveeEEx4UVKeTd6SuNN5jIo2HBaOdgH-U,46013
|
|
27
|
-
pgsui/example_data/trees/test.tre,sha256=ZwH7j5hJidraGfjttBNOZ_AyDm6flTesONs4cfT8gbQ,4696
|
|
28
|
-
pgsui/example_data/trees/test_n10.rate,sha256=QntpWWt0nS3LmAIdnKMv501rJ2nwoIaBFRL9hiwdfgw,678
|
|
29
|
-
pgsui/example_data/trees/test_n100.rate,sha256=OSsJsWVwfw1e_LE16BFbFppIrXYyC19mNjk3XYQaC6s,2577
|
|
30
|
-
pgsui/example_data/trees/test_n500.rate,sha256=ZtROaOo93eQ4AWi8dR_5blBMoQ2kcK-hfQ7iEIWHaLA,11387
|
|
31
|
-
pgsui/example_data/trees/test_siterates.txt,sha256=90GBD6ay6G-e-PMPGRZv9oihViwTjtRPycbmlekxKTI,16235
|
|
32
|
-
pgsui/example_data/trees/test_siterates_n10.txt,sha256=sBH770qAI5x-8aN-o4EGP784UFRcSvIBMUC8oUrFiwI,80
|
|
33
|
-
pgsui/example_data/trees/test_siterates_n100.txt,sha256=9kmDNbjHBQIFVrdmwTmU0bTqIFj4KfgWzsVjR2PZwYI,804
|
|
34
|
-
pgsui/example_data/trees/test_siterates_n500.txt,sha256=cqLjXZtYSiK5j3vVhX6tMKbR9ngxhOw9Q6FtRCDRAko,4008
|
|
35
|
-
pgsui/example_data/vcf_files/test.vcf,sha256=xF7VaqnmHSNauW3xCIMpWw1yIXa6RRAbizg66SzVUHM,380243
|
|
36
|
-
pgsui/example_data/vcf_files/test.vcf.gz,sha256=g49C0ycJ4HAjT7oqlq9sV2q1NmZdCYkp5jT33RRveio,72705
|
|
37
|
-
pgsui/example_data/vcf_files/test.vcf.gz.tbi,sha256=zxitJWi7Vw1AdO4yiX5y-3NCRxiTUi0TfOcxR8RVzpI,2581
|
|
38
|
-
pgsui/impute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
|
-
pgsui/impute/estimators.py,sha256=-V5gnNtkUr9-uUDpfgcfLXlNqe4nnrlwPTFR5DcNbug,125036
|
|
40
|
-
pgsui/impute/impute.py,sha256=12zEX_-R5OLXEkhnfsqmZWvM3TB4HHun5XF-z8wN1rw,51210
|
|
41
|
-
pgsui/impute/simple_imputers.py,sha256=4fxM9l7310y07fUcXyneesmNUh3eYbPvFWFRZom_3HU,53283
|
|
42
|
-
pgsui/impute/supervised/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
43
|
-
pgsui/impute/supervised/iterative_imputer_fixedparams.py,sha256=PH6Mc7J8LJmUhnW8x43WudhorBSNg1Uxa7ZKBkMp5BQ,35075
|
|
44
|
-
pgsui/impute/supervised/iterative_imputer_gridsearch.py,sha256=ELW4cWX3QTU_1dI_eqMZoVG8vqFnLCqy41WQ0C1o4wU,45443
|
|
45
|
-
pgsui/impute/unsupervised/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
|
-
pgsui/impute/unsupervised/callbacks.py,sha256=qmHrIyYL_YBdntrhlZMAED29JGjmgZFsnB1ydkyVjnQ,9682
|
|
47
|
-
pgsui/impute/unsupervised/keras_classifiers.py,sha256=dSM7CrH86s6xTvVmdyEmTjch1g9aRszspgxExTRptjM,29483
|
|
48
|
-
pgsui/impute/unsupervised/neural_network_imputers.py,sha256=Hkj8rdo_G1HbYrKbJJ9oeYXAsOH1PLJYZKJt_eQuFYw,52292
|
|
49
|
-
pgsui/impute/unsupervised/neural_network_methods.py,sha256=ueFUQ9eh9xUUfep3myTuEz1ZDrdDZ5R_4bpQnwK-GsQ,50830
|
|
50
|
-
pgsui/impute/unsupervised/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
|
-
pgsui/impute/unsupervised/models/autoencoder_model.py,sha256=5HXqNr4d86Or1el_1eKw4PTKsik3t9K2dQ2M95a-VY0,20161
|
|
52
|
-
pgsui/impute/unsupervised/models/nlpca_model.py,sha256=6yRDO6aCkjGjdbrI2FOJvyDdksArsRwCSunb2NSXd1k,14501
|
|
53
|
-
pgsui/impute/unsupervised/models/ubp_model.py,sha256=CEVvGusqONpTdF4HpyOSA_WQLH0sNQ2lGfUKLL5NHPY,37879
|
|
54
|
-
pgsui/impute/unsupervised/models/vae_model.py,sha256=ZLfLqLIsYqV2_nHJ3WXAWk4v_mzqD0p8oiS-jm3legY,22338
|
|
55
|
-
pgsui/impute/unsupervised/models/in_development/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
56
|
-
pgsui/impute/unsupervised/models/in_development/cnn_model.py,sha256=nNMmGev5PmLcUUm44_bnDcx1iO0_FeaqllSCpuL2ykU,15036
|
|
57
|
-
pgsui/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
58
|
-
pgsui/utils/misc.py,sha256=dXPmjH0boW9f5HY0l17v_u42xl_ZGHKmLEL3efsMVWA,15705
|
|
59
|
-
pgsui/utils/plotting.py,sha256=WIoPoFGR4sqKNvjF9ZBehRLdKWt5H6LUr0QxnOTJ8V8,33748
|
|
60
|
-
pgsui/utils/scorers.py,sha256=UrX3gM5FkK4UAUqd9JNNAWPBUWvIDmImIrPm8pRO6fM,26862
|
|
61
|
-
pgsui/utils/sequence_tools.py,sha256=5WY_gEov2AVSG5j4PEPLtI8OFDaBmGi7U6SDqK_dplc,10143
|
|
62
|
-
simulation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
63
|
-
simulation/sim_benchmarks.py,sha256=lA5o1qIqG7lbN56RYhx1crc73e0c-z2C5FtuNDntFBM,11345
|
|
64
|
-
simulation/sim_treeparams.py,sha256=Crgt9SxuBLzrqsnuVFlvPvU5g7tIt3q0Sv_-nge7tfc,13793
|
|
65
|
-
test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
66
|
-
test/pg_sui_simtest.py,sha256=yeQA8kEvGUisvGGYUuv47zK8aw6wTkEScjfjQiociMc,6024
|
|
67
|
-
test/pg_sui_testing.py,sha256=zJLeUiE-swFI-NrUfpsNaxBoWIQCDNnj09RsLOigqNw,15871
|
|
68
|
-
test/test.py,sha256=FEylSUFKXGPusNYF34y5xqPCXeV-d94TLyB2LR8njNU,5034
|
|
69
|
-
test/test_pgsui.py,sha256=rcAoFQSvCAEe0ADXZ7xDUQt-PuvEGEizWFu3mqV3728,14310
|
|
70
|
-
test/test_tkc.py,sha256=P9O956q9Gt_nnJ3CLBpq4cSXLNmN94oA-mUjRngkFgg,5867
|
|
71
|
-
pg_sui-0.2.3.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
72
|
-
pg_sui-0.2.3.dist-info/METADATA,sha256=ORxQjOBL8SrDxXZ3ViOTTquvK0GqQCvkITCk8Uc3-3g,12725
|
|
73
|
-
pg_sui-0.2.3.dist-info/WHEEL,sha256=AtBG6SXL3KF_v0NxLf0ehyVOh0cold-JbJYXNGorC6Q,92
|
|
74
|
-
pg_sui-0.2.3.dist-info/top_level.txt,sha256=jxcAQq4Ov-ynoE-TOLYXKd8OJv4gQjZ59ssYt3tbANA,22
|
|
75
|
-
pg_sui-0.2.3.dist-info/RECORD,,
|
|
@@ -1,118 +0,0 @@
|
|
|
1
|
-
117 10
|
|
2
|
-
GUFL_BXGU36_AA14 TGTTACCGAT
|
|
3
|
-
GUFL_BX626 YGTNACCGRN
|
|
4
|
-
GUFL_BXGU65_AA39 TGTWACCGAT
|
|
5
|
-
GUFL_BX684 TGTTACCGAT
|
|
6
|
-
GUFL_BX504 TGTTACCNAT
|
|
7
|
-
GUFL_BX503 NGTTACCGRT
|
|
8
|
-
GUFL_BXGU32 CGTTACCGAT
|
|
9
|
-
GUFL_BXGU63_AA37 YGTTACCART
|
|
10
|
-
GUFL_BXGU35_AA13 TGTTACCAAT
|
|
11
|
-
GUFL_BXGU61_U57 TGTTACCNAT
|
|
12
|
-
GUFL_BXGU62_AA36 TGTTACCAAT
|
|
13
|
-
GUFL_BX685 TGTTACCGAK
|
|
14
|
-
GUFL_BXGU33 TGTTACCGAT
|
|
15
|
-
DSAZ_BX1213 TGTTACCGGT
|
|
16
|
-
DSNM_BXDS02 TGTTACCGGT
|
|
17
|
-
ONWI_BX489 TGTTACCGAT
|
|
18
|
-
ONWI_BX493 TGTTACCGGT
|
|
19
|
-
ONTX_BXON46_153 TRTTNCCAGT
|
|
20
|
-
ONNE_BXON58_433x2 TGTTACCGGT
|
|
21
|
-
ONWI_BX495 TGTTACCGGT
|
|
22
|
-
ONWI_BX491 TGTTACCGRT
|
|
23
|
-
ONWI_BX497 TGTTACCGGT
|
|
24
|
-
ONCO_BX601 TGTTACCGGT
|
|
25
|
-
ONCO_BX588 TGTTACCGGT
|
|
26
|
-
ONWI_BX486 TGTTACCGGT
|
|
27
|
-
ONTX_BXON45_150 TRTTACCGGT
|
|
28
|
-
ONCO_BX580 TNNTACCGGT
|
|
29
|
-
ONWI_BX490 TGTTACCGGT
|
|
30
|
-
ONCO_BX602 TGTTACCGGT
|
|
31
|
-
ONKS_BXON61_133 TGYTACCGGT
|
|
32
|
-
FLFL_BXFL01 TGTTACCGRT
|
|
33
|
-
FLFL_BX683 TGTTACCGAT
|
|
34
|
-
TTLA_BXTT35_1486 TGTTGCCGGT
|
|
35
|
-
TTAR_BX507 TGTTACCGGT
|
|
36
|
-
TTTX_BX227 TGTTRCYGGT
|
|
37
|
-
TTTX_BX225 TGTTACCGGT
|
|
38
|
-
TTLA_BXTT34_1482 TGTTACCGGT
|
|
39
|
-
TTLA_BXTT36_1491 TGYTRCCAGN
|
|
40
|
-
TTLA_BXTT37_1492 TGTTRCCGGT
|
|
41
|
-
TTAR_BX987 TGNTACCNGT
|
|
42
|
-
TTLA_BXTT38_1493 TGTTRCCGGT
|
|
43
|
-
TTTX_BX222 NGYTRYYGGT
|
|
44
|
-
TTLA_BX421 TGTTACCRGT
|
|
45
|
-
TTKS_BXTT20_78 TGYTGCCAGT
|
|
46
|
-
TTLA_BXTT13 TGTTRCCRGT
|
|
47
|
-
TTAR_BX984 TNTTACCGGT
|
|
48
|
-
TTLA_BX422 TGTTACCGGT
|
|
49
|
-
TTLA_BXTT39_1498 TNTTACCRGT
|
|
50
|
-
TTTX_BX223 TGTTACCGGT
|
|
51
|
-
TTTX_BX228 TRNTACCRGT
|
|
52
|
-
MXMX_BX1195 TGYTACCGAT
|
|
53
|
-
MXMX_BX1194 TGTTACCGAT
|
|
54
|
-
MXMX_BX1196 TGYTACCGAT
|
|
55
|
-
GUMS_BXGU68_AA42 TGTTACCGGT
|
|
56
|
-
GUMS_BXGU53_T72 TGTTACCAGT
|
|
57
|
-
GUMS_BXGU78_AA52 TGTTACCRRT
|
|
58
|
-
GUMS_BXGU75_AA49 NGTTACCGRT
|
|
59
|
-
GUMS_BXGU76_AA50 TGTTACCRGT
|
|
60
|
-
GUMS_BXGU69_AA43 YGTTACCRRT
|
|
61
|
-
GUMS_BXGU77_AA51 TGTTACCGGT
|
|
62
|
-
GUMS_BXGU67_AA41 TGTTACCNRT
|
|
63
|
-
GUMS_BXGU74_AA48 TGTTACCANT
|
|
64
|
-
GUMS_BXGU73_AA47 TGTTACCNGT
|
|
65
|
-
GUMS_BXGU50_T69 TGTTACCGGT
|
|
66
|
-
GUMS_BXGU48_T62 YGTTACCGGT
|
|
67
|
-
GUMS_BXGU56_T83 TGNTACCGRT
|
|
68
|
-
GUMS_BXGU58_T92 TGNTACCNGT
|
|
69
|
-
GUMS_BXGU54_T73 TGTTACCGGT
|
|
70
|
-
GUMS_BXGU44_T56 TGTTACCGGT
|
|
71
|
-
GUMS_BX200 YGNTACCGGT
|
|
72
|
-
GUMS_BXGU79_AA73 TGTTACCAGT
|
|
73
|
-
GUMS_BXGU49_T66 TGTTACCGGT
|
|
74
|
-
GUMS_BXGU72_AA46 TGTTACCGGT
|
|
75
|
-
GUMS_BXGU71_AA45 TGNTACCGGT
|
|
76
|
-
GUMS_BXGU43_T55 TGYTAYCGRT
|
|
77
|
-
GUMS_BXGU47_T61 TGTTRCCRGT
|
|
78
|
-
GUMS_BXGU45_T59 TGTTACCRGT
|
|
79
|
-
EASC_BXEA43_1307 TGTTACCGAN
|
|
80
|
-
EAGA_BXEA49_564 TGNTANCGGT
|
|
81
|
-
EAGA_BX660 YGTTACCGGT
|
|
82
|
-
EAAL_BXEA27 TGTTACCGAT
|
|
83
|
-
TCAL_BXTC63 TGTTRCCGGT
|
|
84
|
-
TCAL_BX612 TGTTACCRNN
|
|
85
|
-
TCAL_BXTC93 TGTTACCNAT
|
|
86
|
-
EAGA_BXEA35_666 NNTTACCGGT
|
|
87
|
-
EASC_BX1108 TGTTACCAGT
|
|
88
|
-
EAGA_BXEA17 TGTTAYCRGT
|
|
89
|
-
EAGA_BXEA34_665 TGTTACCGAT
|
|
90
|
-
EASC_BXEA42_1306 TGTTACCGGT
|
|
91
|
-
TCAL_BX279 TGTTACCGGT
|
|
92
|
-
EAVA_BX320 NGTWACCAAT
|
|
93
|
-
EAGA_BXEA21 TGTTACCRNT
|
|
94
|
-
EATN_BXEA02_36x2 TGTTACCNAT
|
|
95
|
-
EAVA_BX101 TGTTACCRGT
|
|
96
|
-
EASC_BXEA41_1305 TGTTACCNAT
|
|
97
|
-
EAGA_BXEA25 TGTTACCGAK
|
|
98
|
-
TCAL_BXTC92 TGTTAYCGGT
|
|
99
|
-
EAGA_BX346 TGTTACCAGT
|
|
100
|
-
EAVA_BX321 TGTTACCRAT
|
|
101
|
-
EAGA_BX472 YGTTRCCGAT
|
|
102
|
-
EASC_BXEA40_1304 TGTTAYCRAT
|
|
103
|
-
EASC_BX1115 TGTTRCCGAT
|
|
104
|
-
EAGA_BXEA32_662 TGTTAYCGAT
|
|
105
|
-
TCAL_BX273 TGTTACCGNN
|
|
106
|
-
EASC_BX1109 TGTTACCNNT
|
|
107
|
-
TCAL_BXTC80 TGTTACCNAN
|
|
108
|
-
EASC_BX1110 TGYTACCGAT
|
|
109
|
-
TCAL_BXTC110 TGTTACCGNT
|
|
110
|
-
EAGA_BXEA31_659 TGTTACCGRT
|
|
111
|
-
EASC_BX1112 TGTTACCGGT
|
|
112
|
-
EAGA_BX301 TGTTACCGGT
|
|
113
|
-
EAGA_BXEA15_654 YGTTACCGGN
|
|
114
|
-
EASC_BX1114 TGTTACCGRN
|
|
115
|
-
TCGA_BX344 YGTWACCART
|
|
116
|
-
EAGA_BXEA33_663 TNTTACCRAT
|
|
117
|
-
CHCH_BX1191 TGTTACCGGT
|
|
118
|
-
CHCH_BX1193 TGTTACCGGT
|
|
@@ -1,118 +0,0 @@
|
|
|
1
|
-
117 100
|
|
2
|
-
GUFL_BXGU36_AA14 GCGRGCCTCACCGGGTTGCSCNGAGACAGAKRAAGACCAGCGCCGATCCTGGNTCTNCNCCTGGAGAGTTAGCACCCCCACCCGTARTGATGTTACCGAT
|
|
3
|
-
GUFL_BX626 GCGGGCCTNGCCGGGTTGCCCGGAGACAGANRARGACCAGCGCCGATCCTGGCTCTGCCCCTGGAGAGTTNGCANCCCCACCCGTAGTGAYGTNACCGRN
|
|
4
|
-
GUFL_BXGU65_AA39 GCGGGCCTCGGCGGGTTGCCCGGAGNNAGATGAGGACCAGYGMCGATCCTGGCTCTGCCCCTGGANAGTTAGCACCCCCAYCCGTAGTGATGTWACCGAT
|
|
5
|
-
GUFL_BX684 GCNGGCCTCNSCGGGTTGCNCGGAGACAGAKGAGGACCAGCGCCGATCCTGGCTCTGCCNCTGGAGAGTTNGCACCCCCAYCCGNAGTGATGTTACCGAT
|
|
6
|
-
GUFL_BX504 GCGGGCCTCGSCGGGTTGCCCGGAGACAGATGAGGACCANCGMCGATNCTGGCTCNGCCCCTGGAGAGTTAGCACCCCCAYCCNTAGTGATGTTACCNAT
|
|
7
|
-
GUFL_BX503 GCGGGCCTNGSCNGGTTGCCCKGAGACAGAGGAGGACCAGCGCCGATCCTGGCTCTGCCNCTGGAGANTTAGCACNCCCAYCCGTAGKGANGTTACCGRT
|
|
8
|
-
GUFL_BXGU32 GCGGGCCTNGSCGGGTTGCCNGGAGACAGAKGAGGACCANCGCCGATCCTGGCTCTGCCCCTGGAGAGTTNGYACCCCCAYCCGTAGTGACGTTACCGAT
|
|
9
|
-
GUFL_BXGU63_AA37 GCGGGCCTCGCCNGGTTNCCCKGAGACAGATGAGGACCAGCGCCGATCCTGGCTCTGCCCCTGGAGAGTTAGTACCCCCACCCGTAGTGAYGTTACCART
|
|
10
|
-
GUFL_BXGU35_AA13 GCRGGCCTCGCCGGGNNGCCCGGAGACAGAKGAGGACCAGCGCCGATCCTGGCTCTGCCCCTRGANAGTTAGYACNCCCATCCGTAGTGATGTTACCAAT
|
|
11
|
-
GUFL_BXGU61_U57 GCGGGCCTCGSCGGGTNGCCCGGAGANAGATGAGGACCAGCGCCGATNNNGGCTCNGCCCCTGGAGAGTTAGCACCCNCAYCCGTARTGATGTTACCNAT
|
|
12
|
-
GUFL_BXGU62_AA36 GCGGGCCTCGNCGGGTTGCNCGGAKACAGAKGAGGACCAGCNCCGATCCNGGCTCNGCCCCTGGAGAGTTAGYACCCCCACCCGTANKGATGTTACCAAT
|
|
13
|
-
GUFL_BX685 GNGRGCCTCNCCGGGTYGCCCKGAGACAGAKGAGGACCAGCGCCGATCCTGGCTCTGCCCCTGGAGANTTAGCACCCCCACCCGTAGTGATGTTACCGAK
|
|
14
|
-
GUFL_BXGU33 GCGGGCCTCNCCGGGTTGCCCGGAGACAGATGAGGACCAGCGCCGATCCTGGCTCTGCCCNTGGAGANTTAGCACCCCCAYCCGTAGTGATGTTACCGAT
|
|
15
|
-
DSAZ_BX1213 GCGGGCCTCGCTGGTTTGCCCGGAGACAKATGAGAACCAGCGCCGATCCTGGTWCTGCCCCTGGAGAGTTAGCACCCCYATCCGTAGTGATGTTACCGGT
|
|
16
|
-
DSNM_BXDS02 GCGGGCCTCGCTGGTTTGCCCGGAGACAKMTGARAACCAGCGCCGATCCTGGYTCTGCCCCTGGAGAGTTAGCACCCCCATCCGTAGTGMTGTTACCGGT
|
|
17
|
-
ONWI_BX489 GCGGGCTTCGCTGGTTNGCCCGGAGANMGATGAAAACCAGCGCCGATCCTRGTTCTGCCCCTGGAGAGTTAGNACCCCCATCCGTAGTGATGTTACCGAT
|
|
18
|
-
ONWI_BX493 RCGGGCYTCGCTGNTTTGCCCGGAGACAGMTGAAAACCAGCGCCGATCCTGNTTCTGCCCCTGGNGAGTTAGCACCCCCATCCGTAGTGATGTTACCGGT
|
|
19
|
-
ONTX_BXON46_153 GCGGGCCTCGCTGGTTTGCCCGGAGACAGMTRAGAACCAGCGCCGATCCTGGTTCTRCCCCTGGAGAGTTAGCACCCCCATCCGTAGTGATRTTNCCAGT
|
|
20
|
-
ONNE_BXON58_433x2 GCGGGCCTCGCNGGTNNGCCCGGAGACMGCTGAGAACCAGCGCCGATCCTGGTTCTGCNCCTGGAGAGTTNGCANCCCCATCCGTAGTGATGTTACCGGT
|
|
21
|
-
ONWI_BX495 GCGGGCYTCGCTGGTTTGCCCGGAGACAGATGAAAACCAGCGCCGATCCTGGTTCTGCCCCTGGRGAGTTAGCACCCCCATCCGTAGTGATGTTACCGGT
|
|
22
|
-
ONWI_BX491 GCGGGCYTCGCTGGTTTGCCCGGAGACAGMTGAAAACCAGCGCCGNTCCTRGTWCTGCCCCTGGANAGTTAGCACCCCCATCCGTAGTGATGTTACCGRT
|
|
23
|
-
ONWI_BX497 RCGGGCCTCGCTGGTTTGCCCGGAGACAKMTRARAACCAGCGCCGATCCTGGTTCTGCCCCTGGRGAGTTAGNACCNCCATCCGTAGTGATGTTACCGGT
|
|
24
|
-
ONCO_BX601 GCGGGCCTCGCTGGTTTGCCCGGAGNCAGCTGAGAACCAGCGCCRATCCTGGTTCTGCCCCTNGAGAGTTAGCACCCCCATCCGTAGTGATGTTACCGGT
|
|
25
|
-
ONCO_BX588 RCGGGCCTCGCTGNTTNGNCCGGAGACAGATGAGAACCAGCGCCAATCCTGGTTCTGCCCCTGGAGAGNTAGCACCCCCAYCCGTAGTGATGTTACCGGT
|
|
26
|
-
ONWI_BX486 GNGGGCCTCGCTGGTTTGCCCGNAGACAGMTGARAACCAGCGCCGATCCTGGTTCTGNCCCTGGAGAGTTAGCACCCCCATCCGTAGTGATGTTACCGGT
|
|
27
|
-
ONTX_BXON45_150 ACGGGCCTCGCTGGTTTGCCCGGAGACMGANGAAAACCAGCGCCGATCCTNNTTCTGCCCCTGGANAGTTAGCACCCCCANCCGTAGTGMTRTTACCGGT
|
|
28
|
-
ONCO_BX580 GCGGGCCTCNCTGGTTTGCCCGGAGACAKMTGAGAACCAGCGCCRATYCTGGTTCTGCCCCTGGAGAGTTAGCACCCCCATCCGTAGTGATNNTACCGGT
|
|
29
|
-
ONWI_BX490 RCGGGCCTCGNTGNTTTGCCCGGAGACAGMTGAAAACCAGCGCCGATCCTAGTWCTRCCCCTGGRGAGTTAGNNCCCCCATCCGTAGTGATGTTACCGGT
|
|
30
|
-
ONCO_BX602 RNGGGCCTCGCTGGTTTGCCCGNAGACAKMTGAGAACCAGCGCCGATYCTGGTTCTGCCCCTGGAGAGTTAGCACCCCCATCCGTAGTGATGTTACCGGT
|
|
31
|
-
ONKS_BXON61_133 GCGGGCYTCGCTGGTTTGCCCGGAGACMGMTGARAACCAGCGCCGATCCTGGTWCTGCCCCTGGAGAGTTAGCACCCCYAYCCGTAGTGATGYTACCGGT
|
|
32
|
-
FLFL_BXFL01 GCRGGCCCCGCCGGGTTGCCCGGAGACAGATGARGACCAGCGCCGATCCTGGCTCTGCCCCTGGAGAGTTAGCACTCCCACMCGTAGTGATGTTACCGRT
|
|
33
|
-
FLFL_BX683 GCRGGCCCCGCCGGGTTGCCCGGAGACAGATGARGACCAGCGCCGATCTTGGCTCTGCCCCTGGAGAGTTAGCACTCCCACMCGTAGTGATGTTACCGAT
|
|
34
|
-
TTLA_BXTT35_1486 GNGGGCCTCGNCGGGTTGNSCGGAGACAGATGAGGACCASYGCCGATCCTGGCTCTGCCCCTGGAGAGTTAGCATCYCCATCCNTAGKGNTGTTGCCGGT
|
|
35
|
-
TTAR_BX507 GCGGGCCTCGNCGGGNTGCCCGGAGACAGATRAGGACCAGCGCCGATCCTGGCTCTGCCCCTGGAGAGTTAGCACCCCCAYCCGNAGTGATGTTACCGGT
|
|
36
|
-
TTTX_BX227 GCGGGCCTCGCCGGGTTGCSCGGAGACAGATRARGWCCAGCGCYGATCCTGGCWCTGCCCCTRGAGAGTTARCACCCCCATCCGTAGTGATGTTRCYGGT
|
|
37
|
-
TTTX_BX225 GTGGGNCTCGCCGGGTTGCCCGGAGACAGATGARGACCAGCGCCGATCCTGGCTCTGCCCCTGGAGAGTTAGCACCCCCATCCGTAGTGATGTTACCGGT
|
|
38
|
-
TTLA_BXTT34_1482 GCGRGCYTCGCCGGGTTGCSMKGAGACAGNTGAGGWCCAGCGCCGANCCTGGCTCNGCCCCTGGARAGTTAGCACYCCCMTCCGTAGTGATGTTACCGGT
|
|
39
|
-
TTLA_BXTT36_1491 GNGGGCCTCGCCGGGTTGCCCGGAGACAGATGAAGACCAGCGCCGATCCTGGCTCTGCNCCTGGAGAGTTAGCANCYCCMTCCGTAGTGATGYTRCCAGN
|
|
40
|
-
TTLA_BXTT37_1492 GNGAGCCTCGCCGGGTTGCCCTGAGACAGATGAGGWCCAGCGCCGATCCTGNCTCTGCCCCTRGAGAGTTAGCACCCCCACCCGTAGTGATGTTRCCGGT
|
|
41
|
-
TTAR_BX987 GCGGGCCTCNCCGGGTTGCCCGGAGACAGATGAAGACCAGCGCCGATYCTGGCTCTGCCCCTGGAGAGTTARCACCCCCAYCCGTAGTGATGNTACCNGT
|
|
42
|
-
TTLA_BXTT38_1493 GCGRGCYTSGCCGGGTTGCCCGGAGACAGATGAGGACCAGCGCCGWWCCTGGNTCTGCCCCTGGAGAGTTNGNACCCCCATCCGTAGTGATGTTRCCGGT
|
|
43
|
-
TTTX_BX222 GCGRGCYTCGCCGGGTTGCCCGGAGACAGATGAAGACCASCGCCGATCCTGGCTCTGCCCCTGGAGAGTTAGCACCCCCATCCGTAGTGANGYTRYYGGT
|
|
44
|
-
TTLA_BX421 GCGRGCCTCNCCGGGTTGCCCKGAGACAGATGAGGWCYAGCGCCGATCCTGNCTCTGYCCCTGGAGAGTTAGCACCNCCMTCCGNAGTGATGTTACCRGT
|
|
45
|
-
TTKS_BXTT20_78 GCGGGCCTCGCCGGGTTGCCCNGAGACAGATGAGGWCCAGCGCCGWTCCTGGCTCTGCCCCTGRAGAGTTAGCACCCCCACCCGTAGTGATGYTGCCAGT
|
|
46
|
-
TTLA_BXTT13 GCGAGCCTSGCNRGGTTRCCCGGAGACANATGAGGACYAGCGCCGATCCTGGCTCTGYCCCTRGAGAGTTARCACCYCCAYCCSTAGTGATGTTRCCRGT
|
|
47
|
-
TTAR_BX984 GCGGGCCTCGNCGGGTTGCCCGGAGNCANATGAAGACYAGCGCCGATNCTGGCWCTGCCNCTGGAGAGTTAGCACCCCCACCCGTAGTGATNTTACCGGT
|
|
48
|
-
TTLA_BX422 GCGRGCCTCGCCGGGTTGCCCGGAGACANATGAGGACCAGCGCCGATCCTGGCTCTGCCNCTRGAGAGTTAGCACCYNCATCCGTAGTGATGTTACCGGT
|
|
49
|
-
TTLA_BXTT39_1498 GCGGGNCTCGCCGGGTTGCGCKGAGACAGATGAGGACCASCGCCGATCCTGGCWCTGCCCCTGGAGANTTWGCACNYCCAYCCGTAGTGATNTTACCRGT
|
|
50
|
-
TTTX_BX223 GCGGGCCTCGCCGGGTTGCGCGGAGACAGATGANGACCAGCGCCGATCCTGGCTCTGCCCCTRGARAGTTAGCACCCCCNYCCGTAGTGATGTTACCGGT
|
|
51
|
-
TTTX_BX228 GCGGGCCTCGCCGGGTTGCCCGGAGACAGATGAGGACCASCGCCGNACCTGGCACTGYCCCTGGAGAGTTWRCACNCCCACCCGTAGTGATRNTACCRGT
|
|
52
|
-
MXMX_BX1195 GCGGGCCTCGCCGGGTTGYCCGGAGACAGATGAGGACCAGCGCCGATCCTGGCTCTGCCCCTGGAGAGTTAGCACCCCCAYCCGTAGTGATGYTACCGAT
|
|
53
|
-
MXMX_BX1194 GCGGGCCTCGCCGGGTTGYCCGGAGACAGATGAGGACCAGCGCCGATCCTGGCTCTGCCCCTGGAGAGTTAGCACCCCCACCCGTAGTGATGTTACCGAT
|
|
54
|
-
MXMX_BX1196 GCGGGCCTCGCCGGGTTGCCCGGAGACAGATGAGGACCAGCGCCGATCCTGGCTCTGCCCCTGGAGAGTTAGCACCCCCACCCGTAGTGATGYTACCGAT
|
|
55
|
-
GUMS_BXGU68_AA42 GCGGGCCTCGCCGRGTTGCCCKGAGWCAGATGAGGACYAGCGCCGNTCCTGGCTCTGCCCCTGGAGAGNTAGCACCCCCAYCCGTAGTGATGTTACCGGT
|
|
56
|
-
GUMS_BXGU53_T72 GCGGGCCTCGSCGGGTTNCCCGGAGACAGATGRGGACCAGCGCYGATCCWGGCTCNGCNCCTGGAGAKTTWGCACCCNCACCYGTAGTGATGTTACCAGT
|
|
57
|
-
GUMS_BXGU78_AA52 GCGGGCCTCGCCGGGTTGCCCGGAGACAGATGAGGACYAGCGCCGATCCWGGCTCTGNNCCTGRAGAGTTAGCACNCCCATCCGCAGTGATGTTACCRRT
|
|
58
|
-
GUMS_BXGU75_AA49 GCGGGCCTCGCCRGGTTGCCCTGAGACAGATGARGACCAGCGCCGATCCNGGCTCWGCCCCTGGAGAGTTAGCACCCCCACCCGTAGTGANGTTACCGRT
|
|
59
|
-
GUMS_BXGU76_AA50 GCGGGCCTCGCCGRGTTGCCCKRAGACAGATGAGGACYAGCGCCGATCCAGGCTCTGCCCCTGGAGAKNTAGCACCCCCATCCGTAGTGATGTTACCRGT
|
|
60
|
-
GUMS_BXGU69_AA43 GCGGGCCTCNCCGGGTTGCCCKGRGACAGATGARGACCAGCGCCGATCCWGGCTCNGNCCYTRGAGAGTTAGCACNCCCAYCCGTAGKGAYGTTACCRRT
|
|
61
|
-
GUMS_BXGU77_AA51 GCGGGCCTCGCCGGGTTGCNCKGRGACAGATGAGGACCAGCGCCGATCCWGGCTCTGCCCCTGRAGAGTKAGNACCCCCATCCGYAGTGATGTTACCGGT
|
|
62
|
-
GUMS_BXGU67_AA41 GCGGGCCTCGCCGGGTTGCCCKGAGACAGAKGAGGACCAGCGCCGATNCWGGCTCNGCCCCTGGAGAGTTAGCACNCNCACCCGTAGTGATGTTACCNRT
|
|
63
|
-
GUMS_BXGU74_AA48 GCGGNCCTCGCCGGGTTGCCNTGAGANAGATGAGGACCAGCGCCGATCCNGGCTCTGNCCCTGGANAGTTAGCANCCCCATCCGTAGKGATGTTACCANT
|
|
64
|
-
GUMS_BXGU73_AA47 GCGGGCCTCGCCGGGTTGCCCKAAGACAGAKGAGGACCAGCGCCGATCCWGGCTCTGCCCCTRGAGAGTKAGNACCCCCAYCCGTAGKGATGTTACCNGT
|
|
65
|
-
GUMS_BXGU50_T69 GCGGGCCTCGSCGGGTTGCCCKGAGACAGATGARGACCAGCGCCGATCNWGGCTCTGCCYCTGGANAGTTAGCACCCCCACCCGTAGTGATGTTACCGGT
|
|
66
|
-
GUMS_BXGU48_T62 GCGGGCCTCGCCGGGYNGCCCKGAGAMAGAKGARGACCAGCGCCGANCNAGGNTCAGCCCCTGGAGAGTTAGCACCCCCACCYGTAGTGAYGTTACCGGT
|
|
67
|
-
GUMS_BXGU56_T83 GCGGGCCTCGCCGGGTTGCCCKGAGACNGATGARGACYAGCGCCGATCCTGGCTCTGCCYCTGGAGAGTTAGCACCCNCAYCCGTAGTGATGNTACCGRT
|
|
68
|
-
GUMS_BXGU58_T92 GCGGGCCTCGCCGGGTTGCNCKGAGACAGATGAGGACCAGCNCCGATCCNGGCTCTGCCCCTGGAGAGTTNGCNCCCCCACCCGTAGTGATGNTACCNGT
|
|
69
|
-
GUMS_BXGU54_T73 GCGGGCCTCNCCRGGTNGCCMKGAGANAGNTGAGGNCCAGCGCCGATCCNGGCTCTGCCCCTGGANAGTTNGNANCCCCATCCGTAGTGATGTTACCGGT
|
|
70
|
-
GUMS_BXGU44_T56 GCGGGCCTCGCCGGGTTGCCCKGAGACAGAKGRRGAYCAGCGCCGATCCNGGCTCTGCCCCTGGAGAGTKAGCACCCNCAYCCGTAGTCATGTTACCGGT
|
|
71
|
-
GUMS_BX200 GCGGGCCTCGCCGGGTTGCCCTGAGACAGATGAGGACCAGCGCCGATCCWGGCTCTGCCCCTGGANAGTTNGCACCCCCAYCCGTAGTGAYGNTACCGGT
|
|
72
|
-
GUMS_BXGU79_AA73 GCGGGCCTCGCCGGGTTGCCMKGAGACAGAGNAGGACCAGCGCCGATNCWGGCTCTGCCCCTRRAGAGTKNGNACCCCCAYCCGTAGTGATGTTACCAGT
|
|
73
|
-
GUMS_BXGU49_T66 GCGGGCCTCGCCGRGTTGCCCNGAGAMAGATRARGACCAGCGCCGWNCCWGGCTCTGCCCCTGGAGAGTTAGCACCCCCACCCGTAGTGATGTTACCGGT
|
|
74
|
-
GUMS_BXGU72_AA46 GCGGRCCTCGCCGGGTTGCCMKGAGACAGATGARGACCAGCGCCGATCCWGCCTCTGCCCCTGGAGAGTKAGCACCCCCAYCCGTAGTGATGTTACCGGT
|
|
75
|
-
GUMS_BXGU71_AA45 GCGGGCCTCGCCGRGTNGCCNKGRGACAGATGAGGACYAGCGCCGATCCWGGCTCTGCCCCTGGAGAGNTAGCACCCNCATCCSTAGTGATGNTACCGGT
|
|
76
|
-
GUMS_BXGU43_T55 GCGGGCCTCGNCGGGTTGCCCTGAGWCAGATGARGACCAGCNCCGANCCAGGCTCWGCCCCTGGNNAGTTAGNACCCCCNCCCGTAGKGATGYTAYCGRT
|
|
77
|
-
GUMS_BXGU47_T61 GCGGGCCTCGCCGGGTTGCCCKGAGACAGAKGRRGAYYAGCGCCGATCCTGGCWCTGCCCCTGGAGAGTTAGCACCCCCACCCGTAGTGATGTTRCCRGT
|
|
78
|
-
GUMS_BXGU45_T59 GCGGGCCTCNCCGGGTTGNCCNGAGACAGATGRGGACCAGCGCYGATCCAGGCWCNGNCCCTGGANAGTTAGCACCCCCACCYGTAGTGNTGTTACCRGT
|
|
79
|
-
EASC_BXEA43_1307 GNGGGNCTCNNCGGGTTNNCCNGAGNNAGATGAGGACCAGCGCCGATCCTGGCTCTGNCCCTGGANAGTTNGCACCCCCACCCGTAGTGATGTTACCGAN
|
|
80
|
-
EAGA_BXEA49_564 GYGRGCCTCRCCGGGTTGCCCGGAGANAGATGARGACNRGCRCCGNTCCTGGCTCTGCCCCTGNAGAGTTAGCACCCSCAYCCGTRGTGATGNTANCGGT
|
|
81
|
-
EAGA_BX660 GNGGGCCTCNCCGGGTTNNCCGGAGACAGATGAGGACCAGCGCCGATCCTGGCWCTGCCCCTNGAGAGTTAGCACCCCCAYCCGTAGTNNYGTTACCGGT
|
|
82
|
-
EAAL_BXEA27 GCGRGNCTCGCCGGGYTGCCCKGAKACAGATGAGGACCAGCGCCGATCCTGGCWCTGCCCCTGGAGANTTNGCACCCSCAYCCGTAGTGATGTTACCGAT
|
|
83
|
-
TCAL_BXTC63 GCGGGCCTCGCCGGGTTGCCCKGAGACAGATGAGGACCAGCGCCGANCCTGGCTCTGNCCCTGGAGAGTTAGCACCCGCACCCGTAGTGATGTTRCCGGT
|
|
84
|
-
TCAL_BX612 GNGGGCCTCNCCGGGTTNNCCKGAGACAGAKGAGGACCAGCNCNGATCCTGGCTCTGCCCCTGGAGANTTAGCACCCSCAYCCGTAGKGNTGTTACCRNN
|
|
85
|
-
TCAL_BXTC93 GCRGRCCTCNCCGGGTTGCCCGGAGACAGATRAGGACCAGCGCCGATCCTGGCTCTGCCCCTGGAGAGTTAGCACCCCCACCYGTAGTGATGTTACCNAT
|
|
86
|
-
EAGA_BXEA35_666 GCGRGCCTCNSCGGGNNNCCCNGAGACAGNTGAAGACCAGCGCCGATCCTGGCWCNNNCCCTGGAGAGTTAGCANNCNCACCCGTAGTNANNTTACCGGT
|
|
87
|
-
EASC_BX1108 GCGRGCCTCGCCGGGTTGCCCGGAGACAGATRARGACCAGCGCCGATCCTGGCTCTGCCCCTGGAGAGTTAGCACCCCCAYCCSTRNTGATGTTACCAGT
|
|
88
|
-
EAGA_BXEA17 GCGRGCCTCGSCGGGTTGCCCGGAGACAGATGARGACCAGCGCCGNTCCTGGCTCTGNCCCKGGAGAGTTAGCACCCCCAYCYGTAGTGATGTTAYCRGT
|
|
89
|
-
EAGA_BXEA34_665 GCGRGCCTCGSCGGGTTGCCCGGAGACAGATGAGGACCAGCGCCGATCCTGGCTCTGCCCCTGGAGAGNTNRCACCCCCAYCCGTRGTGATGTTACCGAT
|
|
90
|
-
EASC_BXEA42_1306 GCGGGCCTCGCCGGGTTGCCCGGAGACAGATGARGACCAGCGCCGATCCTGGCTCTGCCCCTGGANAGTTAGCANNCSCAYCCGTAGTGATGTTACCGGT
|
|
91
|
-
TCAL_BX279 GCGRGCCTCRCCGGGTTGCCCGGAGACAGATGARGACCAGCGCCGATCCTGGCTCTGCCCCTGGAGAGTTAGCWCCCSCATCCGTAGTGNTGTTACCGGT
|
|
92
|
-
EAVA_BX320 GNGGGCCTCGCCGGGTTGCNCGGAGANAGATGAGGACCAGCGCCGATCCTGGCTYTGNCCCTGGAGAGTTAGCACCCCCACCCGTAGTGANGTWACCAAT
|
|
93
|
-
EAGA_BXEA21 GCGGGCCTCGCCGGGTTGCCMGGAGACAGATGARGACCAGCGCCGATCCTGNCTNTGCCCCKGGAGAGTTAGCACCCCCAYCCGTAGTGATGTTACCRNT
|
|
94
|
-
EATN_BXEA02_36x2 GCGGGCCTCGCCGGGTYNCCCGGAGACAGATGNGGACCAGCNCNGATCCTGGNTCTGNCCNTGGAGAGTTNGCNCCCNCANCCGTAGTGATGTTACCNAT
|
|
95
|
-
EAVA_BX101 GCGGGCCTCGSCGGGTTGCCCGGAGACAGATGAGGACCAGCGCCGATCCTGGCTCTGCYCCTGGAGAGTTAGCACCCCCAYCYGTAGTGATGTTACCRGT
|
|
96
|
-
EASC_BXEA41_1305 GCRRGCCTCNCCGGGTNGCCCGGAGACAGATGARGNCCAGCGCCGATCCTGGCWCTGCCCYTGGANAGTTAGCACCCCCATCCGTAGTGATGTTACCNAT
|
|
97
|
-
EAGA_BXEA25 GCGGGNCTNNNCGGGTTGCCCKGAGACAGATGAGNACCAGCGCCGWTNCTGGCTNTGNCCCTGGAGAGTTAGCACCCCCACCCGTAGTGATGTTACCGAK
|
|
98
|
-
TCAL_BXTC92 GCGRGNCTCGCCGGGTTGCCCGGAGACAGATGARGACCAGCNCCGATCCTGGCTCTGCCCCTGGAGAGTTAGCACCCCCACCCGTAGTGATGTTAYCGGT
|
|
99
|
-
EAGA_BX346 GCGGGCCTCGCCGGGTTGCCCGGAKACAGATGAGGACCRGCGCCGATCCTGGCTYNGNCCCTGGANAGTTAGCWCCCCCACCCGTAGTGATGTTACCAGT
|
|
100
|
-
EAVA_BX321 GCGGGCCTCGCCGGGTTGCCCGGAGACAGATGARGACCAGCGCCGATCCTGGCTCTGCYCCTGGAGAGTTAGCACCCSCAYCCGTAGTGATGTTACCRAT
|
|
101
|
-
EAGA_BX472 GCGRGCCTCGCCGGGTTGCNCGGAGACAGATGAGGACCAGCNCCGATCCTGGCTCTGCCCCTGGAGAGTTAGCACCCCCATCCGTAGKGAYGTTRCCGAT
|
|
102
|
-
EASC_BXEA40_1304 GCGRGCCTCGCCGGGTTGCCCNGAGACAGATGAGGACCAGCGCCGATCCNGGCTCTGCCCCTGGANAGTTAGCACCCSCACCCGTAGTGATGTTAYCRAT
|
|
103
|
-
EASC_BX1115 GCGGGCCTNGCCGGGTTGCCCGGAGACAGATGRRGACCAGCGCCGATCCTGGCTCTGCCCCTGGAGRGTTAGCACCCCCACCCSTAGTNATGTTRCCGAT
|
|
104
|
-
EAGA_BXEA32_662 GCGAGCCTCGSCGGGTTGCCCGGAGACAGATGARGACCAGCGCCGATCCTGGCTCTGCNCCTGGNGAGGTAGCACCCCCACCCGTAGTGATGTTAYCGAT
|
|
105
|
-
TCAL_BX273 GCGGGCCTNGSCGGGTTGCCCGGAGACANATGAGGACCAGCACCGATCCTGGCTCTGCCNCTGGAGAGTTAGCANCCCCACCCGTAGTGATGTTACCGNN
|
|
106
|
-
EASC_BX1109 GCGGGNCTNNCCGGGTTNCCCKGAGACAGATGAGGACCAGCNCCGWTCCTGGCWNTGCCCCTGGAGAGTTNGCACCCCCATCCGNAGTGATGTTACCNNT
|
|
107
|
-
TCAL_BXTC80 GCARGCCTCGCCGGGTTGCCCGGAGACAGAKNAGGACCAGCGCCGATCCTGGCTCTGCCCCTGGAGAGTTAGCACCCCCATCCGTAGTGATGTTACCNAN
|
|
108
|
-
EASC_BX1110 GCGGRCCTCGCCGGGTTGCCCGGAGANAGAKGARGACCAGCGCCGATCCTGGCTCNGCCCCTGGANAGTTAGCACCCCCMYCCGTAGTGATGYTACCGAT
|
|
109
|
-
TCAL_BXTC110 GCGRGCCTCGCCGGGTTGCNCGGAGACAGAKGAAGACCAGCGCCGATCCTGGCTCTGCCNCTGGAGAGTTAGCACCCCCACCCGTAGTGATGTTACCGNT
|
|
110
|
-
EAGA_BXEA31_659 GCGRGCCTCGNCGGGTTGCCCGGAGACAGATGAGGACCAGCGCCGATCCTGGCTNTGCCCCTGGAGAGTTWGCACCCSCACCCGTAGTGATGTTACCGRT
|
|
111
|
-
EASC_BX1112 GCGRGCCTCGCCGGGTTGCCCGGAGACAGATGRGGACCNGCGCCGWTNCTGGCWCTGTCCCTGGAGRGTTAGCACCCCCATCCSTAGTGATGTTACCGGT
|
|
112
|
-
EAGA_BX301 GCGRGCCTCGCCGGGTTGCGCGGAGACAGATGARGACCAGCGCCGATCCTGGCTCTGCCCCTGGANAGTTAGCACCCCCACCCGTAGTGATGTTACCGGT
|
|
113
|
-
EAGA_BXEA15_654 GCGRGNCTCNSCRGGTTGCCCGGAGACAGAKGAGGACCAGCGCCGATCCTGGCTCTGCCCCTGGANAGNTAGCACCCCCACCCGTAGTGAYGTTACCGGN
|
|
114
|
-
EASC_BX1114 GCGRGCCTCGCCGGGTTGCCCGGAGACAGATRRGGACCAGCGCCGATCCTGGCTCTGCCCCTGGAGRGTTAGCACCCCCAYCCCTAGTGATGTTACCGRN
|
|
115
|
-
TCGA_BX344 GNGRGCCTCGCCGGGTTRCCCGGAGACAGAKGRAGACCAGCGCCGATCCTGGCTCTGCCCCTGGAGAGTTAGCACCCCCAYCCGTAGTGAYGTWACCART
|
|
116
|
-
EAGA_BXEA33_663 GCGGGCCTCGNCGGGTNGCNCKGAGACAGATGAGGACCAGCGCCGATNCTGGCTNTGCCNCTGGANAGTTAGCACNNCCACCCGTANTGATNTTACCRAT
|
|
117
|
-
CHCH_BX1191 GCGGGACTCGCCGGGTTGCCCGGAGACAGATGAGGACCAGCGCCGATCCTGGCTCTGCCCCTGGAGAGTTAGCACCCCCACCCGTRGTGATGTTACCGGT
|
|
118
|
-
CHCH_BX1193 GCGGGACTCGCCGGGTTGCCCGGAGACAGATGAGGACCAGCGCCGATCCTGGCTCTGCCCCTGGAGAGTTAGCACCCCCACCCGTRGTGATGTTACCGGT
|