diploSHIC 1.0.7__tar.gz → 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diploSHIC-1.0.7 → diploshic-1.1.0}/MANIFEST.in +0 -2
- {diploSHIC-1.0.7/diploSHIC.egg-info → diploshic-1.1.0}/PKG-INFO +62 -44
- {diploSHIC-1.0.7 → diploshic-1.1.0}/README.md +28 -33
- {diploSHIC-1.0.7 → diploshic-1.1.0/diploSHIC.egg-info}/PKG-INFO +62 -44
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploSHIC.egg-info/SOURCES.txt +3 -5
- diploshic-1.1.0/diploSHIC.egg-info/requires.txt +14 -0
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/__init__.py +1 -1
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/fvTools.py +295 -45
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/makeFeatureVecsForChrArmFromVcfDiploid.py +29 -9
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/makeFeatureVecsForChrArmFromVcf_ogSHIC.py +26 -9
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/makeFeatureVecsForSingleMsDiploid.py +12 -4
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/makeFeatureVecsForSingleMs_ogSHIC.py +24 -22
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/msTools.py +9 -5
- diploshic-1.1.0/diploshic/numba_stats.py +350 -0
- diploshic-1.1.0/pyproject.toml +102 -0
- diploshic-1.1.0/tests/test_regression.py +41 -0
- diploSHIC-1.0.7/diploSHIC.egg-info/not-zip-safe +0 -1
- diploSHIC-1.0.7/diploSHIC.egg-info/requires.txt +0 -10
- diploSHIC-1.0.7/diploshic/shicstats.pyf +0 -50
- diploSHIC-1.0.7/diploshic/utils.c +0 -216
- diploSHIC-1.0.7/pyproject.toml +0 -4
- diploSHIC-1.0.7/setup.py +0 -47
- {diploSHIC-1.0.7 → diploshic-1.1.0}/LICENSE +0 -0
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploSHIC.egg-info/dependency_links.txt +0 -0
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploSHIC.egg-info/top_level.txt +0 -0
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/diploSHIC +0 -0
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/generateSimLaunchScript.py +0 -0
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/makeTrainingSets.py +0 -0
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/misc.py +0 -0
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/setup.py +0 -0
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/testing/hard.fvec +0 -0
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/testing/linkedHard.fvec +0 -0
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/testing/linkedSoft.fvec +0 -0
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/testing/neut.fvec +0 -0
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/testing/soft.fvec +0 -0
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/training/hard.fvec +0 -0
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/training/linkedHard.fvec +0 -0
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/training/linkedSoft.fvec +0 -0
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/training/neut.fvec +0 -0
- {diploSHIC-1.0.7 → diploshic-1.1.0}/diploshic/training/soft.fvec +0 -0
- {diploSHIC-1.0.7 → diploshic-1.1.0}/setup.cfg +0 -0
|
@@ -1,16 +1,41 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: diploSHIC
|
|
3
|
-
Version: 1.0.7
|
|
4
|
-
Summary:
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
Author-email: adkern@uoregon.edu
|
|
3
|
+
Version: 1.1.0
|
|
4
|
+
Summary: A deep learning tool for identifying hard and soft selective sweeps in population genomic data
|
|
5
|
+
Author-email: Andrew Kern <adkern@uoregon.edu>
|
|
6
|
+
Maintainer-email: Andrew Kern <adkern@uoregon.edu>
|
|
8
7
|
License: MIT
|
|
9
|
-
|
|
10
|
-
|
|
8
|
+
Project-URL: Homepage, https://github.com/kr-colab/diploSHIC
|
|
9
|
+
Project-URL: Documentation, https://github.com/kr-colab/diploSHIC/wiki
|
|
10
|
+
Project-URL: Repository, https://github.com/kr-colab/diploSHIC.git
|
|
11
|
+
Project-URL: Issues, https://github.com/kr-colab/diploSHIC/issues
|
|
12
|
+
Keywords: population genetics,selective sweeps,deep learning,CNN,genomics,evolution
|
|
13
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
22
|
+
Requires-Python: >=3.10
|
|
11
23
|
Description-Content-Type: text/markdown
|
|
12
|
-
Provides-Extra: dev
|
|
13
24
|
License-File: LICENSE
|
|
25
|
+
Requires-Dist: numpy>=1.20
|
|
26
|
+
Requires-Dist: scipy>=1.7
|
|
27
|
+
Requires-Dist: matplotlib>=3.5
|
|
28
|
+
Requires-Dist: pandas>=1.3
|
|
29
|
+
Requires-Dist: scikit-allel>=1.3
|
|
30
|
+
Requires-Dist: scikit-learn>=1.0
|
|
31
|
+
Requires-Dist: tensorflow<3.0,>=2.13
|
|
32
|
+
Requires-Dist: keras>=2.13
|
|
33
|
+
Requires-Dist: numba>=0.56
|
|
34
|
+
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
36
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
37
|
+
Requires-Dist: ruff>=0.1; extra == "dev"
|
|
38
|
+
Dynamic: license-file
|
|
14
39
|
|
|
15
40
|
# diploS/HIC
|
|
16
41
|
This repo contains the implementation for `diploS/HIC` as described in Kern and Schrider (2018; https://doi.org/10.1534/g3.118.200262), along
|
|
@@ -23,51 +48,38 @@ using simulation. 2) `diploS/HIC` training and performance evaluation. 3) Calcul
|
|
|
23
48
|
genetic simulations must be performed using separate software such as discoal (https://github.com/kern-lab/discoal)
|
|
24
49
|
|
|
25
50
|
## Installation
|
|
26
|
-
`diploS/HIC` has a number of dependencies that should be straightforward to install using python package managers
|
|
27
|
-
such as `conda` or `pip`. The complete list of dependencies looks like this:
|
|
28
|
-
|
|
29
|
-
- numpy
|
|
30
|
-
- scipy
|
|
31
|
-
- pandas
|
|
32
|
-
- scikit-allel
|
|
33
|
-
- scikit-learn
|
|
34
|
-
- tensorflow
|
|
35
|
-
- keras
|
|
36
51
|
|
|
37
|
-
|
|
38
|
-
I'm going to focus on the steps involved to install on a linux machine using Anaconda as our python source / main
|
|
39
|
-
package manager. Assuming you have conda installed, create a new conda env
|
|
52
|
+
`diploS/HIC` requires Python 3.10+ and has the following main dependencies:
|
|
40
53
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
54
|
+
- numpy, scipy, pandas
|
|
55
|
+
- scikit-allel, scikit-learn
|
|
56
|
+
- tensorflow, keras
|
|
57
|
+
- numba
|
|
44
58
|
|
|
45
|
-
|
|
46
|
-
which is a good thing. Now we are ready to install `diploS/HIC` itself. We recommend using the binaries that
|
|
47
|
-
we have packaged using pip. Simply type
|
|
59
|
+
### Install from PyPI (recommended)
|
|
48
60
|
|
|
49
|
-
```
|
|
61
|
+
```bash
|
|
50
62
|
pip install diploshic
|
|
51
63
|
```
|
|
52
64
|
|
|
53
|
-
|
|
65
|
+
Or using [uv](https://docs.astral.sh/uv/):
|
|
54
66
|
|
|
67
|
+
```bash
|
|
68
|
+
uv pip install diploshic
|
|
55
69
|
```
|
|
56
|
-
$ git clone https://github.com/kern-lab/diploSHIC.git
|
|
57
|
-
$ cd diploSHIC
|
|
58
|
-
$ pip install .
|
|
59
|
-
```
|
|
60
70
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
https://
|
|
71
|
+
### Install from source
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
git clone https://github.com/kr-colab/diploSHIC.git
|
|
75
|
+
cd diploSHIC
|
|
76
|
+
pip install .
|
|
77
|
+
```
|
|
65
78
|
|
|
66
|
-
|
|
79
|
+
### GPU support
|
|
67
80
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
build the diploshic package from the root of the repo dir with pip install .
|
|
81
|
+
By default, TensorFlow installs with CPU support. For GPU acceleration, see
|
|
82
|
+
the [TensorFlow GPU installation guide](https://www.tensorflow.org/install/gpu).
|
|
71
83
|
|
|
72
84
|
## Usage
|
|
73
85
|
The main program that you will interface with is `diploSHIC`. This script is installed by default
|
|
@@ -145,6 +157,14 @@ optional arguments:
|
|
|
145
157
|
information (marked by 'N'). If specified, simulations
|
|
146
158
|
will be masked in a manner mirroring windows drawn
|
|
147
159
|
from this file.
|
|
160
|
+
--vcfForMaskFileName VCFFORMASKFILENAME
|
|
161
|
+
Path to a VCF file that contains genotype information. This will be used to mask genotypes in a manner that mirrors how the true data are masked.
|
|
162
|
+
--popForMask POPFORMASK
|
|
163
|
+
The label of the population for which we should draw genotype information from the VCF for masking purposes.
|
|
164
|
+
--sampleToPopFileName SAMPLETOPOPFILENAME
|
|
165
|
+
Path to tab delimited file with population assignments (used for genotype masking); format: SampleID popID
|
|
166
|
+
--unmaskedGenoFracCutoff UNMASKEDGENOFRACCUTOFF
|
|
167
|
+
Fraction of unmasked genotypes required to retain a site (default=0.75)
|
|
148
168
|
--chrArmsForMasking CHRARMSFORMASKING
|
|
149
169
|
A comma-separated list (no spaces) of chromosome arms
|
|
150
170
|
from which we want to draw masking information (or
|
|
@@ -358,5 +378,3 @@ the output predictions will be saved in `testEmpirical.preds` and should be stra
|
|
|
358
378
|
In the interest of showing the user the whole enchilada when it comes to the workflow, I've provided the user
|
|
359
379
|
with a more detailed example on the wiki of this repo. That example can be found here: https://github.com/kern-lab/diploSHIC/wiki/A-soup-to-nuts-example
|
|
360
380
|
|
|
361
|
-
|
|
362
|
-
|
|
@@ -9,51 +9,38 @@ using simulation. 2) `diploS/HIC` training and performance evaluation. 3) Calcul
|
|
|
9
9
|
genetic simulations must be performed using separate software such as discoal (https://github.com/kern-lab/discoal)
|
|
10
10
|
|
|
11
11
|
## Installation
|
|
12
|
-
`diploS/HIC` has a number of dependencies that should be straightforward to install using python package managers
|
|
13
|
-
such as `conda` or `pip`. The complete list of dependencies looks like this:
|
|
14
12
|
|
|
15
|
-
|
|
16
|
-
- scipy
|
|
17
|
-
- pandas
|
|
18
|
-
- scikit-allel
|
|
19
|
-
- scikit-learn
|
|
20
|
-
- tensorflow
|
|
21
|
-
- keras
|
|
13
|
+
`diploS/HIC` requires Python 3.10+ and has the following main dependencies:
|
|
22
14
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
15
|
+
- numpy, scipy, pandas
|
|
16
|
+
- scikit-allel, scikit-learn
|
|
17
|
+
- tensorflow, keras
|
|
18
|
+
- numba
|
|
26
19
|
|
|
27
|
-
|
|
28
|
-
$ conda create -n diploshic python=3.10 --yes
|
|
29
|
-
```
|
|
20
|
+
### Install from PyPI (recommended)
|
|
30
21
|
|
|
31
|
-
|
|
32
|
-
which is a good thing. Now we are ready to install `diploS/HIC` itself. We recommend using the binaries that
|
|
33
|
-
we have packaged using pip. Simply type
|
|
34
|
-
|
|
35
|
-
```
|
|
22
|
+
```bash
|
|
36
23
|
pip install diploshic
|
|
37
24
|
```
|
|
38
25
|
|
|
39
|
-
|
|
26
|
+
Or using [uv](https://docs.astral.sh/uv/):
|
|
40
27
|
|
|
28
|
+
```bash
|
|
29
|
+
uv pip install diploshic
|
|
41
30
|
```
|
|
42
|
-
$ git clone https://github.com/kern-lab/diploSHIC.git
|
|
43
|
-
$ cd diploSHIC
|
|
44
|
-
$ pip install .
|
|
45
|
-
```
|
|
46
31
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
https://
|
|
32
|
+
### Install from source
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
git clone https://github.com/kr-colab/diploSHIC.git
|
|
36
|
+
cd diploSHIC
|
|
37
|
+
pip install .
|
|
38
|
+
```
|
|
51
39
|
|
|
52
|
-
|
|
40
|
+
### GPU support
|
|
53
41
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
build the diploshic package from the root of the repo dir with pip install .
|
|
42
|
+
By default, TensorFlow installs with CPU support. For GPU acceleration, see
|
|
43
|
+
the [TensorFlow GPU installation guide](https://www.tensorflow.org/install/gpu).
|
|
57
44
|
|
|
58
45
|
## Usage
|
|
59
46
|
The main program that you will interface with is `diploSHIC`. This script is installed by default
|
|
@@ -131,6 +118,14 @@ optional arguments:
|
|
|
131
118
|
information (marked by 'N'). If specified, simulations
|
|
132
119
|
will be masked in a manner mirroring windows drawn
|
|
133
120
|
from this file.
|
|
121
|
+
--vcfForMaskFileName VCFFORMASKFILENAME
|
|
122
|
+
Path to a VCF file that contains genotype information. This will be used to mask genotypes in a manner that mirrors how the true data are masked.
|
|
123
|
+
--popForMask POPFORMASK
|
|
124
|
+
The label of the population for which we should draw genotype information from the VCF for masking purposes.
|
|
125
|
+
--sampleToPopFileName SAMPLETOPOPFILENAME
|
|
126
|
+
Path to tab delimited file with population assignments (used for genotype masking); format: SampleID popID
|
|
127
|
+
--unmaskedGenoFracCutoff UNMASKEDGENOFRACCUTOFF
|
|
128
|
+
Fraction of unmasked genotypes required to retain a site (default=0.75)
|
|
134
129
|
--chrArmsForMasking CHRARMSFORMASKING
|
|
135
130
|
A comma-separated list (no spaces) of chromosome arms
|
|
136
131
|
from which we want to draw masking information (or
|
|
@@ -1,16 +1,41 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: diploSHIC
|
|
3
|
-
Version: 1.0.7
|
|
4
|
-
Summary:
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
Author-email: adkern@uoregon.edu
|
|
3
|
+
Version: 1.1.0
|
|
4
|
+
Summary: A deep learning tool for identifying hard and soft selective sweeps in population genomic data
|
|
5
|
+
Author-email: Andrew Kern <adkern@uoregon.edu>
|
|
6
|
+
Maintainer-email: Andrew Kern <adkern@uoregon.edu>
|
|
8
7
|
License: MIT
|
|
9
|
-
|
|
10
|
-
|
|
8
|
+
Project-URL: Homepage, https://github.com/kr-colab/diploSHIC
|
|
9
|
+
Project-URL: Documentation, https://github.com/kr-colab/diploSHIC/wiki
|
|
10
|
+
Project-URL: Repository, https://github.com/kr-colab/diploSHIC.git
|
|
11
|
+
Project-URL: Issues, https://github.com/kr-colab/diploSHIC/issues
|
|
12
|
+
Keywords: population genetics,selective sweeps,deep learning,CNN,genomics,evolution
|
|
13
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
22
|
+
Requires-Python: >=3.10
|
|
11
23
|
Description-Content-Type: text/markdown
|
|
12
|
-
Provides-Extra: dev
|
|
13
24
|
License-File: LICENSE
|
|
25
|
+
Requires-Dist: numpy>=1.20
|
|
26
|
+
Requires-Dist: scipy>=1.7
|
|
27
|
+
Requires-Dist: matplotlib>=3.5
|
|
28
|
+
Requires-Dist: pandas>=1.3
|
|
29
|
+
Requires-Dist: scikit-allel>=1.3
|
|
30
|
+
Requires-Dist: scikit-learn>=1.0
|
|
31
|
+
Requires-Dist: tensorflow<3.0,>=2.13
|
|
32
|
+
Requires-Dist: keras>=2.13
|
|
33
|
+
Requires-Dist: numba>=0.56
|
|
34
|
+
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
36
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
37
|
+
Requires-Dist: ruff>=0.1; extra == "dev"
|
|
38
|
+
Dynamic: license-file
|
|
14
39
|
|
|
15
40
|
# diploS/HIC
|
|
16
41
|
This repo contains the implementation for `diploS/HIC` as described in Kern and Schrider (2018; https://doi.org/10.1534/g3.118.200262), along
|
|
@@ -23,51 +48,38 @@ using simulation. 2) `diploS/HIC` training and performance evaluation. 3) Calcul
|
|
|
23
48
|
genetic simulations must be performed using separate software such as discoal (https://github.com/kern-lab/discoal)
|
|
24
49
|
|
|
25
50
|
## Installation
|
|
26
|
-
`diploS/HIC` has a number of dependencies that should be straightforward to install using python package managers
|
|
27
|
-
such as `conda` or `pip`. The complete list of dependencies looks like this:
|
|
28
|
-
|
|
29
|
-
- numpy
|
|
30
|
-
- scipy
|
|
31
|
-
- pandas
|
|
32
|
-
- scikit-allel
|
|
33
|
-
- scikit-learn
|
|
34
|
-
- tensorflow
|
|
35
|
-
- keras
|
|
36
51
|
|
|
37
|
-
|
|
38
|
-
I'm going to focus on the steps involved to install on a linux machine using Anaconda as our python source / main
|
|
39
|
-
package manager. Assuming you have conda installed, create a new conda env
|
|
52
|
+
`diploS/HIC` requires Python 3.10+ and has the following main dependencies:
|
|
40
53
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
54
|
+
- numpy, scipy, pandas
|
|
55
|
+
- scikit-allel, scikit-learn
|
|
56
|
+
- tensorflow, keras
|
|
57
|
+
- numba
|
|
44
58
|
|
|
45
|
-
|
|
46
|
-
which is a good thing. Now we are ready to install `diploS/HIC` itself. We recommend using the binaries that
|
|
47
|
-
we have packaged using pip. Simply type
|
|
59
|
+
### Install from PyPI (recommended)
|
|
48
60
|
|
|
49
|
-
```
|
|
61
|
+
```bash
|
|
50
62
|
pip install diploshic
|
|
51
63
|
```
|
|
52
64
|
|
|
53
|
-
|
|
65
|
+
Or using [uv](https://docs.astral.sh/uv/):
|
|
54
66
|
|
|
67
|
+
```bash
|
|
68
|
+
uv pip install diploshic
|
|
55
69
|
```
|
|
56
|
-
$ git clone https://github.com/kern-lab/diploSHIC.git
|
|
57
|
-
$ cd diploSHIC
|
|
58
|
-
$ pip install .
|
|
59
|
-
```
|
|
60
70
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
https://
|
|
71
|
+
### Install from source
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
git clone https://github.com/kr-colab/diploSHIC.git
|
|
75
|
+
cd diploSHIC
|
|
76
|
+
pip install .
|
|
77
|
+
```
|
|
65
78
|
|
|
66
|
-
|
|
79
|
+
### GPU support
|
|
67
80
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
build the diploshic package from the root of the repo dir with pip install .
|
|
81
|
+
By default, TensorFlow installs with CPU support. For GPU acceleration, see
|
|
82
|
+
the [TensorFlow GPU installation guide](https://www.tensorflow.org/install/gpu).
|
|
71
83
|
|
|
72
84
|
## Usage
|
|
73
85
|
The main program that you will interface with is `diploSHIC`. This script is installed by default
|
|
@@ -145,6 +157,14 @@ optional arguments:
|
|
|
145
157
|
information (marked by 'N'). If specified, simulations
|
|
146
158
|
will be masked in a manner mirroring windows drawn
|
|
147
159
|
from this file.
|
|
160
|
+
--vcfForMaskFileName VCFFORMASKFILENAME
|
|
161
|
+
Path to a VCF file that contains genotype information. This will be used to mask genotypes in a manner that mirrors how the true data are masked.
|
|
162
|
+
--popForMask POPFORMASK
|
|
163
|
+
The label of the population for which we should draw genotype information from the VCF for masking purposes.
|
|
164
|
+
--sampleToPopFileName SAMPLETOPOPFILENAME
|
|
165
|
+
Path to tab delimited file with population assignments (used for genotype masking); format: SampleID popID
|
|
166
|
+
--unmaskedGenoFracCutoff UNMASKEDGENOFRACCUTOFF
|
|
167
|
+
Fraction of unmasked genotypes required to retain a site (default=0.75)
|
|
148
168
|
--chrArmsForMasking CHRARMSFORMASKING
|
|
149
169
|
A comma-separated list (no spaces) of chromosome arms
|
|
150
170
|
from which we want to draw masking information (or
|
|
@@ -358,5 +378,3 @@ the output predictions will be saved in `testEmpirical.preds` and should be stra
|
|
|
358
378
|
In the interest of showing the user the whole enchilada when it comes to the workflow, I've provided the user
|
|
359
379
|
with a more detailed example on the wiki of this repo. That example can be found here: https://github.com/kern-lab/diploSHIC/wiki/A-soup-to-nuts-example
|
|
360
380
|
|
|
361
|
-
|
|
362
|
-
|
|
@@ -2,11 +2,9 @@ LICENSE
|
|
|
2
2
|
MANIFEST.in
|
|
3
3
|
README.md
|
|
4
4
|
pyproject.toml
|
|
5
|
-
setup.py
|
|
6
5
|
diploSHIC.egg-info/PKG-INFO
|
|
7
6
|
diploSHIC.egg-info/SOURCES.txt
|
|
8
7
|
diploSHIC.egg-info/dependency_links.txt
|
|
9
|
-
diploSHIC.egg-info/not-zip-safe
|
|
10
8
|
diploSHIC.egg-info/requires.txt
|
|
11
9
|
diploSHIC.egg-info/top_level.txt
|
|
12
10
|
diploshic/__init__.py
|
|
@@ -20,9 +18,8 @@ diploshic/makeFeatureVecsForSingleMs_ogSHIC.py
|
|
|
20
18
|
diploshic/makeTrainingSets.py
|
|
21
19
|
diploshic/misc.py
|
|
22
20
|
diploshic/msTools.py
|
|
21
|
+
diploshic/numba_stats.py
|
|
23
22
|
diploshic/setup.py
|
|
24
|
-
diploshic/shicstats.pyf
|
|
25
|
-
diploshic/utils.c
|
|
26
23
|
diploshic/testing/hard.fvec
|
|
27
24
|
diploshic/testing/linkedHard.fvec
|
|
28
25
|
diploshic/testing/linkedSoft.fvec
|
|
@@ -32,4 +29,5 @@ diploshic/training/hard.fvec
|
|
|
32
29
|
diploshic/training/linkedHard.fvec
|
|
33
30
|
diploshic/training/linkedSoft.fvec
|
|
34
31
|
diploshic/training/neut.fvec
|
|
35
|
-
diploshic/training/soft.fvec
|
|
32
|
+
diploshic/training/soft.fvec
|
|
33
|
+
tests/test_regression.py
|