diploSHIC 1.0.6__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. {diploshic-1.0.6 → diploshic-1.1.0}/MANIFEST.in +0 -2
  2. {diploshic-1.0.6/diploSHIC.egg-info → diploshic-1.1.0}/PKG-INFO +61 -42
  3. {diploshic-1.0.6 → diploshic-1.1.0}/README.md +28 -28
  4. {diploshic-1.0.6 → diploshic-1.1.0/diploSHIC.egg-info}/PKG-INFO +61 -42
  5. {diploshic-1.0.6 → diploshic-1.1.0}/diploSHIC.egg-info/SOURCES.txt +3 -5
  6. diploshic-1.1.0/diploSHIC.egg-info/requires.txt +14 -0
  7. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/__init__.py +1 -1
  8. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/fvTools.py +302 -47
  9. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/makeFeatureVecsForChrArmFromVcfDiploid.py +29 -9
  10. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/makeFeatureVecsForChrArmFromVcf_ogSHIC.py +26 -9
  11. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/makeFeatureVecsForSingleMsDiploid.py +12 -4
  12. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/makeFeatureVecsForSingleMs_ogSHIC.py +24 -22
  13. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/msTools.py +9 -5
  14. diploshic-1.1.0/diploshic/numba_stats.py +350 -0
  15. diploshic-1.1.0/pyproject.toml +102 -0
  16. diploshic-1.1.0/tests/test_regression.py +41 -0
  17. diploshic-1.0.6/diploSHIC.egg-info/not-zip-safe +0 -1
  18. diploshic-1.0.6/diploSHIC.egg-info/requires.txt +0 -10
  19. diploshic-1.0.6/diploshic/shicstats.pyf +0 -50
  20. diploshic-1.0.6/diploshic/utils.c +0 -216
  21. diploshic-1.0.6/pyproject.toml +0 -4
  22. diploshic-1.0.6/setup.py +0 -41
  23. {diploshic-1.0.6 → diploshic-1.1.0}/LICENSE +0 -0
  24. {diploshic-1.0.6 → diploshic-1.1.0}/diploSHIC.egg-info/dependency_links.txt +0 -0
  25. {diploshic-1.0.6 → diploshic-1.1.0}/diploSHIC.egg-info/top_level.txt +0 -0
  26. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/diploSHIC +0 -0
  27. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/generateSimLaunchScript.py +0 -0
  28. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/makeTrainingSets.py +0 -0
  29. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/misc.py +0 -0
  30. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/setup.py +0 -0
  31. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/testing/hard.fvec +0 -0
  32. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/testing/linkedHard.fvec +0 -0
  33. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/testing/linkedSoft.fvec +0 -0
  34. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/testing/neut.fvec +0 -0
  35. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/testing/soft.fvec +0 -0
  36. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/training/hard.fvec +0 -0
  37. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/training/linkedHard.fvec +0 -0
  38. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/training/linkedSoft.fvec +0 -0
  39. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/training/neut.fvec +0 -0
  40. {diploshic-1.0.6 → diploshic-1.1.0}/diploshic/training/soft.fvec +0 -0
  41. {diploshic-1.0.6 → diploshic-1.1.0}/setup.cfg +0 -0
@@ -1,5 +1,3 @@
1
- include diploshic/shicstats.pyf
2
1
  include diploshic/testEmpirical.fvec
3
2
  include diploshic/testing/*
4
3
  include diploshic/training/*
5
- include diploshic/utils.c
@@ -1,22 +1,41 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: diploSHIC
3
- Version: 1.0.6
4
- Summary: diploSHIC
5
- Home-page: https://github.com/kr-colab/diploSHIC
6
- Author: Andrew Kern
7
- Author-email: adkern@uoregon.edu
3
+ Version: 1.1.0
4
+ Summary: A deep learning tool for identifying hard and soft selective sweeps in population genomic data
5
+ Author-email: Andrew Kern <adkern@uoregon.edu>
6
+ Maintainer-email: Andrew Kern <adkern@uoregon.edu>
8
7
  License: MIT
8
+ Project-URL: Homepage, https://github.com/kr-colab/diploSHIC
9
+ Project-URL: Documentation, https://github.com/kr-colab/diploSHIC/wiki
10
+ Project-URL: Repository, https://github.com/kr-colab/diploSHIC.git
11
+ Project-URL: Issues, https://github.com/kr-colab/diploSHIC/issues
12
+ Keywords: population genetics,selective sweeps,deep learning,CNN,genomics,evolution
13
+ Classifier: Development Status :: 5 - Production/Stable
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
22
+ Requires-Python: >=3.10
9
23
  Description-Content-Type: text/markdown
10
24
  License-File: LICENSE
11
- Requires-Dist: numpy
12
- Requires-Dist: scipy
13
- Requires-Dist: matplotlib
14
- Requires-Dist: pandas
15
- Requires-Dist: scikit-allel
16
- Requires-Dist: scikit-learn
17
- Requires-Dist: tensorflow==2.15.0
18
- Requires-Dist: keras
25
+ Requires-Dist: numpy>=1.20
26
+ Requires-Dist: scipy>=1.7
27
+ Requires-Dist: matplotlib>=3.5
28
+ Requires-Dist: pandas>=1.3
29
+ Requires-Dist: scikit-allel>=1.3
30
+ Requires-Dist: scikit-learn>=1.0
31
+ Requires-Dist: tensorflow<3.0,>=2.13
32
+ Requires-Dist: keras>=2.13
33
+ Requires-Dist: numba>=0.56
19
34
  Provides-Extra: dev
35
+ Requires-Dist: pytest>=7.0; extra == "dev"
36
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
37
+ Requires-Dist: ruff>=0.1; extra == "dev"
38
+ Dynamic: license-file
20
39
 
21
40
  # diploS/HIC
22
41
  This repo contains the implementation for `diploS/HIC` as described in Kern and Schrider (2018; https://doi.org/10.1534/g3.118.200262), along
@@ -29,46 +48,38 @@ using simulation. 2) `diploS/HIC` training and performance evaluation. 3) Calcul
29
48
  genetic simulations must be performed using separate software such as discoal (https://github.com/kern-lab/discoal)
30
49
 
31
50
  ## Installation
32
- `diploS/HIC` has a number of dependencies that should be straightforward to install using python package managers
33
- such as `conda` or `pip`. The complete list of dependencies looks like this:
34
51
 
35
- - numpy
36
- - scipy
37
- - pandas
38
- - scikit-allel
39
- - scikit-learn
40
- - tensorflow
41
- - keras
52
+ `diploS/HIC` requires Python 3.10+ and has the following main dependencies:
42
53
 
43
- ## Install on linux
44
- I'm going to focus on the steps involved to install on a linux machine using Anaconda as our python source / main
45
- package manager. Assuming you have conda installed, create a new conda env
54
+ - numpy, scipy, pandas
55
+ - scikit-allel, scikit-learn
56
+ - tensorflow, keras
57
+ - numba
46
58
 
47
- ```
48
- $ conda create -n diploshic python=3.9 --yes
49
- ```
59
+ ### Install from PyPI (recommended)
50
60
 
51
- Note that because I'm using the Anaconda version of python, pip will only install this in the anaconda directory
52
- which is a good thing. Now we are ready to install `diploS/HIC` itself. We recommend using the binarys that
53
- we have packaged using pip. Simply type
54
-
55
- ```
61
+ ```bash
56
62
  pip install diploshic
57
63
  ```
58
64
 
59
- or if you prefer you can clone and build the repo yourself
65
+ Or using [uv](https://docs.astral.sh/uv/):
60
66
 
67
+ ```bash
68
+ uv pip install diploshic
61
69
  ```
62
- $ git clone https://github.com/kern-lab/diploSHIC.git
63
- $ cd diploSHIC
64
- $ pip install .
70
+
71
+ ### Install from source
72
+
73
+ ```bash
74
+ git clone https://github.com/kr-colab/diploSHIC.git
75
+ cd diploSHIC
76
+ pip install .
65
77
  ```
66
78
 
67
- This should automatically install all the dependencies including tensorflow.
68
- You will need to determine if
69
- you want to use a CPU-only implementation (probably) or a GPU implementation of tensorflow. See
70
- https://www.tensorflow.org/install/install_linux for install instructions.
79
+ ### GPU support
71
80
 
81
+ By default, TensorFlow installs with CPU support. For GPU acceleration, see
82
+ the [TensorFlow GPU installation guide](https://www.tensorflow.org/install/gpu).
72
83
 
73
84
  ## Usage
74
85
  The main program that you will interface with is `diploSHIC`. This script is installed by default
@@ -146,6 +157,14 @@ optional arguments:
146
157
  information (marked by 'N'). If specified, simulations
147
158
  will be masked in a manner mirroring windows drawn
148
159
  from this file.
160
+ --vcfForMaskFileName VCFFORMASKFILENAME
161
+ Path to a VCF file that contains genotype information. This will be used to mask genotypes in a manner that mirrors how the true data are masked.
162
+ --popForMask POPFORMASK
163
+ The label of the population for which we should draw genotype information from the VCF for masking purposes.
164
+ --sampleToPopFileName SAMPLETOPOPFILENAME
165
+ Path to tab delimited file with population assignments (used for genotype masking); format: SampleID popID
166
+ --unmaskedGenoFracCutoff UNMASKEDGENOFRACCUTOFF
167
+ Fraction of unmasked genotypes required to retain a site (default=0.75)
149
168
  --chrArmsForMasking CHRARMSFORMASKING
150
169
  A comma-separated list (no spaces) of chromosome arms
151
170
  from which we want to draw masking information (or
@@ -9,46 +9,38 @@ using simulation. 2) `diploS/HIC` training and performance evaluation. 3) Calcul
9
9
  genetic simulations must be performed using separate software such as discoal (https://github.com/kern-lab/discoal)
10
10
 
11
11
  ## Installation
12
- `diploS/HIC` has a number of dependencies that should be straightforward to install using python package managers
13
- such as `conda` or `pip`. The complete list of dependencies looks like this:
14
12
 
15
- - numpy
16
- - scipy
17
- - pandas
18
- - scikit-allel
19
- - scikit-learn
20
- - tensorflow
21
- - keras
13
+ `diploS/HIC` requires Python 3.10+ and has the following main dependencies:
22
14
 
23
- ## Install on linux
24
- I'm going to focus on the steps involved to install on a linux machine using Anaconda as our python source / main
25
- package manager. Assuming you have conda installed, create a new conda env
15
+ - numpy, scipy, pandas
16
+ - scikit-allel, scikit-learn
17
+ - tensorflow, keras
18
+ - numba
26
19
 
27
- ```
28
- $ conda create -n diploshic python=3.9 --yes
29
- ```
20
+ ### Install from PyPI (recommended)
30
21
 
31
- Note that because I'm using the Anaconda version of python, pip will only install this in the anaconda directory
32
- which is a good thing. Now we are ready to install `diploS/HIC` itself. We recommend using the binarys that
33
- we have packaged using pip. Simply type
34
-
35
- ```
22
+ ```bash
36
23
  pip install diploshic
37
24
  ```
38
25
 
39
- or if you prefer you can clone and build the repo yourself
26
+ Or using [uv](https://docs.astral.sh/uv/):
40
27
 
28
+ ```bash
29
+ uv pip install diploshic
41
30
  ```
42
- $ git clone https://github.com/kern-lab/diploSHIC.git
43
- $ cd diploSHIC
44
- $ pip install .
31
+
32
+ ### Install from source
33
+
34
+ ```bash
35
+ git clone https://github.com/kr-colab/diploSHIC.git
36
+ cd diploSHIC
37
+ pip install .
45
38
  ```
46
39
 
47
- This should automatically install all the dependencies including tensorflow.
48
- You will need to determine if
49
- you want to use a CPU-only implementation (probably) or a GPU implementation of tensorflow. See
50
- https://www.tensorflow.org/install/install_linux for install instructions.
40
+ ### GPU support
51
41
 
42
+ By default, TensorFlow installs with CPU support. For GPU acceleration, see
43
+ the [TensorFlow GPU installation guide](https://www.tensorflow.org/install/gpu).
52
44
 
53
45
  ## Usage
54
46
  The main program that you will interface with is `diploSHIC`. This script is installed by default
@@ -126,6 +118,14 @@ optional arguments:
126
118
  information (marked by 'N'). If specified, simulations
127
119
  will be masked in a manner mirroring windows drawn
128
120
  from this file.
121
+ --vcfForMaskFileName VCFFORMASKFILENAME
122
+ Path to a VCF file that contains genotype information. This will be used to mask genotypes in a manner that mirrors how the true data are masked.
123
+ --popForMask POPFORMASK
124
+ The label of the population for which we should draw genotype information from the VCF for masking purposes.
125
+ --sampleToPopFileName SAMPLETOPOPFILENAME
126
+ Path to tab delimited file with population assignments (used for genotype masking); format: SampleID popID
127
+ --unmaskedGenoFracCutoff UNMASKEDGENOFRACCUTOFF
128
+ Fraction of unmasked genotypes required to retain a site (default=0.75)
129
129
  --chrArmsForMasking CHRARMSFORMASKING
130
130
  A comma-separated list (no spaces) of chromosome arms
131
131
  from which we want to draw masking information (or
@@ -1,22 +1,41 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: diploSHIC
3
- Version: 1.0.6
4
- Summary: diploSHIC
5
- Home-page: https://github.com/kr-colab/diploSHIC
6
- Author: Andrew Kern
7
- Author-email: adkern@uoregon.edu
3
+ Version: 1.1.0
4
+ Summary: A deep learning tool for identifying hard and soft selective sweeps in population genomic data
5
+ Author-email: Andrew Kern <adkern@uoregon.edu>
6
+ Maintainer-email: Andrew Kern <adkern@uoregon.edu>
8
7
  License: MIT
8
+ Project-URL: Homepage, https://github.com/kr-colab/diploSHIC
9
+ Project-URL: Documentation, https://github.com/kr-colab/diploSHIC/wiki
10
+ Project-URL: Repository, https://github.com/kr-colab/diploSHIC.git
11
+ Project-URL: Issues, https://github.com/kr-colab/diploSHIC/issues
12
+ Keywords: population genetics,selective sweeps,deep learning,CNN,genomics,evolution
13
+ Classifier: Development Status :: 5 - Production/Stable
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
22
+ Requires-Python: >=3.10
9
23
  Description-Content-Type: text/markdown
10
24
  License-File: LICENSE
11
- Requires-Dist: numpy
12
- Requires-Dist: scipy
13
- Requires-Dist: matplotlib
14
- Requires-Dist: pandas
15
- Requires-Dist: scikit-allel
16
- Requires-Dist: scikit-learn
17
- Requires-Dist: tensorflow==2.15.0
18
- Requires-Dist: keras
25
+ Requires-Dist: numpy>=1.20
26
+ Requires-Dist: scipy>=1.7
27
+ Requires-Dist: matplotlib>=3.5
28
+ Requires-Dist: pandas>=1.3
29
+ Requires-Dist: scikit-allel>=1.3
30
+ Requires-Dist: scikit-learn>=1.0
31
+ Requires-Dist: tensorflow<3.0,>=2.13
32
+ Requires-Dist: keras>=2.13
33
+ Requires-Dist: numba>=0.56
19
34
  Provides-Extra: dev
35
+ Requires-Dist: pytest>=7.0; extra == "dev"
36
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
37
+ Requires-Dist: ruff>=0.1; extra == "dev"
38
+ Dynamic: license-file
20
39
 
21
40
  # diploS/HIC
22
41
  This repo contains the implementation for `diploS/HIC` as described in Kern and Schrider (2018; https://doi.org/10.1534/g3.118.200262), along
@@ -29,46 +48,38 @@ using simulation. 2) `diploS/HIC` training and performance evaluation. 3) Calcul
29
48
  genetic simulations must be performed using separate software such as discoal (https://github.com/kern-lab/discoal)
30
49
 
31
50
  ## Installation
32
- `diploS/HIC` has a number of dependencies that should be straightforward to install using python package managers
33
- such as `conda` or `pip`. The complete list of dependencies looks like this:
34
51
 
35
- - numpy
36
- - scipy
37
- - pandas
38
- - scikit-allel
39
- - scikit-learn
40
- - tensorflow
41
- - keras
52
+ `diploS/HIC` requires Python 3.10+ and has the following main dependencies:
42
53
 
43
- ## Install on linux
44
- I'm going to focus on the steps involved to install on a linux machine using Anaconda as our python source / main
45
- package manager. Assuming you have conda installed, create a new conda env
54
+ - numpy, scipy, pandas
55
+ - scikit-allel, scikit-learn
56
+ - tensorflow, keras
57
+ - numba
46
58
 
47
- ```
48
- $ conda create -n diploshic python=3.9 --yes
49
- ```
59
+ ### Install from PyPI (recommended)
50
60
 
51
- Note that because I'm using the Anaconda version of python, pip will only install this in the anaconda directory
52
- which is a good thing. Now we are ready to install `diploS/HIC` itself. We recommend using the binarys that
53
- we have packaged using pip. Simply type
54
-
55
- ```
61
+ ```bash
56
62
  pip install diploshic
57
63
  ```
58
64
 
59
- or if you prefer you can clone and build the repo yourself
65
+ Or using [uv](https://docs.astral.sh/uv/):
60
66
 
67
+ ```bash
68
+ uv pip install diploshic
61
69
  ```
62
- $ git clone https://github.com/kern-lab/diploSHIC.git
63
- $ cd diploSHIC
64
- $ pip install .
70
+
71
+ ### Install from source
72
+
73
+ ```bash
74
+ git clone https://github.com/kr-colab/diploSHIC.git
75
+ cd diploSHIC
76
+ pip install .
65
77
  ```
66
78
 
67
- This should automatically install all the dependencies including tensorflow.
68
- You will need to determine if
69
- you want to use a CPU-only implementation (probably) or a GPU implementation of tensorflow. See
70
- https://www.tensorflow.org/install/install_linux for install instructions.
79
+ ### GPU support
71
80
 
81
+ By default, TensorFlow installs with CPU support. For GPU acceleration, see
82
+ the [TensorFlow GPU installation guide](https://www.tensorflow.org/install/gpu).
72
83
 
73
84
  ## Usage
74
85
  The main program that you will interface with is `diploSHIC`. This script is installed by default
@@ -146,6 +157,14 @@ optional arguments:
146
157
  information (marked by 'N'). If specified, simulations
147
158
  will be masked in a manner mirroring windows drawn
148
159
  from this file.
160
+ --vcfForMaskFileName VCFFORMASKFILENAME
161
+ Path to a VCF file that contains genotype information. This will be used to mask genotypes in a manner that mirrors how the true data are masked.
162
+ --popForMask POPFORMASK
163
+ The label of the population for which we should draw genotype information from the VCF for masking purposes.
164
+ --sampleToPopFileName SAMPLETOPOPFILENAME
165
+ Path to tab delimited file with population assignments (used for genotype masking); format: SampleID popID
166
+ --unmaskedGenoFracCutoff UNMASKEDGENOFRACCUTOFF
167
+ Fraction of unmasked genotypes required to retain a site (default=0.75)
149
168
  --chrArmsForMasking CHRARMSFORMASKING
150
169
  A comma-separated list (no spaces) of chromosome arms
151
170
  from which we want to draw masking information (or
@@ -2,11 +2,9 @@ LICENSE
2
2
  MANIFEST.in
3
3
  README.md
4
4
  pyproject.toml
5
- setup.py
6
5
  diploSHIC.egg-info/PKG-INFO
7
6
  diploSHIC.egg-info/SOURCES.txt
8
7
  diploSHIC.egg-info/dependency_links.txt
9
- diploSHIC.egg-info/not-zip-safe
10
8
  diploSHIC.egg-info/requires.txt
11
9
  diploSHIC.egg-info/top_level.txt
12
10
  diploshic/__init__.py
@@ -20,9 +18,8 @@ diploshic/makeFeatureVecsForSingleMs_ogSHIC.py
20
18
  diploshic/makeTrainingSets.py
21
19
  diploshic/misc.py
22
20
  diploshic/msTools.py
21
+ diploshic/numba_stats.py
23
22
  diploshic/setup.py
24
- diploshic/shicstats.pyf
25
- diploshic/utils.c
26
23
  diploshic/testing/hard.fvec
27
24
  diploshic/testing/linkedHard.fvec
28
25
  diploshic/testing/linkedSoft.fvec
@@ -32,4 +29,5 @@ diploshic/training/hard.fvec
32
29
  diploshic/training/linkedHard.fvec
33
30
  diploshic/training/linkedSoft.fvec
34
31
  diploshic/training/neut.fvec
35
- diploshic/training/soft.fvec
32
+ diploshic/training/soft.fvec
33
+ tests/test_regression.py
@@ -0,0 +1,14 @@
1
+ numpy>=1.20
2
+ scipy>=1.7
3
+ matplotlib>=3.5
4
+ pandas>=1.3
5
+ scikit-allel>=1.3
6
+ scikit-learn>=1.0
7
+ tensorflow<3.0,>=2.13
8
+ keras>=2.13
9
+ numba>=0.56
10
+
11
+ [dev]
12
+ pytest>=7.0
13
+ pytest-cov>=4.0
14
+ ruff>=0.1
@@ -1,3 +1,3 @@
1
1
  from diploshic.fvTools import *
2
2
  from diploshic.msTools import *
3
- from diploshic.shicstats import *
3
+ from diploshic.numba_stats import *