consenrich 0.7.4b2__cp312-cp312-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- consenrich/__init__.py +11 -0
- consenrich/cconsenrich.c +48685 -0
- consenrich/cconsenrich.cpython-312-darwin.so +0 -0
- consenrich/cconsenrich.pyx +861 -0
- consenrich/consenrich.py +1381 -0
- consenrich/constants.py +172 -0
- consenrich/core.py +1428 -0
- consenrich/data/ce10.sizes +6 -0
- consenrich/data/ce10_blacklist.bed +100 -0
- consenrich/data/ce10_sparse.bed +11828 -0
- consenrich/data/ce11.sizes +6 -0
- consenrich/data/ce11_blacklist.bed +97 -0
- consenrich/data/ce11_sparse.bed +11828 -0
- consenrich/data/dm6.sizes +7 -0
- consenrich/data/dm6_blacklist.bed +182 -0
- consenrich/data/dm6_sparse.bed +20000 -0
- consenrich/data/hg19.sizes +24 -0
- consenrich/data/hg19_blacklist.bed +834 -0
- consenrich/data/hg19_sparse.bed +288358 -0
- consenrich/data/hg38.sizes +24 -0
- consenrich/data/hg38_blacklist.bed +636 -0
- consenrich/data/hg38_sparse.bed +288699 -0
- consenrich/data/mm10.sizes +21 -0
- consenrich/data/mm10_blacklist.bed +3435 -0
- consenrich/data/mm10_sparse.bed +100400 -0
- consenrich/data/mm39.sizes +21 -0
- consenrich/data/mm39_blacklist.bed +3360 -0
- consenrich/data/mm39_sparse.bed +100381 -0
- consenrich/detrorm.py +249 -0
- consenrich/matching.py +901 -0
- consenrich/misc_util.py +122 -0
- consenrich-0.7.4b2.dist-info/METADATA +65 -0
- consenrich-0.7.4b2.dist-info/RECORD +37 -0
- consenrich-0.7.4b2.dist-info/WHEEL +6 -0
- consenrich-0.7.4b2.dist-info/entry_points.txt +2 -0
- consenrich-0.7.4b2.dist-info/licenses/LICENSE +21 -0
- consenrich-0.7.4b2.dist-info/top_level.txt +1 -0
consenrich/misc_util.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
r"""
|
|
3
|
+
==============================================================================
|
|
4
|
+
`consenrich.misc_util` -- Miscellaneous utility functions
|
|
5
|
+
==============================================================================
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
from typing import List, Optional, Tuple
|
|
11
|
+
import logging
|
|
12
|
+
import re
|
|
13
|
+
import numpy as np
|
|
14
|
+
import pandas as pd
|
|
15
|
+
import pybedtools as bed
|
|
16
|
+
import pysam as sam
|
|
17
|
+
|
|
18
|
+
from scipy import signal, ndimage
|
|
19
|
+
|
|
20
|
+
# Configure root logging once for the package's command-line use.
# A second ``logging.basicConfig(level=logging.WARNING, ...)`` call used to
# follow this one; it was removed because ``basicConfig`` does nothing once
# the root logger already has a handler, so it never took effect and only
# made the effective level look like WARNING.
# NOTE(review): configuring the root logger at import time affects any
# application importing this module -- confirm that this is intended.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(module)s.%(funcName)s - %(levelname)s - %(message)s",
)

# Module-level logger used by the helpers in this file.
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def checkBamFile(bamFile: str) -> bool:
    r"""Check that a BAM file exists and is indexed, indexing it if needed.

    Assumes the BAM file is sorted by coordinates.

    :param bamFile: Path to the BAM file.
    :type bamFile: str
    :return: ``True`` if an index was found or could be created with
        ``pysam.index``; ``False`` if creating the index failed.
    :rtype: bool
    :raises FileNotFoundError: If ``bamFile`` does not exist.
    :raises AttributeError: Re-raised (after logging) when opening or
        checking the file raises it; the log message treats this as the
        input not being in BAM format.
    """
    if not os.path.exists(bamFile):
        raise FileNotFoundError(f"Could not find {bamFile}")

    has_index = False
    try:
        # The context manager closes the handle even if check_index() raises;
        # the previous explicit close() was skipped on that path.
        with sam.AlignmentFile(bamFile, "rb") as bamfile:
            has_index = bamfile.check_index()
    except AttributeError as aex:
        logger.info(f"Alignments must be in BAM format:\n{aex}")
        raise
    except ValueError:
        # Treated as "no usable index": fall through and try to build one.
        has_index = False

    if not has_index:
        try:
            logger.info(
                f"Could not find index file for {bamFile}. Calling pysam.index()"
            )
            sam.index(bamFile)
            has_index = True
        except Exception as ex:
            # Deliberate best-effort: report the failure and return False
            # instead of raising, so the caller decides how to proceed.
            logger.warning(
                f"Encountered the following exception\n{ex}\nCould not create index file for {bamFile}: is it sorted?"
            )

    return has_index
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def bamsArePairedEnd(
    bamFiles: List[str], maxReads: int = 1_000
) -> List[bool]:
    """
    Report, for each BAM file, whether a paired-end read is found among
    the first reads inspected.

    Each file is scanned from the start (including reads that the index
    would not reach, via ``until_eof=True``) and scanning stops at the
    first paired read or once ``maxReads`` reads have been inspected.

    :param bamFiles: List of paths to BAM files
    :type bamFiles: List[str]
    :param maxReads: Maximum number of reads to check in each BAM file.
        ``None`` disables the limit.
    :type maxReads: int
    :return: One boolean per input file, in input order: True if a
        paired read was seen, False otherwise
    :rtype: List[bool]
    """
    pairedFlags = []
    for bamPath in bamFiles:
        foundPaired = False
        with sam.AlignmentFile(bamPath, "rb") as handle:
            # numChecked counts reads inspected so far, current one included.
            for numChecked, read in enumerate(
                handle.fetch(until_eof=True), start=1
            ):
                if read.is_paired:
                    foundPaired = True
                    break
                if maxReads is not None and numChecked >= maxReads:
                    break
        pairedFlags.append(foundPaired)
    return pairedFlags
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def getChromSizesDict(
    sizes_file: str,
    excludeRegex: str = r"^chr[A-Za-z0-9]+$",
    excludeChroms: Optional[List[str]] = None,
) -> dict:
    r"""Read a chromosome sizes file into a ``{chromosome_name: size}`` dictionary.

    Only the first two tab-separated columns (name, size) are read, so
    files carrying extra columns are accepted as well. Chromosome names
    are always read as strings.

    :param sizes_file: Path to a genome assembly's chromosome sizes file
        (tab-separated, no header).
    :param excludeRegex: Regular expression applied to each chromosome name
        with ``re.search``. Despite the parameter name, names that MATCH
        the pattern are kept and names that do not match are dropped. The
        default keeps names made of ``chr`` followed only by letters and
        digits (e.g. ``chr1``, ``chrX``) and so drops names such as
        ``chrUn_gl000220``.
    :param excludeChroms: List of chromosome names to drop even if they
        match ``excludeRegex``. ``None`` means no extra exclusions.
    :return: Dictionary of chromosome sizes. Formatted as `{chromosome_name: size}`
    """
    droppedNames = frozenset(excludeChroms) if excludeChroms else frozenset()
    keepPattern = re.compile(excludeRegex)

    sizesTable = pd.read_csv(
        sizes_file,
        sep="\t",
        header=None,
        # Restrict to name/size so files with additional columns still parse.
        usecols=[0, 1],
        names=["chrom", "size"],
        # Keep purely numeric names ("1", "2", ...) as strings for re.search.
        dtype={"chrom": str},
    )

    return {
        chromName: chromSize
        for chromName, chromSize in zip(
            sizesTable["chrom"].tolist(), sizesTable["size"].tolist()
        )
        if keepPattern.search(chromName) is not None
        and chromName not in droppedNames
    }
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: consenrich
|
|
3
|
+
Version: 0.7.4b2
|
|
4
|
+
Summary: Genome-wide estimation of signals hidden in noisy multi-sample HTS datasets
|
|
5
|
+
Author-email: "Nolan H. Hamilton" <nolan.hamilton@unc.edu>
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: cython>=3.0
|
|
10
|
+
Requires-Dist: numpy>=2.1
|
|
11
|
+
Requires-Dist: scipy>=1.15
|
|
12
|
+
Requires-Dist: pandas>=2.3.0
|
|
13
|
+
Requires-Dist: pysam>=0.23.3
|
|
14
|
+
Requires-Dist: pybedtools>=0.11.2
|
|
15
|
+
Requires-Dist: PyYAML>=6.0.2
|
|
16
|
+
Requires-Dist: PyWavelets>=1.9.0
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest; extra == "dev"
|
|
19
|
+
Requires-Dist: mypy; extra == "dev"
|
|
20
|
+
Requires-Dist: ruff; extra == "dev"
|
|
21
|
+
Requires-Dist: build; extra == "dev"
|
|
22
|
+
Requires-Dist: twine; extra == "dev"
|
|
23
|
+
Provides-Extra: docs
|
|
24
|
+
Requires-Dist: sphinx>=7.3; extra == "docs"
|
|
25
|
+
Requires-Dist: sphinx-press-theme; extra == "docs"
|
|
26
|
+
Requires-Dist: furo; extra == "docs"
|
|
27
|
+
Requires-Dist: myst-parser>=2.0; extra == "docs"
|
|
28
|
+
Requires-Dist: sphinx-autodoc-typehints>=2.1; extra == "docs"
|
|
29
|
+
Requires-Dist: sphinx-autobuild>=2025.2; extra == "docs"
|
|
30
|
+
Requires-Dist: sphinx-copybutton>=0.5.0; extra == "docs"
|
|
31
|
+
Requires-Dist: sphinxcontrib-bibtex>=2.4.0; extra == "docs"
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
Dynamic: requires-python
|
|
34
|
+
|
|
35
|
+
# Consenrich
|
|
36
|
+
|
|
37
|
+
Consenrich is an adaptive linear state estimator that yields genome-wide, uncertainty-calibrated signal tracks from noisy multi-sample cohorts' epigenetic HTS data.
|
|
38
|
+
|
|
39
|
+
<p align="center">
|
|
40
|
+
<img src="docs/images/noise.png" alt="Simplified Schematic of Consenrich." width="600">
|
|
41
|
+
</p>
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
Special emphasis is placed on computational efficiency, model interpretability, and practical utility for downstream tasks that require well-resolved genome-wide signal estimates and uncertainty quantification across samples, such as:
|
|
45
|
+
|
|
46
|
+
* Consensus detection of open chromatin regions, TF binding, histone modification, etc.
|
|
47
|
+
* Candidate prioritization for differential analyses, functional validation, integrative modeling, etc.
|
|
48
|
+
|
|
49
|
+
[**See the Documentation**](https://nolan-h-hamilton.github.io/Consenrich/) for usage examples, installation details, configuration options, and an API reference.
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
## Manuscript Preprint and Citation
|
|
53
|
+
|
|
54
|
+
**BibTeX Citation**
|
|
55
|
+
|
|
56
|
+
```bibtex
|
|
57
|
+
@article {Hamilton2025,
|
|
58
|
+
author = {Hamilton, Nolan H and Huang, Yu-Chen E and McMichael, Benjamin D and Love, Michael I and Furey, Terrence S},
|
|
59
|
+
title = {Genome-Wide Uncertainty-Moderated Extraction of Signal Annotations from Multi-Sample Functional Genomics Data},
|
|
60
|
+
year = {2025},
|
|
61
|
+
doi = {10.1101/2025.02.05.636702},
|
|
62
|
+
publisher = {Cold Spring Harbor Laboratory},
|
|
63
|
+
journal = {bioRxiv}
|
|
64
|
+
}
|
|
65
|
+
```
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
consenrich-0.7.4b2.dist-info/RECORD,,
|
|
2
|
+
consenrich-0.7.4b2.dist-info/WHEEL,sha256=mer8vOuI-KlBtJIdzgyZALHdls6RZGRaE6RPt7dfKrk,138
|
|
3
|
+
consenrich-0.7.4b2.dist-info/entry_points.txt,sha256=EXIGcSTyc16vtPkfHJNs-aaSVoq6Pn5Ev5GeW1JCYn0,58
|
|
4
|
+
consenrich-0.7.4b2.dist-info/top_level.txt,sha256=jsWw8GC11dOKPcdULXk-ggW357CO-vTNW6ssryqa21Q,11
|
|
5
|
+
consenrich-0.7.4b2.dist-info/METADATA,sha256=yNgRmON1PjuxjF_201a5T30Wd3j6Bxfwt-p_GYck_7c,2559
|
|
6
|
+
consenrich-0.7.4b2.dist-info/licenses/LICENSE,sha256=w09-aT0LFrH_cL3ShJWz-xusRpbThwTpdmBD08tc8EI,1074
|
|
7
|
+
consenrich/detrorm.py,sha256=Wm3M0LH5R0UWJpKlQqxrv2xDlWMDP_DyYwEdnUI2itI,8946
|
|
8
|
+
consenrich/misc_util.py,sha256=GteSAUuWUVLV1DYWoOOikLfGI3J3gW7JKZrCBet3f6I,3689
|
|
9
|
+
consenrich/constants.py,sha256=yDehsQVmkNXeji0PUbYzJHjILPJgXIkOdznoBHWsNyo,5091
|
|
10
|
+
consenrich/__init__.py,sha256=UEAddImbk7lAPy3WyuI6kaOuSWDGPxQ8goF3l0na2NU,320
|
|
11
|
+
consenrich/core.py,sha256=CuXwBYCg1aXkfRC41vduSi7Q6d8VyTDV4WI8rITQ4C8,57436
|
|
12
|
+
consenrich/cconsenrich.cpython-312-darwin.so,sha256=d7P9Z_3GOaWTS0xqihzJU10l7P0IYf6N5ioJXXpPnJo,366120
|
|
13
|
+
consenrich/matching.py,sha256=RLF9q3HP1j6tvrg4FKCp-K1kGcoyTWn5L0S-SWMIu4A,31775
|
|
14
|
+
consenrich/consenrich.py,sha256=lqQt1_gGV-RsyTtHSzKI0BmAndZI8Yu0pw3Ihj55GO8,46540
|
|
15
|
+
consenrich/cconsenrich.pyx,sha256=6HOWSGdWhb79tCm1ErulHWtczT7lw-WzbXDQJTtjcvg,33490
|
|
16
|
+
consenrich/cconsenrich.c,sha256=v-K0n5rSX0pDqOiQhjrUt_fidOvc4bHQA_4D2l71FtE,2071670
|
|
17
|
+
consenrich/data/hg19.sizes,sha256=ewnSktA_Ps1BoiU3yeLNaJxJO-taq3uKE4-6JUnly1o,365
|
|
18
|
+
consenrich/data/mm10.sizes,sha256=sgHMQGBr6yEm4hKqeN3auQJdlMX4ZdqDQ23EdDmWedU,320
|
|
19
|
+
consenrich/data/hg38_blacklist.bed,sha256=zbamE4bP5JJT-ivyDp61LjvaI00Wo-vd0oKtFxy_u4E,14940
|
|
20
|
+
consenrich/data/ce10_sparse.bed,sha256=0jIOO1CW5itA9HsERvJ8E9Ta-bLtFipYXg76OQBPYIY,263712
|
|
21
|
+
consenrich/data/ce10_blacklist.bed,sha256=JIAbS0PbeVmtIy7MiZRZyEGK4yvA0ek0q2sjxaOAXyY,2215
|
|
22
|
+
consenrich/data/dm6_blacklist.bed,sha256=WARyyesi9b86fFXbbEi5JvBBatnYNgtavkIcxH9Z9Ww,4120
|
|
23
|
+
consenrich/data/mm39_blacklist.bed,sha256=xbWP9aa9LZFZGTFzSU4JGnTaLlpt3A3_OTLZUZ672zI,79975
|
|
24
|
+
consenrich/data/hg38.sizes,sha256=IfUNvzVusS77dscRz5LlHykgiffc6ckyMPp77Z1fpOE,365
|
|
25
|
+
consenrich/data/mm10_blacklist.bed,sha256=RiC_83MGGXSgvZo3vhCsJR5ixCr8WbONRSN-jOqel-4,81653
|
|
26
|
+
consenrich/data/mm39_sparse.bed,sha256=am6wiZvVaKtOYuyHacooDodCHKsiA1e3Bta5zNdYNA8,2409256
|
|
27
|
+
consenrich/data/ce11_sparse.bed,sha256=a-XyExCkosFoOmTnwdVnU9817BKRtT7d2AZqhCCmlMs,263712
|
|
28
|
+
consenrich/data/ce11.sizes,sha256=94La5R1EjWf6QauJiUKCMS5N7y_XKmvnU01BKbSBTZs,88
|
|
29
|
+
consenrich/data/hg19_blacklist.bed,sha256=3oF2nmPFuSxZ5LWE9mPvMCQzgy-kBoFTVdMd7z8al00,19821
|
|
30
|
+
consenrich/data/hg38_sparse.bed,sha256=hqQTeSJVlzWEZqq_DPJgKS9NIHhpa8Tur0UYqClcXQw,6902926
|
|
31
|
+
consenrich/data/mm39.sizes,sha256=LpkYY8S_gNuuXyrUG0S0DmDVF1Bkh0Fg_PzMH7otdGE,320
|
|
32
|
+
consenrich/data/hg19_sparse.bed,sha256=kNq94yialQCLNp-tbPqIdFYq3A_7L8Coxo0w3cl1bH0,6897180
|
|
33
|
+
consenrich/data/ce11_blacklist.bed,sha256=3etEe4n3o61N1HSx7pS9vYtEUWELq1VT_DOTDjLoQc0,2153
|
|
34
|
+
consenrich/data/mm10_sparse.bed,sha256=ukvVuheKf4gxzAzXUbKZ1sewsmP-WOsUz60ySENkmuw,2409812
|
|
35
|
+
consenrich/data/ce10.sizes,sha256=1crlJDro3lhUpECU_2g9eVIUFM27mw0uMzWqtdmThgA,88
|
|
36
|
+
consenrich/data/dm6.sizes,sha256=0MNGeHSdAwu-Ym_ApUlHoDybDZDNyqYhbVbR_srZiEY,100
|
|
37
|
+
consenrich/data/dm6_sparse.bed,sha256=H9wvrZs2ACdtUbDZhLXEEJ4xL5OpKOxhU_Pqf2EyDOM,445849
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Nolan H. Hamilton
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
consenrich
|