REDItools3 3.1a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of REDItools3 might be problematic. Click here for more details.
- REDItools3-3.1a0.dist-info/LICENSE +674 -0
- REDItools3-3.1a0.dist-info/METADATA +36 -0
- REDItools3-3.1a0.dist-info/RECORD +21 -0
- REDItools3-3.1a0.dist-info/WHEEL +5 -0
- REDItools3-3.1a0.dist-info/top_level.txt +1 -0
- reditools/__init__.py +1 -0
- reditools/__main__.py +37 -0
- reditools/alignment_file.py +146 -0
- reditools/alignment_manager.py +136 -0
- reditools/analyze.py +552 -0
- reditools/compiled_position.py +133 -0
- reditools/compiled_reads.py +131 -0
- reditools/fasta_file.py +68 -0
- reditools/file_utils.py +132 -0
- reditools/homopolymerics.py +92 -0
- reditools/index.py +268 -0
- reditools/logger.py +44 -0
- reditools/reditools.py +456 -0
- reditools/region.py +130 -0
- reditools/rtchecks.py +274 -0
- reditools/utils.py +106 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: REDItools3
|
|
3
|
+
Version: 3.1a0
|
|
4
|
+
Author: Ernesto Picardi
|
|
5
|
+
Author-email: Adam Handen <adam.handen@gmail.com>
|
|
6
|
+
Project-URL: homepage, https://github.com/BioinfoUNIBA/REDItools3
|
|
7
|
+
Project-URL: repository, https://github.com/BioinfoUNIBA/REDItools3
|
|
8
|
+
Project-URL: issues, https://github.com/BioinfoUNIBA/REDItools3/issues
|
|
9
|
+
Keywords: bioinformatics,RNA,RNA-editing
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: GNU General Public License (GPL)
|
|
14
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
|
15
|
+
Classifier: Operating System :: Unix
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
18
|
+
Requires-Python: >=3.7
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: pysam >=0.22.0
|
|
22
|
+
Requires-Dist: sortedcontainers >=2.4.0
|
|
23
|
+
|
|
24
|
+
# REDItools3
|
|
25
|
+
A new REDItools implementation to speed-up the RNA editing profiling in massive RNAseq data
|
|
26
|
+
|
|
27
|
+
# Installation
|
|
28
|
+
Install from PyPI:
|
|
29
|
+
`pip install REDItools3`
|
|
30
|
+
|
|
31
|
+
Alternatively, install from the wheel file under the dist directory:
|
|
32
|
+
`pip install dist/REDItools3-3.1a0-py3-none-any.whl`
|
|
33
|
+
|
|
34
|
+
# Usage
|
|
35
|
+
Once installed, reditools can be run from the commandline.
|
|
36
|
+
`python -m reditools`
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
reditools/__init__.py,sha256=7nSB0hrQznxrn6l95cv_pSonJTG6jZCQdbn7aT1TtvY,46
|
|
2
|
+
reditools/__main__.py,sha256=mWJ9O2LDiOpBWDBJJUN7OiM4SyltW-kVXXAGBe_JxgQ,842
|
|
3
|
+
reditools/alignment_file.py,sha256=YFyCEhMek2t93DpmpwEst5v3gDZkmRotbd6Fy_mP0aE,4258
|
|
4
|
+
reditools/alignment_manager.py,sha256=_FXwvqGWoXRdzVrwBxki2heaVZA2cQbGXqCopr-g1Hs,4138
|
|
5
|
+
reditools/analyze.py,sha256=u38yN5DmXUCW8nQP_BMfsXuvb59rFO12di5cYT8Ye58,15280
|
|
6
|
+
reditools/compiled_position.py,sha256=v540uUEie_HHUwsYQmBqeeOkUvtYlcnWj1v8gAhLUiE,3858
|
|
7
|
+
reditools/compiled_reads.py,sha256=7Hm5f7g1T8q1zDOOxZUD7aZax9b7SdQ0PlmT93hmcaE,4154
|
|
8
|
+
reditools/fasta_file.py,sha256=KBsJBs7OnBpew2PGWGp0mTxPLlpBmRrtXL4uvQw4t34,2212
|
|
9
|
+
reditools/file_utils.py,sha256=AJjU9leOxSou5U_4RAgapR9PGQz0OYQlkCudvTcXGeQ,3284
|
|
10
|
+
reditools/homopolymerics.py,sha256=BCYXBJa6YuouzccFisBFOtGfZAEOSqeqJsO-c37At84,2123
|
|
11
|
+
reditools/index.py,sha256=K3JQTMx4ojUUiPQTDMDsoYoFQQ_o-ZNqTrh5dIVFVSQ,7398
|
|
12
|
+
reditools/logger.py,sha256=u4L2SYxy4vJ4KDHEymd0b1sCa8BXXHchx8LR_wcFq1A,1210
|
|
13
|
+
reditools/reditools.py,sha256=Rb5bllqjE1wHti98p-v2t4Vu-YEvZgNv-FXcUPgDVO0,12725
|
|
14
|
+
reditools/region.py,sha256=_BiKDc5lCl1snjkokRiUWOgzA57ME3yLydEIwK9ku7U,3780
|
|
15
|
+
reditools/rtchecks.py,sha256=tkaosQDBc2XN_RlVMtNwrxZjCQoQo2bWfQISROXCmKA,8221
|
|
16
|
+
reditools/utils.py,sha256=a2qfhMcrH2QlK-JoR-HHF6_bnlo5v3jihAqqknvVIjc,2733
|
|
17
|
+
REDItools3-3.1a0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
18
|
+
REDItools3-3.1a0.dist-info/METADATA,sha256=EPD47hLxZoozfc0Gd4uFPOaid9uz81DkWI4Pkv0STpo,1289
|
|
19
|
+
REDItools3-3.1a0.dist-info/WHEEL,sha256=a7TGlA-5DaHMRrarXjVbQagU3Man_dCnGIWMJr5kRWo,91
|
|
20
|
+
REDItools3-3.1a0.dist-info/top_level.txt,sha256=wrvvbFXhmNg7s6LQqjlV_fVQYUZOOpF93IcMu_hBCx4,10
|
|
21
|
+
REDItools3-3.1a0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
reditools
|
reditools/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""REDItools3 - RNA Editing Analysis Tool."""
|
reditools/__main__.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Commandline tool for REDItools."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
from reditools import analyze, homopolymerics, index
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def usage():
    """Write the top-level help text, listing the run modes, to stdout."""
    help_text = """usage: reditools {analyze,find-repeats,index}

REDItools3

Run Modes:
analyze Find editing events in one or more alignment files.

find-repeats Find repetitive elements in a genome.

index Calculate editing indices from the output of `analyze`
mode.
"""
    print(help_text)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
if __name__ == '__main__':
    # Map each sub-command name to the entry point of the module that
    # implements it. A plain dict is used instead of match/case so this file
    # still parses on interpreters older than Python 3.10 (the package
    # metadata declares support starting at 3.7).
    run_modes = {
        'analyze': analyze.main,
        'find-repeats': homopolymerics.main,
        'index': index.main,
    }

    # The sub-command is removed from sys.argv before dispatching; presumably
    # each mode parses the remaining arguments itself.
    command = sys.argv.pop(1) if len(sys.argv) > 1 else None

    # A missing or unrecognised sub-command falls back to the usage message.
    run_modes.get(command, usage)()
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""Wrappers for pysam files."""
|
|
2
|
+
|
|
3
|
+
from pysam.libcalignmentfile import AlignmentFile as PysamAlignmentFile
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class RTAlignmentFile(PysamAlignmentFile):
    """
    Wrapper for pysam.AlignmentFile to provide filtering on fetch.

    Reads returned by `fetch` and `fetch_by_position` are first passed
    through `_check_read`, which applies fixed flag/tag filters plus any
    optional checks (mapping quality, read length, excluded read names)
    that have been registered in `_checklist`.
    """

    def __new__(cls, *args, **kwargs):
        """
        Create a wrapper for pysam.AlignmentFile.

        The filter options are removed from `kwargs` before delegating to
        the pysam constructor; they are consumed by `__init__` instead.

        Parameters:
            *args (list): Positional arguments for pysam.AlignmentFile()
            **kwargs (dict): Keyword arguments for pysam.AlignmentFile()

        Returns:
            PysamAlignmentFile
        """
        kwargs.pop('min_quality', None)
        kwargs.pop('min_length', None)
        return PysamAlignmentFile.__new__(cls, *args, **kwargs)

    def __init__(self, *args, min_quality=0, min_length=0, **kwargs):
        """
        Create a wrapper for pysam.AlignmentFile.

        Parameters:
            *args (list): Positional arguments for pysam.AlignmentFile()
            min_quality (int): Minimum mapping quality of a read
            min_length (int): Minimum query length of a read
            **kwargs (dict): Keyword arguments for pysam.AlignmentFile()
        """
        # The file arguments were already handed to the base class in
        # __new__, so the base initialiser is called without them.
        PysamAlignmentFile.__init__(self)

        # Optional per-read predicates; each returns True to keep the read.
        self._checklist = []
        # Always defined so the `exclude_reads` property can be read before
        # it has ever been assigned.
        self._exclude_reads = set()
        self._min_quality = min_quality
        self._min_length = min_length

        # A threshold of zero (or less) disables the corresponding check.
        if min_quality > 0:
            self._checklist.append(self._check_quality)

        if min_length > 0:
            self._checklist.append(self._check_length)

    @property
    def exclude_reads(self):
        """
        Names of reads not to be fetched.

        Returns:
            iterable
        """
        return self._exclude_reads

    @exclude_reads.setter
    def exclude_reads(self, read_names):
        """
        Provide a list of read names to be skipped during fetch.

        Parameters:
            read_names (iterable): Reads to skip
        """
        self._exclude_reads = set(read_names)
        # Register the name check only once, even if the exclusion list is
        # replaced several times.
        if self._check_read_name not in self._checklist:
            self._checklist.append(self._check_read_name)

    def fetch(self, *args, **kwargs):
        """
        Fetch reads aligned in a region.

        Parameters:
            *args (list): Positional arguments for pysam.AlignmentFile.fetch
            **kwargs (dict): Keyword arguments for pysam.AlignmentFile.fetch

        Yields:
            Reads that pass every filter
        """
        if 'region' in kwargs:
            # Region objects are converted to their string form for pysam.
            kwargs['region'] = str(kwargs['region'])  # noqa:WPS529
        try:
            iterator = super().fetch(*args, **kwargs)
        except ValueError:
            # A ValueError from pysam is treated as "no reads" rather than
            # an error. NOTE(review): presumably this covers regions that
            # are absent from this particular file - confirm.
            return
        for read in iterator:
            if self._check_read(read):
                yield read

    def fetch_by_position(self, *args, **kwargs):
        """
        Retrieve reads that all start at the same point on the reference.

        Consecutive reads from `fetch` that share the same
        `reference_start` are collected into one list.

        Parameters:
            *args (list): Positional arguments for fetch
            **kwargs (dict): Named arguments for fetch

        Yields:
            Lists containing reads
        """
        iterator = self.fetch(*args, **kwargs)

        first_read = next(iterator, None)
        if first_read is None:
            return

        reads = [first_read]
        ref_start = first_read.reference_start

        for read in iterator:
            if read.reference_start == ref_start:
                reads.append(read)
            else:
                # Start position changed: emit the finished group and begin
                # a new one with the current read.
                yield reads
                reads = [read]
                ref_start = read.reference_start
        # Emit the final group.
        yield reads

    # 77: NOT_MAPPED
    # 141: NOT_MAPPED
    # 512: QC_FAIL
    # 256: IS_SECONDARY
    # 2048: IS_SUPPLEMENTARY
    # 1024: IS_DUPLICATE
    # NOTE(review): these are matched against the complete flag value, not
    # as bit masks, so only these exact flag values are rejected (e.g. 1024
    # but not 1040) - confirm this is intended.
    _flags_to_toss = {77, 141, 512, 256, 2048, 1024}
    # Exact flag values accepted for paired reads (the four properly paired
    # first/second-in-pair orientation combinations).
    _paired_flags_to_keep = {99, 147, 83, 163}

    def _check_quality(self, read):
        """Return True if the read's mapping quality meets the minimum."""
        return read.mapping_quality >= self._min_quality

    def _check_length(self, read):
        """Return True if the read's query length meets the minimum."""
        return read.query_length >= self._min_length

    def _check_read_name(self, read):
        """Return True if the read's name is not in the exclusion set."""
        return read.query_name not in self._exclude_reads

    def _check_read(self, read):
        """
        Decide whether a read should be returned by fetch.

        Parameters:
            read: Aligned read as produced by pysam's fetch

        Returns:
            bool: True if the read passes every filter, else False
        """
        # Reads carrying an SA tag are always dropped.
        if read.has_tag('SA'):
            return False
        if read.flag in self._flags_to_toss:
            return False
        if read.is_paired and read.flag not in self._paired_flags_to_keep:
            return False

        # Finally apply the optional checks registered on this instance.
        return all(check(read) for check in self._checklist)
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""Wrappers for pysam files."""
|
|
2
|
+
from itertools import chain
|
|
3
|
+
|
|
4
|
+
from reditools.alignment_file import RTAlignmentFile
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ReadGroupIter(object):
    """Merge several fetch-by-position iterators into one stream."""

    # Keys of the small per-iterator record kept in self._read_groups.
    _iter_idx = 0
    _reads_idx = 1
    _start_idx = 2

    def __init__(self, fetch_iters):
        """
        Prime every iterator and remember its first group of reads.

        Iterators that produce nothing are dropped immediately.

        Parameters:
            fetch_iters (iterable): The iterators to combine.
        """
        self._read_groups = []
        for source in fetch_iters:
            head = next(source, None)
            if head is not None:
                self._read_groups.append(self._pack(source, head))

    def is_empty(self):
        """
        Report whether every underlying iterator has been exhausted.

        Returns:
            bool: True if empty, else False
        """
        return len(self._read_groups) == 0

    def next(self):
        """
        Collect the pending read lists that start at the lowest position.

        Each iterator that contributed is advanced; exhausted iterators are
        removed. Iterators are visited from last to first, so the result is
        ordered accordingly.

        Returns:
            list: Reads
        """
        target = self._find_start()
        collected = []
        # Walk backwards so that deleting an entry does not shift the
        # indices still to be visited.
        for slot in reversed(range(len(self._read_groups))):
            entry = self._read_groups[slot]
            if entry[self._start_idx] != target:
                continue
            collected.append(entry[self._reads_idx])
            source = entry[self._iter_idx]
            upcoming = next(source, None)
            if upcoming is None:
                del self._read_groups[slot]
            else:
                self._read_groups[slot] = self._pack(source, upcoming)
        return collected

    def _pack(self, source, reads):
        """Build the record for an iterator and its pending reads."""
        return {
            self._iter_idx: source,
            self._reads_idx: reads,
            self._start_idx: reads[0].reference_start,
        }

    def _find_start(self):
        """Return the smallest pending start position."""
        starts = [entry[self._start_idx] for entry in self._read_groups]
        return min(starts)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class AlignmentManager(object):
    """
    Manage multiple RTAlignmentFiles with a single fetch.

    Attributes:
        min_quality (int): Minimum read quality (applied during add_file)
        min_length (int): Minimum read length (applied during add_file)
        file_list (list): Paths of the files added so far
        position (int): `reference_start` of the reads most recently
            yielded by fetch_by_position; None before the first yield
        contig (str): `reference_name` of the reads most recently yielded
            by fetch_by_position; None before the first yield
    """

    def __init__(self, *args, **kwargs):
        """
        Create a new manager.

        Parameters:
            *args (list): positional arguments for the RTAlignmentFile
                constructor
            **kwargs (dict): named arguments for the RTAlignmentFile
                constructor
        """
        self._bam_args = args
        self._bam_kwargs = kwargs
        self._bams = []
        self.min_quality = 0
        self.min_length = 0
        self.file_list = []
        # Defined up front so they can be read before the first fetch.
        self.position = None
        self.contig = None

    def add_file(self, fname, exclude_reads=None):
        """
        Add an alignment file to the manager for analysis.

        The file is opened with the manager's current `min_quality` and
        `min_length`, so those must be set before calling this method.

        Parameters:
            fname (str): Path to BAM file
            exclude_reads (set): Read names to skip
        """
        new_file = RTAlignmentFile(
            fname,
            *self._bam_args,
            min_quality=self.min_quality,
            min_length=self.min_length,
            **self._bam_kwargs,
        )
        new_file.check_index()
        if exclude_reads:
            new_file.exclude_reads = exclude_reads
        self._bams.append(new_file)
        self.file_list.append(fname)

    def fetch_by_position(self, *args, **kwargs):
        """
        Perform a combined fetch_by_position across all managed files.

        Updates `position` and `contig` from the first read of each group
        just before the group is yielded.

        Parameters:
            *args (list): Positional arguments for
                RTAlignmentFile.fetch_by_position
            **kwargs (dict): Named arguments for
                RTAlignmentFile.fetch_by_position

        Yields:
            list: reads from all managed files that begin at the same position.
        """
        iters = [bam.fetch_by_position(*args, **kwargs) for bam in self._bams]
        rgi = ReadGroupIter(iters)
        while not rgi.is_empty():
            # rgi.next() returns one list of reads per contributing file;
            # flatten them into a single list.
            reads = list(chain(*rgi.next()))
            self.position = reads[0].reference_start
            self.contig = reads[0].reference_name
            yield reads
|