REDItools3 3.1a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of REDItools3 might be problematic. Click here for more details.

@@ -0,0 +1,36 @@
1
+ Metadata-Version: 2.1
2
+ Name: REDItools3
3
+ Version: 3.1a0
4
+ Author: Ernesto Picardi
5
+ Author-email: Adam Handen <adam.handen@gmail.com>
6
+ Project-URL: homepage, https://github.com/BioinfoUNIBA/REDItools3
7
+ Project-URL: repository, https://github.com/BioinfoUNIBA/REDItools3
8
+ Project-URL: issues, https://github.com/BioinfoUNIBA/REDItools3/issues
9
+ Keywords: bioinformatics,RNA,RNA-editing
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: GNU General Public License (GPL)
14
+ Classifier: Operating System :: MacOS :: MacOS X
15
+ Classifier: Operating System :: Unix
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
18
+ Requires-Python: >=3.7
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: pysam >=0.22.0
22
+ Requires-Dist: sortedcontainers >=2.4.0
23
+
24
+ # REDItools3
25
+ A new REDItools implementation to speed up RNA editing profiling in massive RNA-seq data
26
+
27
+ # Installation
28
+ Install from PyPI.
29
+ `pip install REDItools3`
30
+
31
+ Use the whl file under the dist directory.
32
+ `pip install dist/REDItools3-3.1a0-py3-none-any.whl`
33
+
34
+ # Usage
35
+ Once installed, reditools can be run from the command line.
36
+ `python -m reditools`
@@ -0,0 +1,21 @@
1
+ reditools/__init__.py,sha256=7nSB0hrQznxrn6l95cv_pSonJTG6jZCQdbn7aT1TtvY,46
2
+ reditools/__main__.py,sha256=mWJ9O2LDiOpBWDBJJUN7OiM4SyltW-kVXXAGBe_JxgQ,842
3
+ reditools/alignment_file.py,sha256=YFyCEhMek2t93DpmpwEst5v3gDZkmRotbd6Fy_mP0aE,4258
4
+ reditools/alignment_manager.py,sha256=_FXwvqGWoXRdzVrwBxki2heaVZA2cQbGXqCopr-g1Hs,4138
5
+ reditools/analyze.py,sha256=u38yN5DmXUCW8nQP_BMfsXuvb59rFO12di5cYT8Ye58,15280
6
+ reditools/compiled_position.py,sha256=v540uUEie_HHUwsYQmBqeeOkUvtYlcnWj1v8gAhLUiE,3858
7
+ reditools/compiled_reads.py,sha256=7Hm5f7g1T8q1zDOOxZUD7aZax9b7SdQ0PlmT93hmcaE,4154
8
+ reditools/fasta_file.py,sha256=KBsJBs7OnBpew2PGWGp0mTxPLlpBmRrtXL4uvQw4t34,2212
9
+ reditools/file_utils.py,sha256=AJjU9leOxSou5U_4RAgapR9PGQz0OYQlkCudvTcXGeQ,3284
10
+ reditools/homopolymerics.py,sha256=BCYXBJa6YuouzccFisBFOtGfZAEOSqeqJsO-c37At84,2123
11
+ reditools/index.py,sha256=K3JQTMx4ojUUiPQTDMDsoYoFQQ_o-ZNqTrh5dIVFVSQ,7398
12
+ reditools/logger.py,sha256=u4L2SYxy4vJ4KDHEymd0b1sCa8BXXHchx8LR_wcFq1A,1210
13
+ reditools/reditools.py,sha256=Rb5bllqjE1wHti98p-v2t4Vu-YEvZgNv-FXcUPgDVO0,12725
14
+ reditools/region.py,sha256=_BiKDc5lCl1snjkokRiUWOgzA57ME3yLydEIwK9ku7U,3780
15
+ reditools/rtchecks.py,sha256=tkaosQDBc2XN_RlVMtNwrxZjCQoQo2bWfQISROXCmKA,8221
16
+ reditools/utils.py,sha256=a2qfhMcrH2QlK-JoR-HHF6_bnlo5v3jihAqqknvVIjc,2733
17
+ REDItools3-3.1a0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
18
+ REDItools3-3.1a0.dist-info/METADATA,sha256=EPD47hLxZoozfc0Gd4uFPOaid9uz81DkWI4Pkv0STpo,1289
19
+ REDItools3-3.1a0.dist-info/WHEEL,sha256=a7TGlA-5DaHMRrarXjVbQagU3Man_dCnGIWMJr5kRWo,91
20
+ REDItools3-3.1a0.dist-info/top_level.txt,sha256=wrvvbFXhmNg7s6LQqjlV_fVQYUZOOpF93IcMu_hBCx4,10
21
+ REDItools3-3.1a0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (75.4.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ reditools
reditools/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """REDItools3 - RNA Editing Analysis Tool."""
reditools/__main__.py ADDED
@@ -0,0 +1,37 @@
1
+ """Commandline tool for REDItools."""
2
+
3
+ import sys
4
+
5
+ from reditools import analyze, homopolymerics, index
6
+
7
+
8
def usage():
    """Write the top-level help text (available run modes) to stdout."""
    # NOTE(review): any column alignment inside this text could not be
    # recovered from the diff view this was taken from -- confirm the
    # spacing against the packaged file before applying.
    help_text = """usage: reditools {analyze,find-repeats,index}

REDItools3

Run Modes:
analyze Find editing events in one or more alignment files.

find-repeats Find repetitive elements in a genome.

index Calculate editing indices from the output of `analyze`
mode.
"""
    print(help_text)
22
+
23
+
24
if __name__ == '__main__':
    # Map each run mode to its entry point. A plain dict is used instead of
    # `match`/`case` because the package metadata declares
    # `Requires-Python: >=3.7`, while `match` is a syntax error before 3.10.
    run_modes = {
        'analyze': analyze.main,
        'find-repeats': homopolymerics.main,
        'index': index.main,
    }

    if len(sys.argv) > 1:
        # The run mode is removed from argv before dispatching; presumably so
        # the selected module parses only its own arguments (verify against
        # the `main()` implementations).
        command = sys.argv.pop(1)
        # Unknown run modes fall back to printing the usage text.
        run_modes.get(command, usage)()
    else:
        usage()
@@ -0,0 +1,146 @@
1
+ """Wrappers for pysam files."""
2
+
3
+ from pysam.libcalignmentfile import AlignmentFile as PysamAlignmentFile
4
+
5
+
6
class RTAlignmentFile(PysamAlignmentFile):
    """
    Wrapper for pysam.AlignmentFile to provide filtering on fetch.

    Every read yielded by `fetch` / `fetch_by_position` has passed the
    built-in tag and flag filters in `_check_read`, plus any optional checks
    (mapping quality, read length, excluded read names) that were enabled.
    """

    # Exact flag values that cause a read to be discarded.
    # 77: NOT_MAPPED
    # 141: NOT_MAPPED
    # 512: QC_FAIL
    # 256: IS_SECONDARY
    # 2048: IS_SUPPLEMENTARY
    # 1024: IS_DUPLICATE
    # NOTE(review): these are compared for equality against the whole flag
    # value, not tested as bits, so e.g. an unpaired reverse-strand secondary
    # read (256 + 16) is not tossed here -- confirm this is intended.
    _flags_to_toss = {77, 141, 512, 256, 2048, 1024}
    # The only flag values accepted for paired reads.
    _paired_flags_to_keep = {99, 147, 83, 163}

    def __new__(cls, *args, **kwargs):
        """
        Create a wrapper for pysam.AlignmentFile.

        The filter options are consumed by `__init__`; they are stripped here
        so that the pysam constructor does not receive them.

        Parameters:
            *args (list): Positional arguments for pysam.AlignmentFile()
            **kwargs (dict): Keyword arguments for pysam.AlignmentFile()

        Returns:
            PysamAlignmentFile
        """
        kwargs.pop('min_quality', None)
        kwargs.pop('min_length', None)
        return PysamAlignmentFile.__new__(cls, *args, **kwargs)

    def __init__(self, *args, min_quality=0, min_length=0, **kwargs):
        """
        Create a wrapper for pysam.AlignmentFile.

        Parameters:
            *args (list): Positional arguments for pysam.AlignmentFile()
            min_quality (int): Minimum read quality
            min_length (int): Minimum read length
            **kwargs (dict): Keyword arguments for pysam.AlignmentFile()
        """
        PysamAlignmentFile.__init__(self)

        # Optional per-read predicates; a read must pass all of them.
        self._checklist = []
        # Initialised up front so the `exclude_reads` property can be read
        # before anything has been assigned to it.
        self._exclude_reads = set()

        if min_quality > 0:
            self._min_quality = min_quality
            self._checklist.append(self._check_quality)

        if min_length > 0:
            self._min_length = min_length
            self._checklist.append(self._check_length)

    @property
    def exclude_reads(self):
        """
        Names of reads not to be fetched.

        Returns:
            iterable
        """
        return self._exclude_reads

    @exclude_reads.setter
    def exclude_reads(self, read_names):
        """
        Provide a list of read names to be skipped during fetch.

        Parameters:
            read_names (iterable): Reads to skip
        """
        self._exclude_reads = set(read_names)
        # Register the name check only once, so that re-assigning the
        # exclusion list does not make every read run the same check twice.
        if self._check_read_name not in self._checklist:
            self._checklist.append(self._check_read_name)

    def fetch(self, *args, **kwargs):
        """
        Fetch reads aligned in a region.

        Parameters:
            *args (list): Positional arguments for pysam.AlignmentFile.fetch
            **kwargs (dict): Keyword arguments for pysam.AlignmentFile.fetch

        Yields:
            Reads that pass all filters
        """
        if 'region' in kwargs:
            # The region may be passed as a non-string object; pysam is given
            # its string form.
            kwargs['region'] = str(kwargs['region'])  # noqa:WPS529
        try:
            iterator = super().fetch(*args, **kwargs)
        except ValueError:
            # A ValueError from pysam's fetch yields no reads instead of
            # propagating (presumably an invalid or absent region).
            return
        for read in iterator:
            if self._check_read(read):
                yield read

    def fetch_by_position(self, *args, **kwargs):
        """
        Retrieve reads that all start at the same point on the reference.

        Consecutive reads from `fetch` sharing a `reference_start` are
        grouped into one list.

        Parameters:
            *args (list): Positional arguments for fetch
            **kwargs (dict): Named arguments for fetch

        Yields:
            Lists containing reads
        """
        iterator = self.fetch(*args, **kwargs)

        first_read = next(iterator, None)
        if first_read is None:
            return

        reads = [first_read]
        ref_start = first_read.reference_start

        for read in iterator:
            if read.reference_start == ref_start:
                reads.append(read)
            else:
                yield reads
                reads = [read]
                ref_start = read.reference_start
        # Flush the final group.
        yield reads

    def _check_quality(self, read):
        """Return True if the read meets the minimum mapping quality."""
        return read.mapping_quality >= self._min_quality

    def _check_length(self, read):
        """Return True if the read meets the minimum query length."""
        return read.query_length >= self._min_length

    def _check_read_name(self, read):
        """Return True if the read name is not in the exclusion set."""
        return read.query_name not in self._exclude_reads

    def _check_read(self, read):
        """Return True if the read passes the built-in and optional filters."""
        if read.has_tag('SA'):
            return False
        if read.flag in self._flags_to_toss:
            return False
        if read.is_paired and read.flag not in self._paired_flags_to_keep:
            return False

        for check in self._checklist:
            if not check(read):
                return False
        return True
@@ -0,0 +1,136 @@
1
+ """Wrappers for pysam files."""
2
+ from itertools import chain
3
+
4
+ from reditools.alignment_file import RTAlignmentFile
5
+
6
+
7
class ReadGroupIter(object):
    """
    Manages multiple fetch iterators.

    Each source iterator is expected to yield non-empty lists of reads that
    share a `reference_start`. One pending list is held per source; `next`
    hands out the pending lists with the smallest start position.

    NOTE(review): groups are compared on `reference_start` only, so this
    presumably relies on all sources iterating a single contig -- confirm
    against the callers.
    """

    # Keys of the per-source record dictionaries.
    _iter_idx = 0
    _reads_idx = 1
    _start_idx = 2

    def __init__(self, fetch_iters):
        """
        Combine multiple fetch iterators.

        Parameters:
            fetch_iters (iterable): The iterators to combine.
        """
        self._read_groups = []
        for itr in fetch_iters:
            group = self._load_group(itr)
            # Sources that are exhausted from the start are dropped.
            if group is not None:
                self._read_groups.append(group)

    def is_empty(self):
        """
        Check if there are still reads left.

        Returns:
            bool: True if empty, else False
        """
        return not self._read_groups

    def next(self):
        """
        Retrieve a list of reads that all start at the same position.

        Returns:
            list: One list of reads per contributing source, ordered from the
            last source to the first. Empty if every source is exhausted.
        """
        if not self._read_groups:
            # Nothing pending: return an empty result instead of letting
            # min() fail on an empty sequence.
            return []

        position = self._find_start()
        reads = []
        remaining = []
        # Walk the sources last-to-first so the output order matches the
        # order in which groups have always been returned.
        for group in reversed(self._read_groups):
            if group[self._start_idx] != position:
                remaining.append(group)
                continue
            reads.append(group[self._reads_idx])
            refreshed = self._load_group(group[self._iter_idx])
            # An exhausted source is simply not carried over.
            if refreshed is not None:
                remaining.append(refreshed)
        # Restore the first-to-last source order for the next call.
        remaining.reverse()
        self._read_groups = remaining
        return reads

    def _load_group(self, itr):
        """Pull the next read list from itr; return None if it is exhausted."""
        reads = next(itr, None)
        if reads is None:
            return None
        return {
            self._iter_idx: itr,
            self._reads_idx: reads,
            self._start_idx: reads[0].reference_start,
        }

    def _find_start(self):
        """Return the smallest pending start position across all sources."""
        return min(group[self._start_idx] for group in self._read_groups)
68
+
69
+
70
class AlignmentManager(object):
    """
    Manage multiple RTAlignmentFiles with a single fetch.

    Attributes:
        min_quality (int): Minimum read quality (applied during add_file)
        min_length (int): Minimum read length (applied during add_file)
        file_list (list): Paths of the files added so far
        position (int): `reference_start` of the group most recently yielded
            by fetch_by_position; None before the first group
        contig (str): `reference_name` of the group most recently yielded by
            fetch_by_position; None before the first group
    """

    def __init__(self, *args, **kwargs):
        """
        Create a new manager.

        Parameters:
            *args (list): positional arguments for the RTAlignmentFile
                constructor (passed after the file name)
            **kwargs (dict): named arguments for the RTAlignmentFile
                constructor
        """
        self._bam_args = args
        self._bam_kwargs = kwargs
        self._bams = []
        self.min_quality = 0
        self.min_length = 0
        self.file_list = []
        # Defined up front so they can be read before the first fetch.
        self.position = None
        self.contig = None

    def add_file(self, fname, exclude_reads=None):
        """
        Add an alignment file to the manager for analysis.

        The file is opened with the manager's current `min_quality` and
        `min_length`, so those must be set before calling this method.

        Parameters:
            fname (str): Path to BAM file
            exclude_reads (set): Read names to skip
        """
        new_file = RTAlignmentFile(
            fname,
            *self._bam_args,
            min_quality=self.min_quality,
            min_length=self.min_length,
            **self._bam_kwargs,
        )
        try:
            new_file.check_index()
        except Exception:
            # Do not leave the handle open when the index check fails; the
            # original error is still reported to the caller.
            new_file.close()
            raise
        if exclude_reads:
            new_file.exclude_reads = exclude_reads
        self._bams.append(new_file)
        self.file_list.append(fname)

    def fetch_by_position(self, *args, **kwargs):
        """
        Perform a combined fetch_by_position for all managed files.

        Updates `position` and `contig` from the first read of each group
        before yielding it.

        Parameters:
            *args (list): Positional arguments for
                RTAlignmentFile.fetch_by_position
            **kwargs (dict): Named arguments for
                RTAlignmentFile.fetch_by_position

        Yields:
            list: reads from all managed files that begin at the same position.
        """
        iters = [bam.fetch_by_position(*args, **kwargs) for bam in self._bams]
        rgi = ReadGroupIter(iters)
        while not rgi.is_empty():
            # Flatten the per-file read lists into a single list.
            reads = list(chain.from_iterable(rgi.next()))
            self.position = reads[0].reference_start
            self.contig = reads[0].reference_name
            yield reads