REDItools3 3.1a0__tar.gz → 3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of REDItools3 might be problematic. Click here for more details.

Files changed (26) hide show
  1. {reditools3-3.1a0 → reditools3-3.3}/PKG-INFO +4 -4
  2. {reditools3-3.1a0 → reditools3-3.3}/REDItools3.egg-info/PKG-INFO +4 -4
  3. {reditools3-3.1a0 → reditools3-3.3}/pyproject.toml +4 -4
  4. {reditools3-3.1a0 → reditools3-3.3}/reditools/analyze.py +45 -60
  5. {reditools3-3.1a0 → reditools3-3.3}/reditools/file_utils.py +21 -42
  6. {reditools3-3.1a0 → reditools3-3.3}/reditools/homopolymerics.py +6 -1
  7. {reditools3-3.1a0 → reditools3-3.3}/reditools/index.py +7 -2
  8. {reditools3-3.1a0 → reditools3-3.3}/reditools/reditools.py +14 -1
  9. {reditools3-3.1a0 → reditools3-3.3}/reditools/rtchecks.py +28 -3
  10. {reditools3-3.1a0 → reditools3-3.3}/LICENSE +0 -0
  11. {reditools3-3.1a0 → reditools3-3.3}/README.md +0 -0
  12. {reditools3-3.1a0 → reditools3-3.3}/REDItools3.egg-info/SOURCES.txt +0 -0
  13. {reditools3-3.1a0 → reditools3-3.3}/REDItools3.egg-info/dependency_links.txt +0 -0
  14. {reditools3-3.1a0 → reditools3-3.3}/REDItools3.egg-info/requires.txt +0 -0
  15. {reditools3-3.1a0 → reditools3-3.3}/REDItools3.egg-info/top_level.txt +0 -0
  16. {reditools3-3.1a0 → reditools3-3.3}/reditools/__init__.py +0 -0
  17. {reditools3-3.1a0 → reditools3-3.3}/reditools/__main__.py +0 -0
  18. {reditools3-3.1a0 → reditools3-3.3}/reditools/alignment_file.py +0 -0
  19. {reditools3-3.1a0 → reditools3-3.3}/reditools/alignment_manager.py +0 -0
  20. {reditools3-3.1a0 → reditools3-3.3}/reditools/compiled_position.py +0 -0
  21. {reditools3-3.1a0 → reditools3-3.3}/reditools/compiled_reads.py +0 -0
  22. {reditools3-3.1a0 → reditools3-3.3}/reditools/fasta_file.py +0 -0
  23. {reditools3-3.1a0 → reditools3-3.3}/reditools/logger.py +0 -0
  24. {reditools3-3.1a0 → reditools3-3.3}/reditools/region.py +0 -0
  25. {reditools3-3.1a0 → reditools3-3.3}/reditools/utils.py +0 -0
  26. {reditools3-3.1a0 → reditools3-3.3}/setup.cfg +0 -0
@@ -1,19 +1,19 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: REDItools3
3
- Version: 3.1a0
3
+ Version: 3.3
4
4
  Author: Ernesto Picardi
5
5
  Author-email: Adam Handen <adam.handen@gmail.com>
6
6
  Project-URL: homepage, https://github.com/BioinfoUNIBA/REDItools3
7
7
  Project-URL: repository, https://github.com/BioinfoUNIBA/REDItools3
8
8
  Project-URL: issues, https://github.com/BioinfoUNIBA/REDItools3/issues
9
9
  Keywords: bioinformatics,RNA,RNA-editing
10
- Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Development Status :: 5 - Production/Stable
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: Intended Audience :: Science/Research
13
13
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
14
14
  Classifier: Operating System :: MacOS :: MacOS X
15
15
  Classifier: Operating System :: Unix
16
- Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.7
17
17
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
18
18
  Requires-Python: >=3.7
19
19
  Description-Content-Type: text/markdown
@@ -1,19 +1,19 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: REDItools3
3
- Version: 3.1a0
3
+ Version: 3.3
4
4
  Author: Ernesto Picardi
5
5
  Author-email: Adam Handen <adam.handen@gmail.com>
6
6
  Project-URL: homepage, https://github.com/BioinfoUNIBA/REDItools3
7
7
  Project-URL: repository, https://github.com/BioinfoUNIBA/REDItools3
8
8
  Project-URL: issues, https://github.com/BioinfoUNIBA/REDItools3/issues
9
9
  Keywords: bioinformatics,RNA,RNA-editing
10
- Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Development Status :: 5 - Production/Stable
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: Intended Audience :: Science/Research
13
13
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
14
14
  Classifier: Operating System :: MacOS :: MacOS X
15
15
  Classifier: Operating System :: Unix
16
- Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.7
17
17
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
18
18
  Requires-Python: >=3.7
19
19
  Description-Content-Type: text/markdown
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "REDItools3"
7
- version = "v3.1-alpha"
7
+ version = "v3.3"
8
8
  authors = [
9
9
  { name="Adam Handen", email="adam.handen@gmail.com" },
10
10
  { name="Ernesto Picardi" },
@@ -12,18 +12,18 @@ authors = [
12
12
  readme = "README.md"
13
13
  dependencies = [
14
14
  "pysam>=0.22.0",
15
- "sortedcontainers>=2.4.0"
15
+ "sortedcontainers>=2.4.0",
16
16
  ]
17
17
  keywords = ["bioinformatics", "RNA", "RNA-editing"]
18
18
  requires-python = ">=3.7"
19
19
  classifiers = [
20
- "Development Status :: 3 - Alpha",
20
+ "Development Status :: 5 - Production/Stable",
21
21
  "Intended Audience :: Developers",
22
22
  "Intended Audience :: Science/Research",
23
23
  "License :: OSI Approved :: GNU General Public License (GPL)",
24
24
  "Operating System :: MacOS :: MacOS X",
25
25
  "Operating System :: Unix",
26
- "Programming Language :: Python :: 3.11",
26
+ "Programming Language :: Python :: 3.7",
27
27
  "Topic :: Scientific/Engineering :: Bio-Informatics",
28
28
  ]
29
29
 
@@ -79,25 +79,18 @@ def setup_rtools(options): # noqa:WPS213,WPS231
79
79
  rtools.log_level = Logger.info_level
80
80
 
81
81
  if options.load_omopolymeric_file:
82
- regions = file_utils.load_omopolymeric_regions(
83
- options.load_omopolymeric_file,
84
- )
82
+ regions = file_utils.read_bed_file(options.load_omopolymeric_file)
85
83
  rtools.exclude(regions)
86
84
 
87
- if options.create_omopolymeric_file:
88
- rtools.create_omopolymeric_positions(
89
- options.create_omopolymeric_file,
90
- options.omopolymeric_span,
91
- )
92
-
93
85
  if options.splicing_file:
94
- rtools.load_splicing_file(
86
+ rtools.splice_positions = file_utils.load_splicing_file(
95
87
  options.splicing_file,
96
88
  options.splicing_span,
97
89
  )
98
90
 
99
91
  if options.bed_file:
100
- rtools.load_target_positions(options.bed_file)
92
+ regions = file_utils.read_bed_file(options.bed_file)
93
+ rtools.target_positions = regions
101
94
  if options.exclude_regions:
102
95
  for fname in options.exclude_regions:
103
96
  regions = file_utils.read_bed_file(fname)
@@ -109,10 +102,11 @@ def setup_rtools(options): # noqa:WPS213,WPS231
109
102
  rtools.max_base_position = options.max_base_position
110
103
  rtools.min_base_quality = options.min_base_quality
111
104
 
112
- rtools.min_column_length = options.min_column_length
105
+ rtools.min_column_length = options.min_read_depth
113
106
  rtools.min_edits = options.min_edits
114
107
  rtools.min_edits_per_nucleotide = options.min_edits_per_nucleotide
115
108
  rtools.strand = options.strand
109
+ rtools.max_alts = options.max_editing_nucleotides
116
110
 
117
111
  rtools.strand_confidence_threshold = options.strand_confidence_threshold
118
112
 
@@ -215,7 +209,7 @@ def run(options, in_queue, out_queue):
215
209
  except Exception as exc:
216
210
  if options.debug:
217
211
  traceback.print_exception(*sys.exc_info())
218
- sys.stderr.write(f'[ERROR] {exc}\n')
212
+ sys.stderr.write(f'[ERROR] ({type(exc)}) {exc}\n')
219
213
 
220
214
 
221
215
  def parse_options(): # noqa:WPS213
@@ -225,21 +219,26 @@ def parse_options(): # noqa:WPS213
225
219
  Returns:
226
220
  namespace: commandline args
227
221
  """
228
- parser = argparse.ArgumentParser(description='REDItools 2.0')
222
+ parser = argparse.ArgumentParser(
223
+ prog="reditools analyze",
224
+ description='REDItools3',
225
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
226
+ )
229
227
  parser.add_argument(
230
228
  'file',
231
229
  nargs='+',
232
- help='The bam file to be analyzed',
230
+ help='The bam file(s) to be analyzed.',
233
231
  )
234
232
  parser.add_argument(
235
233
  '-r',
236
234
  '--reference',
237
- help='The reference FASTA file',
235
+ help='Reference FASTA file.',
238
236
  )
239
237
  parser.add_argument(
240
238
  '-o',
241
239
  '--output-file',
242
- help='The output statistics file',
240
+ help='Path to write output to.',
241
+ default='/dev/stdout',
243
242
  )
244
243
  parser.add_argument(
245
244
  '-s',
@@ -248,96 +247,85 @@ def parse_options(): # noqa:WPS213
248
247
  type=int,
249
248
  default=0,
250
249
  help='Strand: this can be 0 (unstranded),' +
251
- '1 (secondstrand oriented) or ' +
252
- '2 (firststrand oriented)',
250
+ '1 (second strand oriented) or ' +
251
+ '2 (first strand oriented).',
253
252
  )
254
253
  parser.add_argument(
255
254
  '-a',
256
255
  '--append-file',
257
256
  action='store_true',
258
- help='Appends results to file (and creates if not existing)',
257
+ help='Appends results to file (and creates if not existing).',
259
258
  )
260
259
  parser.add_argument(
261
260
  '-g',
262
261
  '--region',
263
- help='The self.region of the bam file to be analyzed',
262
+ help='Only analyzes the specified region.',
264
263
  )
265
264
  parser.add_argument(
266
265
  '-m',
267
266
  '--load-omopolymeric-file',
268
- help='The file containing the omopolymeric positions',
269
- )
270
- parser.add_argument(
271
- '-c',
272
- '--create-omopolymeric-file',
273
- default=False,
274
- help='Path to write omopolymeric positions to',
275
- action='store_true',
267
+ help='BED file of omopolymeric positions.',
276
268
  )
277
269
  parser.add_argument(
278
270
  '-os',
279
271
  '--omopolymeric-span',
280
272
  type=int,
281
273
  default=5,
282
- help='The omopolymeric span',
274
+ help='The omopolymeric span.',
283
275
  )
284
276
  parser.add_argument(
285
277
  '-sf',
286
278
  '--splicing-file',
287
- help='The file containing the splicing sites positions',
279
+ help='The file containing splicing site positions.',
288
280
  )
289
281
  parser.add_argument(
290
282
  '-ss',
291
283
  '--splicing-span',
292
284
  type=int,
293
285
  default=4,
294
- help='The splicing span',
286
+ help='The splicing span.',
295
287
  )
296
288
  parser.add_argument(
297
289
  '-mrl',
298
290
  '--min-read-length',
299
291
  type=int,
300
292
  default=30, # noqa:WPS432
301
- help='Reads whose length is below this value will be discarded.',
293
+ help='Reads with length below -mrl will be discarded.',
302
294
  )
303
295
  parser.add_argument(
304
296
  '-q',
305
297
  '--min-read-quality',
306
298
  type=int,
307
299
  default=20, # noqa:WPS432
308
- help='Reads with mapping quality below this value will be discarded.',
300
+ help='Reads with mapping quality below -q will be discarded.',
309
301
  )
310
302
  parser.add_argument(
311
303
  '-bq',
312
304
  '--min-base-quality',
313
305
  type=int,
314
306
  default=30, # noqa:WPS432
315
- help='Base quality below this value will not be included in ' +
316
- 'the analysis.',
307
+ help='Base quality below -bq will be discarded.',
317
308
  )
318
309
  parser.add_argument(
319
310
  '-mbp',
320
311
  '--min-base-position',
321
312
  type=int,
322
313
  default=0,
323
- help='Bases which reside in a previous position (in the read)' +
324
- 'will not be included in the analysis.',
314
+ help='Ignores the first -mbp bases in each read.',
325
315
  )
326
316
  parser.add_argument(
327
317
  '-Mbp',
328
318
  '--max-base-position',
329
319
  type=int,
330
320
  default=0,
331
- help='Bases which reside in a further position (in the read)' +
332
- 'will not be included in the analysis.',
321
+ help='Ignores the last -Mbp bases in each read.',
333
322
  )
334
323
  parser.add_argument(
335
324
  '-l',
336
- '--min-column-length',
325
+ '--min-read-depth',
337
326
  type=int,
338
327
  default=1,
339
- help='Positions whose columns have length below this value will' +
340
- 'not be included in the analysis.',
328
+ help='Only report on positions with at least -l read depth',
341
329
  )
342
330
  parser.add_argument(
343
331
  '-e',
@@ -351,8 +339,7 @@ def parse_options(): # noqa:WPS213
351
339
  '--min-edits-per-nucleotide',
352
340
  type=int,
353
341
  default=0,
354
- help='Positions whose columns have bases with less than' +
355
- 'min-edits-per-base edits will not be included in the analysis.',
342
+ help='Positions with fewer than -men edits will be discarded.',
356
343
  )
357
344
  parser.add_argument(
358
345
  '-me',
@@ -360,16 +347,14 @@ def parse_options(): # noqa:WPS213
360
347
  type=int,
361
348
  default=0, # noqa:WPS432
362
349
  help='The minimum number of editing events (per position). ' +
363
- 'Positions whose columns have bases with less than ' +
364
- '"min-edits-per-base edits" will not be included in the ' +
365
- 'analysis.',
350
+ 'Positions with fewer than -me edits will be discarded.',
366
351
  )
367
352
  parser.add_argument(
368
353
  '-Men',
369
354
  '--max-editing-nucleotides',
370
355
  type=int,
371
- default=100, # noqa:WPS432
372
- help='The maximum number of editing nucleotides, from 0 to 4 ' +
356
+ default=4, # noqa:WPS432
357
+ help='The maximum number of editing nucleotides, from 0 to 3 ' +
373
358
  '(per position). Positions whose columns have more than ' +
374
359
  '"max-editing-nucleotides" will not be included in the analysis.',
375
360
  )
@@ -378,8 +363,8 @@ def parse_options(): # noqa:WPS213
378
363
  '--strand-confidence-threshold',
379
364
  type=float,
380
365
  default=0.7, # noqa:WPS432
381
- help='Only report the strandedness if at least this proportion of ' +
382
- 'reads are of a given strand',
366
+ help='Only report the strandedness if at least -T proportion of ' +
367
+ 'reads are of a given strand.',
383
368
  )
384
369
  parser.add_argument(
385
370
  '-C',
@@ -393,25 +378,25 @@ def parse_options(): # noqa:WPS213
393
378
  '-V',
394
379
  '--verbose',
395
380
  default=False,
396
- help='Verbose information in stderr',
381
+ help='Run in verbose mode.',
397
382
  action='store_true',
398
383
  )
399
384
  parser.add_argument(
400
385
  '-N',
401
386
  '--dna',
402
387
  default=False,
403
- help='Run REDItools 2.0 on DNA-Seq data',
388
+ help='Run REDItools on DNA-Seq data.',
404
389
  action='store_true',
405
390
  )
406
391
  parser.add_argument(
407
392
  '-B',
408
393
  '--bed_file',
409
- help='Path of BED file containing target self.regions',
394
+ help='Only analyze regions in the provided BED file.',
410
395
  )
411
396
  parser.add_argument(
412
397
  '-t',
413
398
  '--threads',
414
- help='Number of threads to run',
399
+ help='Number of threads for parallel processing.',
415
400
  type=int,
416
401
  default=1,
417
402
  )
@@ -419,7 +404,7 @@ def parse_options(): # noqa:WPS213
419
404
  '-w',
420
405
  '--window',
421
406
  help='How many bp should be processed by each thread at a time. ' +
422
- 'Defaults to full contig.',
407
+ 'Zero uses the full contig.',
423
408
  type=int,
424
409
  default=0,
425
410
  )
@@ -427,18 +412,18 @@ def parse_options(): # noqa:WPS213
427
412
  '-k',
428
413
  '--exclude_regions',
429
414
  nargs='+',
430
- help='Path of BED file containing regions to exclude from analysis',
415
+ help='Skip regions in the provided BED file(s).',
431
416
  )
432
417
  parser.add_argument(
433
418
  '-E',
434
419
  '--exclude_reads',
435
- help='Path to a text file listing read names to exclude from analysis',
420
+ help='Text file listing read names to exclude from analysis.',
436
421
  )
437
422
  parser.add_argument(
438
423
  '-d',
439
424
  '--debug',
440
425
  default=False,
441
- help='REDItools is run in DEBUG mode.',
426
+ help='Run in debug mode.',
442
427
  action='store_true',
443
428
  )
444
429
 
@@ -2,11 +2,8 @@
2
2
 
3
3
  import csv
4
4
  import os
5
- from collections import defaultdict
6
5
  from gzip import open as gzip_open
7
6
 
8
- from sortedcontainers import SortedSet
9
-
10
7
  from reditools.region import Region
11
8
 
12
9
 
@@ -68,54 +65,36 @@ def concat(output, *fnames, clean_up=True, encoding='utf-8'):
68
65
  os.remove(fname)
69
66
 
70
67
 
71
- def load_poly_regions(fname):
72
- """
73
- Read omopolymeric positions from a file.
74
-
75
- Parameters:
76
- fname (str): File path
77
-
78
- Returns:
79
- (dict): Contigs and regions
80
- """
81
- poly_regions = defaultdict(set)
82
- with read_bed_file(fname) as reader:
83
- for row in reader:
84
- poly_regions[row[0]] = Region(
85
- contig=row[0],
86
- start=row[1],
87
- stop=row[2],
88
- )
89
- return poly_regions
90
-
91
-
92
- def load_splicing_file(splicing_file, span):
68
+ def load_splicing_file(splicing_file, splicing_span):
93
69
  """
94
70
  Read splicing positions from a file.
95
71
 
96
72
  Parameters:
97
73
  splicing_file (str): File path
98
- span(int): Width of splice sites
74
+ splicing_span(int): Width of splice sites
99
75
 
100
- Returns:
101
- (dict): Contig and positions
76
+ Yields:
77
+ Splicing file contents as Regions.
102
78
  """
103
- splice_positions = defaultdict(SortedSet)
104
79
  strand_map = {'-': 'D', '+': 'A'}
105
80
 
106
- with open_stream(splicing_file, 'r') as stream:
107
- for line in stream:
108
- fields = line.strip().split()
109
-
110
- chrom = fields[0]
111
- strand = fields[4]
112
- splice = fields[3]
113
- span = int(fields[1])
114
-
115
- coe = -1 if strand_map.get(strand, None) == splice else 1
116
- new_positions = [1 + span + coe * fctr for fctr in range(span)]
117
- splice_positions[chrom] |= new_positions
118
- return splice_positions
81
+ stream = open_stream(splicing_file)
82
+ reader = csv.reader(
83
+ filter(lambda row: row[0] != '#', stream),
84
+ delimiter=' ',
85
+ )
86
+ for row in reader:
87
+ contig = row[0]
88
+ span = int(row[1])
89
+ splice = row[3]
90
+ strand = row[4]
91
+
92
+ coe = -1 if strand_map.get(strand, None) == splice else 1
93
+ start = 1 + span
94
+ stop = start + splicing_span * coe
95
+ if start > stop:
96
+ start, stop = stop, start
97
+ yield Region(contig=contig, start=start, stop=stop)
119
98
 
120
99
 
121
100
  def load_text_file(file_name):
@@ -42,7 +42,11 @@ def parse_options():
42
42
  Returns:
43
43
  namespace
44
44
  """
45
- parser = argparse.ArgumentParser(description='REDItools 2.0')
45
+ parser = argparse.ArgumentParser(
46
+ prog="reditools find-repeats",
47
+ description='REDItools3',
48
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
49
+ )
46
50
  parser.add_argument(
47
51
  'file',
48
52
  help='The fasta file to be analyzed',
@@ -57,6 +61,7 @@ def parse_options():
57
61
  parser.add_argument(
58
62
  '-o',
59
63
  '--output',
64
+ default='/dev/stdout',
60
65
  help='Destination to write results. Default is to use STDOUT. ' +
61
66
  'If the filename ends in .gz, the contents will be gzipped.',
62
67
  )
@@ -180,7 +180,11 @@ def parse_options(): # noqa:WPS213
180
180
  Returns:
181
181
  namespace: commandline args
182
182
  """
183
- parser = argparse.ArgumentParser(description='REDItools 2.0')
183
+ parser = argparse.ArgumentParser(
184
+ prog="reditools index",
185
+ description='REDItools3',
186
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
187
+ )
184
188
  parser.add_argument(
185
189
  'file',
186
190
  nargs='+',
@@ -189,6 +193,7 @@ def parse_options(): # noqa:WPS213
189
193
  parser.add_argument(
190
194
  '-o',
191
195
  '--output-file',
196
+ default='/dev/stdout',
192
197
  help='The output statistics file',
193
198
  )
194
199
  parser.add_argument(
@@ -239,7 +244,7 @@ def main():
239
244
  indexer.add_target_from_bed(trg_fname)
240
245
 
241
246
  if options.output_file:
242
- stream = open_stream(options.output_fipe, 'w')
247
+ stream = open_stream(options.output_file, 'w')
243
248
  else:
244
249
  stream = sys.stdout
245
250
 
@@ -127,7 +127,6 @@ class REDItools(object):
127
127
  self._target_positions = False
128
128
  self._exclude_positions = {}
129
129
  self._splice_positions = []
130
-
131
130
  self._specific_edits = None
132
131
 
133
132
  self.reference = None
@@ -294,6 +293,20 @@ class REDItools(object):
294
293
  """
295
294
  return self._exclude_positions
296
295
 
296
+ @property
297
+ def max_alts(self):
298
+ """Maximum number of alternative bases for a position."""
299
+ return self._max_alts
300
+
301
+ @max_alts.setter
302
+ def max_alts(self, max_alts):
303
+ self._max_alts = max_alts
304
+ function = self._rtqc.check_max_alts
305
+ if max_alts < 3:
306
+ self._rtqc.add(function)
307
+ else:
308
+ self._rtqc.discard(function)
309
+
297
310
  def exclude(self, regions):
298
311
  """
299
312
  Explicitly skip specified genomic regions.
@@ -149,12 +149,14 @@ class RTChecks(object):
149
149
  Returns:
150
150
  (bool): True if there are sufficient edits
151
151
  """
152
- for num_edits in bases.get_min_edits():
153
- if 0 < num_edits < rtools.min_edits_per_nucleotide:
152
+ for base in "ATCG":
153
+ if base == bases.ref:
154
+ continue
155
+ if bases[base] < rtools.min_edits_per_nucleotide:
154
156
  rtools.log(
155
157
  Logger.debug_level,
156
158
  'DISCARDING COLUMN edits={} < {}',
157
- num_edits,
159
+ bases[base],
158
160
  rtools.min_edits_per_nucleotide,
159
161
  )
160
162
  return False
@@ -272,3 +274,26 @@ class RTChecks(object):
272
274
  )
273
275
  return False
274
276
  return True
277
+
278
+ def check_max_alts(self, bases, rtools):
279
+ """
280
+ Check that there are no more than a max number of alts.
281
+
282
+ Parameters:
283
+ bases (CompiledPosition): Base position under analysis
284
+ rtools (REDItools): Object running the analysis
285
+
286
+ Returns:
287
+ (bool): True if there are n or fewer alts
288
+ """
289
+
290
+ alts = bases.get_variants()
291
+ if len(alts) > rtools.max_alts:
292
+ rtools.log(
293
+ Logger.debug_level,
294
+ 'DISCARD COLUMN alts={} > {}',
295
+ len(alts),
296
+ rtools.max_alts,
297
+ )
298
+ return False
299
+ return True
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes