PyPI - REDItools3 - Versions diffs - 3.1a0__py3-none-any.whl → 3.2a0__py3-none-any.whl - Mend

REDItools3 3.1a0__py3-none-any.whl → 3.2a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

{REDItools3-3.1a0.dist-info → REDItools3-3.2a0.dist-info}/METADATA +2 -2
{REDItools3-3.1a0.dist-info → REDItools3-3.2a0.dist-info}/RECORD +11 -11
{REDItools3-3.1a0.dist-info → REDItools3-3.2a0.dist-info}/WHEEL +1 -1
reditools/analyze.py +42 -58
reditools/file_utils.py +21 -42
reditools/homopolymerics.py +6 -1
reditools/index.py +7 -2
reditools/reditools.py +14 -1
reditools/rtchecks.py +23 -0
{REDItools3-3.1a0.dist-info → REDItools3-3.2a0.dist-info}/LICENSE +0 -0
{REDItools3-3.1a0.dist-info → REDItools3-3.2a0.dist-info}/top_level.txt +0 -0

{REDItools3-3.1a0.dist-info → REDItools3-3.2a0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: REDItools3
-Version: 3.1a0
+Version: 3.2a0
 Author: Ernesto Picardi
 Author-email: Adam Handen <adam.handen@gmail.com>
 Project-URL: homepage, https://github.com/BioinfoUNIBA/REDItools3
@@ -13,7 +13,7 @@ Classifier: Intended Audience :: Science/Research
 Classifier: License :: OSI Approved :: GNU General Public License (GPL)
 Classifier: Operating System :: MacOS :: MacOS X
 Classifier: Operating System :: Unix
-Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.7
 Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
 Requires-Python: >=3.7
 Description-Content-Type: text/markdown

{REDItools3-3.1a0.dist-info → REDItools3-3.2a0.dist-info}/RECORD RENAMED Viewed

@@ -2,20 +2,20 @@ reditools/__init__.py,sha256=7nSB0hrQznxrn6l95cv_pSonJTG6jZCQdbn7aT1TtvY,46
 reditools/__main__.py,sha256=mWJ9O2LDiOpBWDBJJUN7OiM4SyltW-kVXXAGBe_JxgQ,842
 reditools/alignment_file.py,sha256=YFyCEhMek2t93DpmpwEst5v3gDZkmRotbd6Fy_mP0aE,4258
 reditools/alignment_manager.py,sha256=_FXwvqGWoXRdzVrwBxki2heaVZA2cQbGXqCopr-g1Hs,4138
-reditools/analyze.py,sha256=u38yN5DmXUCW8nQP_BMfsXuvb59rFO12di5cYT8Ye58,15280
+reditools/analyze.py,sha256=tW9Rz-R_8R-mJ2uQP5fpGFTH7TEv-2pOsigtbKqwYDY,14649
 reditools/compiled_position.py,sha256=v540uUEie_HHUwsYQmBqeeOkUvtYlcnWj1v8gAhLUiE,3858
 reditools/compiled_reads.py,sha256=7Hm5f7g1T8q1zDOOxZUD7aZax9b7SdQ0PlmT93hmcaE,4154
 reditools/fasta_file.py,sha256=KBsJBs7OnBpew2PGWGp0mTxPLlpBmRrtXL4uvQw4t34,2212
-reditools/file_utils.py,sha256=AJjU9leOxSou5U_4RAgapR9PGQz0OYQlkCudvTcXGeQ,3284
-reditools/homopolymerics.py,sha256=BCYXBJa6YuouzccFisBFOtGfZAEOSqeqJsO-c37At84,2123
-reditools/index.py,sha256=K3JQTMx4ojUUiPQTDMDsoYoFQQ_o-ZNqTrh5dIVFVSQ,7398
+reditools/file_utils.py,sha256=MfQPzJ4ogbwNvIiEu1oooS64EJH1CFdRS8eoqT9Zo4w,2763
+reditools/homopolymerics.py,sha256=UsHTr0e_OP_dkGq5te-oTSe5u6kzi5UJOF9t9QAunUk,2269
+reditools/index.py,sha256=jLgWwKXIA_e-bqVu74SDZXmrdWch_syDSmMnFZPbqz4,7537
 reditools/logger.py,sha256=u4L2SYxy4vJ4KDHEymd0b1sCa8BXXHchx8LR_wcFq1A,1210
-reditools/reditools.py,sha256=Rb5bllqjE1wHti98p-v2t4Vu-YEvZgNv-FXcUPgDVO0,12725
+reditools/reditools.py,sha256=RNH7aKC2QnbafA7T9E6UpV5Llv3FjfDIabjPCuwDgW0,13111
 reditools/region.py,sha256=_BiKDc5lCl1snjkokRiUWOgzA57ME3yLydEIwK9ku7U,3780
-reditools/rtchecks.py,sha256=tkaosQDBc2XN_RlVMtNwrxZjCQoQo2bWfQISROXCmKA,8221
+reditools/rtchecks.py,sha256=TmCow38fCRwSICvx3nlOxy6Q216BcDXESGhM7bB_ixo,8878
 reditools/utils.py,sha256=a2qfhMcrH2QlK-JoR-HHF6_bnlo5v3jihAqqknvVIjc,2733
-REDItools3-3.1a0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-REDItools3-3.1a0.dist-info/METADATA,sha256=EPD47hLxZoozfc0Gd4uFPOaid9uz81DkWI4Pkv0STpo,1289
-REDItools3-3.1a0.dist-info/WHEEL,sha256=a7TGlA-5DaHMRrarXjVbQagU3Man_dCnGIWMJr5kRWo,91
-REDItools3-3.1a0.dist-info/top_level.txt,sha256=wrvvbFXhmNg7s6LQqjlV_fVQYUZOOpF93IcMu_hBCx4,10
-REDItools3-3.1a0.dist-info/RECORD,,
+REDItools3-3.2a0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+REDItools3-3.2a0.dist-info/METADATA,sha256=NRwZTGGmlHBkP6XiQ4Sdql-XXRR2Ii27beCDf_jCt90,1288
+REDItools3-3.2a0.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
+REDItools3-3.2a0.dist-info/top_level.txt,sha256=wrvvbFXhmNg7s6LQqjlV_fVQYUZOOpF93IcMu_hBCx4,10
+REDItools3-3.2a0.dist-info/RECORD,,

{REDItools3-3.1a0.dist-info → REDItools3-3.2a0.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.4.0)
+Generator: setuptools (75.5.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

reditools/analyze.py CHANGED Viewed

@@ -79,19 +79,11 @@ def setup_rtools(options):  # noqa:WPS213,WPS231
         rtools.log_level = Logger.info_level
     if options.load_omopolymeric_file:
-        regions = file_utils.load_omopolymeric_regions(
-            options.load_omopolymeric_file,
-        )
+        regions = file_utils.read_bed_file(options.load_omopolymeric_file)
         rtools.exclude(regions)
-    if options.create_omopolymeric_file:
-        rtools.create_omopolymeric_positions(
-            options.create_omopolymeric_file,
-            options.omopolymeric_span,
-        )
     if options.splicing_file:
-        rtools.load_splicing_file(
+        rtools.splice_positions = file_utils.load_splicing_file(
             options.splicing_file,
             options.splicing_span,
         )
@@ -109,10 +101,11 @@ def setup_rtools(options):  # noqa:WPS213,WPS231
     rtools.max_base_position = options.max_base_position
     rtools.min_base_quality = options.min_base_quality
-    rtools.min_column_length = options.min_column_length
+    rtools.min_column_length = options.min_read_depth
     rtools.min_edits = options.min_edits
     rtools.min_edits_per_nucleotide = options.min_edits_per_nucleotide
     rtools.strand = options.strand
+    rtools.max_alts = options.max_editing_nucleotides
     rtools.strand_confidence_threshold = options.strand_confidence_threshold
@@ -225,21 +218,26 @@ def parse_options():  # noqa:WPS213
     Returns:
         namespace: commandline args
     """
-    parser = argparse.ArgumentParser(description='REDItools 2.0')
+    parser = argparse.ArgumentParser(
+        prog="reditools analyze",
+        description='REDItools3',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
     parser.add_argument(
         'file',
         nargs='+',
-        help='The bam file to be analyzed',
+        help='The bam file(s) to be analyzed.',
     )
     parser.add_argument(
         '-r',
         '--reference',
-        help='The reference FASTA file',
+        help='Reference FASTA file.',
     )
     parser.add_argument(
         '-o',
         '--output-file',
-        help='The output statistics file',
+        help='Path to write output to.',
+        default='/dev/stdout',
     )
     parser.add_argument(
         '-s',
@@ -248,96 +246,85 @@ def parse_options():  # noqa:WPS213
         type=int,
         default=0,
         help='Strand: this can be 0 (unstranded),' +
-        '1 (secondstrand oriented) or ' +
-        '2 (firststrand oriented)',
+        '1 (second strand oriented) or ' +
+        '2 (first strand oriented).',
     )
     parser.add_argument(
         '-a',
         '--append-file',
         action='store_true',
-        help='Appends results to file (and creates if not existing)',
+        help='Appends results to file (and creates if not existing).',
     )
     parser.add_argument(
         '-g',
         '--region',
-        help='The self.region of the bam file to be analyzed',
+        help='Only analyzes the specified region.',
     )
     parser.add_argument(
         '-m',
         '--load-omopolymeric-file',
-        help='The file containing the omopolymeric positions',
-    )
-    parser.add_argument(
-        '-c',
-        '--create-omopolymeric-file',
-        default=False,
-        help='Path to write omopolymeric positions to',
-        action='store_true',
+        help='BED file of omopolymeric positions.',
     )
     parser.add_argument(
         '-os',
         '--omopolymeric-span',
         type=int,
         default=5,
-        help='The omopolymeric span',
+        help='The omopolymeric span.',
     )
     parser.add_argument(
         '-sf',
         '--splicing-file',
-        help='The file containing the splicing sites positions',
+        help='The file containing splicing site positions.',
     )
     parser.add_argument(
         '-ss',
         '--splicing-span',
         type=int,
         default=4,
-        help='The splicing span',
+        help='The splicing span.',
     )
     parser.add_argument(
         '-mrl',
         '--min-read-length',
         type=int,
         default=30,  # noqa:WPS432
-        help='Reads whose length is below this value will be discarded.',
+        help='Reads with length below -mrl will be discarded.',
     )
     parser.add_argument(
         '-q',
         '--min-read-quality',
         type=int,
         default=20,  # noqa:WPS432
-        help='Reads with mapping quality below this value will be discarded.',
+        help='Reads with mapping quality below -q will be discarded.',
     )
     parser.add_argument(
         '-bq',
         '--min-base-quality',
         type=int,
         default=30,  # noqa:WPS432
-        help='Base quality below this value will not be included in ' +
-        'the analysis.',
+        help='Base quality below -bq will bed discarded.',
     )
     parser.add_argument(
         '-mbp',
         '--min-base-position',
         type=int,
         default=0,
-        help='Bases which reside in a previous position (in the read)' +
-        'will not be included in the analysis.',
+        help='Ignores the first -mbp bases in each read.',
     )
     parser.add_argument(
         '-Mbp',
         '--max-base-position',
         type=int,
         default=0,
-        help='Bases which reside in a further position (in the read)' +
-        'will not be included in the analysis.',
+        help='Ignores the last -Mpb bases in each read.',
     )
     parser.add_argument(
         '-l',
-        '--min-column-length',
+        '--min-read-depth',
         type=int,
         default=1,
-        help='Positions whose columns have length below this value will' +
-        'not be included in the analysis.',
+        help='Only report on positions with at least -l read depth',
     )
     parser.add_argument(
         '-e',
@@ -351,8 +338,7 @@ def parse_options():  # noqa:WPS213
         '--min-edits-per-nucleotide',
         type=int,
         default=0,
-        help='Positions whose columns have bases with less than' +
-        'min-edits-per-base edits will not be included in the analysis.',
+        help='Positions with fewer than -men edits will not be discarded.',
     )
     parser.add_argument(
         '-me',
@@ -360,16 +346,14 @@ def parse_options():  # noqa:WPS213
         type=int,
         default=0,  # noqa:WPS432
         help='The minimum number of editing events (per position). ' +
-        'Positions whose columns have bases with less than ' +
-        '"min-edits-per-base edits" will not be included in the ' +
-        'analysis.',
+        'Positions with fewer than -me edits will be discarded.',
     )
     parser.add_argument(
         '-Men',
         '--max-editing-nucleotides',
         type=int,
-        default=100,  # noqa:WPS432
-        help='The maximum number of editing nucleotides, from 0 to 4 ' +
+        default=4,  # noqa:WPS432
+        help='The maximum number of editing nucleotides, from 0 to 3 ' +
         '(per position). Positions whose columns have more than ' +
         '"max-editing-nucleotides" will not be included in the analysis.',
     )
@@ -378,8 +362,8 @@ def parse_options():  # noqa:WPS213
         '--strand-confidence-threshold',
         type=float,
         default=0.7,  # noqa:WPS432
-        help='Only report the strandedness if at least this proportion of ' +
-        'reads are of a given strand',
+        help='Only report the strandedness if at least -T proportion of ' +
+        'reads are of a given strand.',
     )
     parser.add_argument(
         '-C',
@@ -393,25 +377,25 @@ def parse_options():  # noqa:WPS213
         '-V',
         '--verbose',
         default=False,
-        help='Verbose information in stderr',
+        help='Run in verbose mode.',
         action='store_true',
     )
     parser.add_argument(
         '-N',
         '--dna',
         default=False,
-        help='Run REDItools 2.0 on DNA-Seq data',
+        help='Run REDItools on DNA-Seq data.',
         action='store_true',
     )
     parser.add_argument(
         '-B',
         '--bed_file',
-        help='Path of BED file containing target self.regions',
+        help='Only analyze regions in the provided BED file.',
     )
     parser.add_argument(
         '-t',
         '--threads',
-        help='Number of threads to run',
+        help='Number of threads for parallel processing.',
         type=int,
         default=1,
     )
@@ -419,7 +403,7 @@ def parse_options():  # noqa:WPS213
         '-w',
         '--window',
         help='How many bp should be processed by each thread at a time. ' +
-        'Defaults to full contig.',
+        'Zero uses the full contig.',
         type=int,
         default=0,
     )
@@ -427,18 +411,18 @@ def parse_options():  # noqa:WPS213
         '-k',
         '--exclude_regions',
         nargs='+',
-        help='Path of BED file containing regions to exclude from analysis',
+        help='Skip regions in the provided BED file(s).',
     )
     parser.add_argument(
         '-E',
         '--exclude_reads',
-        help='Path to a text file listing read names to exclude from analysis',
+        help='Text file listing read names to exclude from analysis.',
     )
     parser.add_argument(
         '-d',
         '--debug',
         default=False,
-        help='REDItools is run in DEBUG mode.',
+        help='Run in debug mode.',
         action='store_true',
     )

reditools/file_utils.py CHANGED Viewed

@@ -2,11 +2,8 @@
 import csv
 import os
-from collections import defaultdict
 from gzip import open as gzip_open
-from sortedcontainers import SortedSet
 from reditools.region import Region
@@ -68,54 +65,36 @@ def concat(output, *fnames, clean_up=True, encoding='utf-8'):
             os.remove(fname)
-def load_poly_regions(fname):
-    """
-    Read omopolymeric positions from a file.
-    Parameters:
-        fname (str): File path
-    Returns:
-        (dict): Contigs and regions
-    """
-    poly_regions = defaultdict(set)
-    with read_bed_file(fname) as reader:
-        for row in reader:
-            poly_regions[row[0]] = Region(
-                contig=row[0],
-                start=row[1],
-                stop=row[2],
-            )
-    return poly_regions
-def load_splicing_file(splicing_file, span):
+def load_splicing_file(splicing_file, splicing_span):
     """
     Read splicing positions from a file.
     Parameters:
         splicing_file (str): File path
-        span(int): Width of splice sites
+        splicing_span(int): Width of splice sites
-    Returns:
-        (dict): Contig and positions
+    Yeilds:
+        Splicing file contents as Regions.
     """
-    splice_positions = defaultdict(SortedSet)
     strand_map = {'-': 'D', '+': 'A'}
-    with open_stream(splicing_file, 'r') as stream:
-        for line in stream:
-            fields = line.strip().split()
-            chrom = fields[0]
-            strand = fields[4]
-            splice = fields[3]
-            span = int(fields[1])
-            coe = -1 if strand_map.get(strand, None) == splice else 1
-            new_positions = [1 + span + coe * fctr for fctr in range(span)]
-            splice_positions[chrom] |= new_positions
-        return splice_positions
+    stream = open_stream(splicing_file)
+    reader = csv.reader(
+        filter(lambda row: row[0] != '#', stream),
+        delimiter=' ',
+    )
+    for row in reader:
+        contig = row[0]
+        span = int(row[1])
+        splice = row[3]
+        strand = row[4]
+        coe = -1 if strand_map.get(strand, None) == splice else 1
+        start = 1 + span
+        stop = start + splicing_span * coe
+        if start > stop:
+            start, stop = stop, start
+        yield Region(contig=contig, start=start, stop=stop)
 def load_text_file(file_name):

reditools/homopolymerics.py CHANGED Viewed

@@ -42,7 +42,11 @@ def parse_options():
     Returns:
         namespace
     """
-    parser = argparse.ArgumentParser(description='REDItools 2.0')
+    parser = argparse.ArgumentParser(
+        prog="reditools find-repeats",
+        description='REDItools3',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
     parser.add_argument(
         'file',
         help='The fasta file to be analyzed',
@@ -57,6 +61,7 @@ def parse_options():
     parser.add_argument(
         '-o',
         '--output',
+        default='/dev/stdout',
         help='Destination to write results. Default is to use STDOUT. ' +
         'If the filename ends in .gz, the contents will be gzipped.',
     )

reditools/index.py CHANGED Viewed

@@ -180,7 +180,11 @@ def parse_options():  # noqa:WPS213
     Returns:
         namespace: commandline args
     """
-    parser = argparse.ArgumentParser(description='REDItools 2.0')
+    parser = argparse.ArgumentParser(
+        prog="reditools index",
+        description='REDItools3',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
     parser.add_argument(
         'file',
         nargs='+',
@@ -189,6 +193,7 @@ def parse_options():  # noqa:WPS213
     parser.add_argument(
         '-o',
         '--output-file',
+        default='/dev/stdout',
         help='The output statistics file',
     )
     parser.add_argument(
@@ -239,7 +244,7 @@ def main():
             indexer.add_target_from_bed(trg_fname)
     if options.output_file:
-        stream = open_stream(options.output_fipe, 'w')
+        stream = open_stream(options.output_file, 'w')
     else:
         stream = sys.stdout

reditools/reditools.py CHANGED Viewed

@@ -127,7 +127,6 @@ class REDItools(object):
         self._target_positions = False
         self._exclude_positions = {}
         self._splice_positions = []
         self._specific_edits = None
         self.reference = None
@@ -294,6 +293,20 @@ class REDItools(object):
         """
         return self._exclude_positions
+    @property
+    def max_alts(self):
+        """Maximum number of alternative bases for a position."""
+        return self._max_alts
+    @max_alts.setter
+    def max_alts(self, max_alts):
+        self._max_alts = max_alts
+        function = self._rtqc.check_max_alts
+        if max_alts < 3:
+            self._rtqc.add(function)
+        else:
+            self._rtqc.discard(function)
     def exclude(self, regions):
         """
         Explicitly skip specified genomic regions.

reditools/rtchecks.py CHANGED Viewed

@@ -272,3 +272,26 @@ class RTChecks(object):
                 )
                 return False
         return True
+    def check_max_alts(self, bases, rtools):
+        """
+        Check that there are no more than a max number of alts.
+        Parameters:
+            bases (CompiledPosition): Base position under analysis
+            rtools (REDItools): Object running the analysis
+        Returns:
+            (bool): True if there are n or fewer alts
+        """
+        alts = bases.get_variants()
+        if len(alts) > rtools.max_alts:
+            rtools.log(
+                Logger.debug_level,
+                'DISCARD COLUMN alts={} > {}',
+                len(alts),
+                rtools.max_alts,
+            )
+            return False
+        return True

{REDItools3-3.1a0.dist-info → REDItools3-3.2a0.dist-info}/LICENSE RENAMED Viewed

File without changes

{REDItools3-3.1a0.dist-info → REDItools3-3.2a0.dist-info}/top_level.txt RENAMED Viewed

File without changes

REDItools3 3.1a0__py3-none-any.whl → 3.2a0__py3-none-any.whl

REDItools3 3.1a0__py3-none-any.whl → 3.2a0__py3-none-any.whl